In [28]:
from langchain_groq import ChatGroq

In [29]:
import os

# Read the Groq API key from the environment instead of embedding it in the
# notebook, so the credential never ends up in version control or shared output.
groq_api_key = os.environ.get("GROQ_API_KEY")
if not groq_api_key:
    raise RuntimeError(
        "GROQ_API_KEY is not set; export it before running this notebook."
    )

# Chat model shared by the later cells that build chains on top of `llm`.
llm = ChatGroq(
    temperature=0,
    groq_api_key=groq_api_key,
    model_name="llama3-70b-8192",
)

# Smoke test: send one prompt and show the text of the reply.
response = llm.invoke("The first person to land on moon was ...")
print(response.content)

That's an easy one!

The first person to set foot on the moon was Neil Armstrong. He stepped out of the lunar module Eagle and onto the moon's surface on July 20, 1969, during the Apollo 11 mission. Armstrong famously declared, "That's one small step for man, one giant leap for mankind," as he became the first human to walk on the moon.


In [30]:
from langchain_community.document_loaders import WebBaseLoader
import re

# Fetch the job posting with WebBaseLoader and take the text of the last
# document it returns.
loader = WebBaseLoader("https://careers.amd.com/careers-home/jobs/63579?lang=en-us&iis=Job%20Board&iisn=Linkedin")
loaded_docs = loader.load()
page_data = loaded_docs.pop().page_content

# Collapse every run of whitespace to a single space, then trim both ends.
collapsed_text = re.sub(r'\s+', ' ', page_data)
cleaned_page_data = collapsed_text.strip()
# Optional cap on the text length, currently disabled:
# cleaned_page_data = cleaned_page_data.strip()[:6000]
print(cleaned_page_data)

Software Engineering Intern/Co-Op (Graduate | Fall 2025 | Hybrid) in San Jose, California | Advanced Micro Devices, Inc Skip to Main Content Careers About AMD Student Programs Benefits Job Categories Engineering Students Corporate Marketing and Sales Global Jobs Americas APJ EMEA Greater China India Returning User Login AMD does not require or seek to collect a fee or payment from candidates in the application or interview process. We do not conduct interviews by text messaging. Nor does AMD require copies of IDs, passports, or other identification as a part of the interview process. If you have experienced these requests, this is a scam, and you may wish to consider making a report to ReportFraud.ftc.gov or IC3.gov. We encourage job seekers interested in AMD roles to apply on the amd.com Careers page. For AMD employees looking to refer someone or search for new opportunities, please use the Internal Career Site. Careers Terms and Conditions Privacy Trademarks Supply Chain Transparency

In [31]:
import asyncio
from playwright.async_api import async_playwright
from bs4 import BeautifulSoup
import re
# Reset the module-level result; main() in this cell rebinds it via `global`.
cleaned_page_data = ""
async def get_rendered_html(url, timeout_ms=60000):
    """Return the HTML of ``url`` after loading it in headless Chromium.

    Args:
        url: Address of the page to load.
        timeout_ms: Limit, in milliseconds, passed to both the navigation and
            the wait for the "networkidle" load state. Defaults to 60000, the
            navigation timeout this function previously hard-coded.

    Returns:
        The page markup as returned by ``page.content()``.

    Raises:
        Whatever Playwright raises when launching, navigating or waiting
        fails (for example when ``timeout_ms`` is exceeded); the browser is
        closed before the exception propagates.
    """
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            page = await browser.new_page()
            await page.goto(url, timeout=timeout_ms)
            # Use the same limit here; previously only goto() received the
            # explicit timeout and this wait fell back to Playwright's default.
            await page.wait_for_load_state("networkidle", timeout=timeout_ms)
            return await page.content()
        finally:
            # Close the browser on success and on failure alike.
            await browser.close()

async def main():
    """Scrape the job posting and store its cleaned text in ``cleaned_page_data``.

    Renders the page through ``get_rendered_html``, removes ``<script>`` and
    ``<style>`` elements, flattens the remaining text to single-spaced form,
    keeps at most the first 6000 characters in the module-level
    ``cleaned_page_data`` and prints the first 5000 of them.
    """
    global cleaned_page_data
    job_url = "https://careers.amd.com/careers-home/jobs/63579?lang=en-us&iis=Job%20Board&iisn=Linkedin"
    rendered_markup = await get_rendered_html(job_url)
    parsed = BeautifulSoup(rendered_markup, 'html.parser')

    # Remove script and style elements so their contents do not end up in the text
    for node in parsed.find_all(['script', 'style']):
        node.decompose()

    raw_text = parsed.get_text(separator=' ')
    single_spaced = re.sub(r'\s+', ' ', raw_text).strip()
    cleaned_page_data = single_spaced[:6000]
    # Preview only; the stored value keeps up to 6000 characters
    print(cleaned_page_data[:5000])

# Run the scraper from the notebook cell using top-level `await`; main() fills
# the module-level cleaned_page_data and prints a preview.
await main()


Software Engineering Intern/Co-Op (Graduate | Fall 2025 | Hybrid) in San Jose, California | Advanced Micro Devices, Inc Skip to Main Content Careers About AMD Student Programs Benefits Job Categories Engineering Students Corporate Marketing and Sales Global Jobs Americas APJ EMEA Greater China India Returning User Login AMD does not require or seek to collect a fee or payment from candidates in the application or interview process. We do not conduct interviews by text messaging. Nor does AMD require copies of IDs, passports, or other identification as a part of the interview process. If you have experienced these requests, this is a scam, and you may wish to consider making a report to ReportFraud.ftc.gov or IC3.gov. We encourage job seekers interested in AMD roles to apply on the amd.com Careers page. For AMD employees looking to refer someone or search for new opportunities, please use the Internal Career Site . Back Software Engineering Intern/Co-Op (Graduate | Fall 2025 | Hybrid) J

In [32]:
from langchain_core.prompts import PromptTemplate

# Prompt that places the scraped page text above the extraction instructions;
# {page_data} is the only template variable.
prompt_extract = PromptTemplate.from_template(
        """
        ### SCRAPED TEXT FROM WEBSITE:
        {page_data}
        ### INSTRUCTION:
        The scraped text is from the career's page of a website.
        Your job is to extract the job postings and return them in JSON format containing the 
        following keys: `role`, `experience`, `skills` and `description`.
        Only return the valid JSON.
        ### VALID JSON (NO PREAMBLE):    
        """
)

# Pipe the prompt into the chat model, then run the chain on the cleaned text.
chain_extract = prompt_extract | llm
extract_inputs = {'page_data': cleaned_page_data}
res = chain_extract.invoke(extract_inputs)
print(res.content)

[
    {
        "role": "Software Engineering Intern/Co-Op",
        "experience": "Graduate",
        "skills": [
            "C/C++",
            "Verilog",
            "VHDL",
            "Python",
            "Perl",
            "Spice",
            "Ruby",
            "Chrome OS",
            "MATLAB",
            "ASIC design and verification",
            "ML and AI",
            "System/Board design, circuit board layout, signal integrity, power design",
            "Hardware automation, validation and test",
            "UNIX and JTAG tools",
            "Pre-silicon and/or post-silicon platforms",
            "Computer architecture, CPU & GPU architecture"
        ],
        "description": "Collaborate with Engineers and be responsible for some of AMD’s discrete and embedded hardware products. We will train you to build applications for external and internal use as well as large scale systems that improve the efficiency of AMD Hardware organization."
    }
]


In [33]:
from langchain_core.output_parsers import JsonOutputParser

# Parse the model's raw reply text into Python objects and display the result.
json_parser = JsonOutputParser()
raw_reply = res.content
json_res = json_parser.parse(raw_reply)
json_res

[{'role': 'Software Engineering Intern/Co-Op',
  'experience': 'Graduate',
  'skills': ['C/C++',
   'Verilog',
   'VHDL',
   'Python',
   'Perl',
   'Spice',
   'Ruby',
   'Chrome OS',
   'MATLAB',
   'ASIC design and verification',
   'ML and AI',
   'System/Board design, circuit board layout, signal integrity, power design',
   'Hardware automation, validation and test',
   'UNIX and JTAG tools',
   'Pre-silicon and/or post-silicon platforms',
   'Computer architecture, CPU & GPU architecture'],
  'description': 'Collaborate with Engineers and be responsible for some of AMD’s discrete and embedded hardware products. We will train you to build applications for external and internal use as well as large scale systems that improve the efficiency of AMD Hardware organization.'}]

In [34]:
# Show which container type the parser produced for the reply.
type(json_res)

list

In [36]:
import pandas as pd

# Load the portfolio table; the path is resolved against the working directory.
portfolio_csv = "my_portfolio.csv"
df = pd.read_csv(portfolio_csv)
df

Unnamed: 0,Project,Techstack,Description,Link
0,Cloud-Based ETL Pipeline,"AWS, Apache Airflow, EC2, Glue, Redshift",Built a cloud-based ETL pipeline using Apache ...,#
1,Satellite Image Segmentation,"U-Net, ResNet18, Python, Deep Learning",Segmented and classified solar panel regions f...,https://colab.research.google.com/drive/1hTUcI...
2,EDM Surface Inspection,"KNN, Machine Learning, Python",Predicted surface roughness of EDM materials u...,https://link.springer.com/chapter/10.1007/978-...
3,CPU Scheduling Simulator,"JavaScript, HTML, CSS",Created an interactive CPU scheduling visualiz...,https://jeminbutani.github.io/CpuScheduling/
4,React.js Company Website,"React, Node.js, Contentful, Azure",Built a responsive 10+ page company website wi...,https://aasmatech.com/
5,ABHA API Integration,"Python, Flask, Node.js",Developed a reusable API wrapper for Indian he...,https://jemil-portfolio.vercel.app/
6,Role-based Auth + Payments,"MongoDB, Mongoose, Node.js, Stripe",Built a role-based authentication system with ...,hhttps://github.com/Jem1D/Online_Shop_NodeJS
7,Food Delivery Data Analysis,"Python, Pandas, Seaborn, Matplotlib",Performed EDA on food delivery data to identif...,https://github.com/Jem1D/Food_delivery_analysis


In [37]:
import uuid
import chromadb

# Open the persistent Chroma client at the 'vectorstore' path and get (or
# create) the "portfolio" collection in it.
client = chromadb.PersistentClient('vectorstore')
collection = client.get_or_create_collection(name="portfolio")

# Populate only when the collection is empty, so re-running the cell does not
# add the same portfolio rows again under fresh random ids.
if not collection.count():
    for _, row in df.iterrows():
        collection.add(documents=row["Techstack"],
                       # The CSV column is "Link" (singular); the previous
                       # row["Links"] lookup raised KeyError.
                       metadatas={"links": row["Link"]},
                       ids=[str(uuid.uuid4())])