# Web Scraping - Job Posting

In [1]:
from langchain_community.document_loaders import WebBaseLoader

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [4]:
# Job posting to scrape.
url = "https://www.google.com/about/careers/applications/jobs/results/122573769037226694-senior-technical-program-manager-i-infrastructure-google-cloud"

# Load the page and keep the text (page_content) of the last Document returned.
loader = WebBaseLoader(url)
documents = loader.load()
page_data = documents[-1].page_content

In [5]:
# Show the scraped text so it can be inspected before it is passed to the extraction chain.
print(page_data)

Senior Technical Program Manager I, Infrastructure, Google Cloud — Google CareersCareersSkip navigation linkshomehomeHomeHomework_outlinework_outlineJobsJobsnoogler_hatnoogler_hatStudentsStudentsgooglegoogleHow we workHow we workhandymanhandymanHow we hireHow we hireperson_outlineperson_outlineYour careerYour careerhelp_outlineHelp linkfeedbackSend feedbackmore_vert HelpSend FeedbackSign inCareershomeHomework_outlineJobsexpand_morenoogler_hatStudentsexpand_moregoogleHow we workexpand_morehandymanHow we hireexpand_moreperson_outlineYour careerexpand_morejob detailsarrow_backBack to jobs searchJobs search results3,400  jobs matchedSoftware Engineer III, Infrastructure, CoreBengaluru, Karnataka, IndiaSenior Software Engineer, AI/ML GenAI, Google Cloud AISunnyvale, CA, USA; Kirkland, WA, USATechnical Program Manager, Edge Capacity DeliveryDublin, IrelandSoftware Engineer III, Google CloudBengaluru, Karnataka, India; Hyderabad, Telangana, India; +2 more; +1 moreSenior Software Engineer, Cor

## Loading the LLM from Groq

In [9]:
from langchain_groq import ChatGroq

In [10]:
# Chat model used by the extraction and email chains below.
# The API key is read from the GROQ_API_KEY environment variable so that no secret
# is stored in the notebook. Any key that was previously saved in this file should
# be treated as leaked and revoked.
import os

llm = ChatGroq(
    temperature=0,
    # Fails fast with KeyError when GROQ_API_KEY has not been exported.
    groq_api_key=os.environ["GROQ_API_KEY"],
    model_name="llama-3.3-70b-versatile",
)

## Formatting scraped data

In [7]:
from langchain_core.prompts import PromptTemplate

In [23]:
# Template text for the extraction step; {page_data} is filled with the scraped page text.
# The model is asked to return the job postings as JSON with the keys
# role, experience, skills and description.
extract_template = """
        ### SCRAPED TEXT FROM WEBSITE:
        {page_data}
        ### INSTRUCTION:
        The scraped text is from the career's page of a website.
        Your job is to extract the job postings and return them in JSON format containing the 
        following keys: ["role", "experience", "skills" and "description".]
        Only return the valid JSON.
        ### VALID JSON (NO PREAMBLE):    
        """

prompt_extract = PromptTemplate.from_template(template=extract_template)

In [24]:
# Pipe the extraction prompt into the model, then run the chain on the scraped text.
chain_extract = prompt_extract | llm
extract_inputs = {"page_data": page_data}
res = chain_extract.invoke(extract_inputs)

In [25]:
# Print the model's raw reply text; the next cell parses this same res.content as JSON.
print(res.content)

```json
[
  {
    "role": "Senior Technical Program Manager I, Infrastructure, Google Cloud",
    "experience": "8 years of experience in program management and 8 years of infrastructure work experience",
    "skills": "Experience owning outcomes and decision making, solving ambiguous problems and influencing stakeholders; deep expertise in domain",
    "description": "Implement communications standards across a portfolio of programs including executive and key partner communications. Establish a reliable and visible cadence for program reviews, decision-making, prioritization, and Resource Stewardship."
  },
  {
    "role": "Software Engineer III, Infrastructure, Core",
    "experience": "Not specified",
    "skills": "Not specified",
    "description": "Not specified"
  },
  {
    "role": "Senior Software Engineer, AI/ML GenAI, Google Cloud AI",
    "experience": "Not specified",
    "skills": "Not specified",
    "description": "Not specified"
  },
  {
    "role": "Technical Program

In [26]:
from langchain_core.output_parsers import JsonOutputParser

# Parse the model's reply text into Python objects and display the result.
json_parser = JsonOutputParser()
reply_text = res.content
json_res = json_parser.parse(reply_text)
json_res


[{'role': 'Senior Technical Program Manager I, Infrastructure, Google Cloud',
  'experience': '8 years of experience in program management and 8 years of infrastructure work experience',
  'skills': 'Experience owning outcomes and decision making, solving ambiguous problems and influencing stakeholders; deep expertise in domain',
  'description': 'Implement communications standards across a portfolio of programs including executive and key partner communications. Establish a reliable and visible cadence for program reviews, decision-making, prioritization, and Resource Stewardship.'},
 {'role': 'Software Engineer III, Infrastructure, Core',
  'experience': 'Not specified',
  'skills': 'Not specified',
  'description': 'Not specified'},
 {'role': 'Senior Software Engineer, AI/ML GenAI, Google Cloud AI',
  'experience': 'Not specified',
  'skills': 'Not specified',
  'description': 'Not specified'},
 {'role': 'Technical Program Manager, Edge Capacity Delivery',
  'experience': 'Not speci

In [29]:
# Keep only the first extracted posting.
# The isinstance guard makes the cell safe to re-run: once json_res has been
# narrowed to a single posting (a dict), indexing it with 0 again would raise KeyError.
if isinstance(json_res, list):
    json_res = json_res[0]

In [31]:
# Bind the selected posting to `job`, the name the query and email cells below read.
job = json_res

In [32]:
# Display the skills text of the selected posting; the same value is later used as the vector-store query.
job["skills"]

'Experience owning outcomes and decision making, solving ambiguous problems and influencing stakeholders; deep expertise in domain'

# Adding data to ChromaDB

In [33]:
import pandas as pd 

In [34]:
# Load the portfolio table (its "Techstack" and "Links" columns are used below) and show its shape.
portfolio_path = "my_portfolio.csv"
df = pd.read_csv(portfolio_path)
df.shape

(20, 2)

In [35]:
import uuid
import chromadb

# Open (or create) the on-disk Chroma store in ./vectorstore and its "portfolio" collection.
client = chromadb.PersistentClient('vectorstore')
collection = client.get_or_create_collection(name="portfolio")

# Populate the collection only when it is empty, so re-running the cell does not
# insert the portfolio a second time. All rows go in with one batched add() call
# instead of one call per row. The df.empty check keeps an empty CSV a no-op,
# as the per-row loop was.
if not collection.count() and not df.empty:
    collection.add(
        documents=df["Techstack"].tolist(),
        metadatas=[{"links": link} for link in df["Links"]],
        # Random ids: one fresh UUID per portfolio row.
        ids=[str(uuid.uuid4()) for _ in range(len(df))],
    )


In [36]:
# Query the portfolio collection with the posting's skills text, asking for the two
# closest entries, and keep only the metadata (the portfolio links) of the matches.
query_result = collection.query(query_texts=job['skills'], n_results=2)
links = query_result.get('metadatas', [])
links


[[{'links': 'https://example.com/devops-portfolio'},
  {'links': 'https://example.com/ml-python-portfolio'}]]

# Generate email 

In [37]:
# Prompt for the cold email. {job_description} receives the extracted posting and
# {link_list} the portfolio links returned by the vector-store query.
# The company name is ABCDEF throughout; the instruction previously named a
# different company ("AtliQ") in one sentence, which contradicted the persona.
prompt_email = PromptTemplate.from_template(
        """
        ### JOB DESCRIPTION:
        {job_description}
        
        ### INSTRUCTION:
        You are Kiran, a business development executive at ABCDEF. ABCDEF is an AI & Software Consulting company dedicated to facilitating
        the seamless integration of business processes through automated tools. 
        Over our experience, we have empowered numerous enterprises with tailored solutions, fostering scalability, 
        process optimization, cost reduction, and heightened overall efficiency. 
        Your job is to write a cold email to the client regarding the job mentioned above describing the capability of ABCDEF 
        in fulfilling their needs.
        Also add the most relevant ones from the following links to showcase ABCDEF's portfolio: {link_list}
        Remember you are Kiran, BDE at ABCDEF. 
        Do not provide a preamble.
        ### EMAIL (NO PREAMBLE):
        
        """
        )


In [38]:
# Pipe the email prompt into the model and run it with the posting (as a string)
# and the matched portfolio links.
chain_email = prompt_email | llm
email_inputs = {"job_description": str(job), "link_list": links}
res = chain_email.invoke(email_inputs)

In [39]:
# Print the text of the reply returned by the email chain (the generated cold email).
print(res.content)

Subject: Expert Technical Program Management for Google Cloud Infrastructure

Dear Hiring Manager,

I came across the job description for a Senior Technical Program Manager I, Infrastructure, Google Cloud, and I am excited to introduce ABCDEF, a leading AI & Software Consulting company. With our expertise in facilitating seamless integration of business processes through automated tools, I believe we can help fulfill your requirements.

At ABCDEF, we have a proven track record of empowering enterprises with tailored solutions, resulting in scalability, process optimization, cost reduction, and heightened overall efficiency. Our team has extensive experience in program management and infrastructure work, with a deep understanding of owning outcomes, decision-making, and solving ambiguous problems.

We can help implement communications standards across your portfolio of programs, including executive and key partner communications. Our team can establish a reliable and visible cadence for