In [7]:
from langchain_groq import ChatGroq

llm = ChatGroq(
    temperature=0,
    groq_api_key='dummy',
    model_name='llama-3.1-8b-instant'
)
response = llm.invoke("The first person to land on moon was?")
print(response.content)

The first person to land on the moon was Neil Armstrong. He stepped out of the lunar module Eagle and onto the moon's surface on July 20, 1969, during the Apollo 11 mission.


In [11]:
from langchain_community.document_loaders import WebBaseLoader

loader = WebBaseLoader("https://jobs.apple.com/en-us/details/200572938/data-engineer?team=SFTWR")
page_data = loader.load().pop().page_content
print(page_data)










Data Engineer  - Careers at Apple







AppleStoreMaciPadiPhoneWatchVisionAirPodsTV & HomeEntertainmentAccessoriesSupport


0+
Careers at AppleOpen MenuClose Menu

      Work at Apple
    
 

      Life at Apple
    
 

      Profile
    
 

      Sign In
    
 
Search
Jobs at Apple
Data Engineer Austin, Texas, United StatesSoftware and ServicesAdd to Favorites Data Engineer Removed from favoritesAdd a favoriteCloseTo view your favorites, sign in with your Apple Account.Sign InDon’t have an Apple Account?Create one nowForgot your Apple Account or password?Submit ResumeData Engineer Back to search resultsSummaryPosted: Mar 12, 2025Weekly Hours: 40 Role Number:200572938Apple is a place where extraordinary people gather to do their best work. If you’re excited by the idea of making a real impact, a career with Apple might be your dream job…just be prepared to dream big! An expert in analytics, you are passionate about turning data into impactful insights, and driving creative da

In [19]:
from langchain_core.prompts import PromptTemplate
prompt_extract = PromptTemplate.from_template(
    """ Scraped text from website
    {page_data}
    ### INSTRUCTION:
    The scraped text is from the career's page of a website.
    Your job is to extract the job postings and return them in JSON format containing
    following keys: 'role', 'experience', 'skills' and 'description'.
    Only return the valid JSON.
    ### VALID Json (NO PREAMBLE):
    """
)
chain_extract = prompt_extract | llm
res = chain_extract.invoke(input={'page_data':page_data})
print(res.content)

```json
[
  {
    "role": "Data Engineer",
    "experience": "Minimum 5 years experience within Operations and Supply Chain desired",
    "skills": [
      "Expert level fluency with Snowflake, SQL, Tableau, and Python",
      "Experience with data modeling in Snowflake, performance tuning the queries and optimization",
      "Computational analysis using Snowflake, mySQL, Teradata, Python, Tableau, Business Objects, JMP, R, Matlab, SPSS and working knowledge of SAP/S4 data structures"
    ],
    "description": "The Operations team at Apple is looking for a dynamic and creative candidate for the role of a Data Engineer. The role is an opportunity for a self-driven individual to utilize their business acumen, process knowledge, and apply analytical skills to deliver value-added solutions and automation to the Operations team at Apple."
  }
]
```

Note: The extracted information is based on the provided text and might not be comprehensive or up-to-date.


In [21]:
from langchain_core.output_parsers import JsonOutputParser
json_parser = JsonOutputParser()
json_res = json_parser.parse(res.content)
json_res

[{'role': 'Data Engineer',
  'experience': 'Minimum 5 years experience within Operations and Supply Chain desired',
  'skills': ['Expert level fluency with Snowflake, SQL, Tableau, and Python',
   'Experience with data modeling in Snowflake, performance tuning the queries and optimization',
   'Computational analysis using Snowflake, mySQL, Teradata, Python, Tableau, Business Objects, JMP, R, Matlab, SPSS and working knowledge of SAP/S4 data structures'],
  'description': 'The Operations team at Apple is looking for a dynamic and creative candidate for the role of a Data Engineer. The role is an opportunity for a self-driven individual to utilize their business acumen, process knowledge, and apply analytical skills to deliver value-added solutions and automation to the Operations team at Apple.'}]

In [33]:
import pandas as pd
df = pd.read_csv("roles_skills_links.csv")
df.head()

Unnamed: 0,Role,Skills,Links
0,Data Analyst,"SQL, Python, Tableau, PowerBI",https://example.com/data-analyst
1,Data Scientist,"Python, sklearn, matplotlib, ML",https://example.com/data-scientist
2,Machine Learning Engineer,"Python, TensorFlow, PyTorch, ML, sklearn",https://example.com/ml-engineer
3,Business Intelligence Analyst,"SQL, PowerBI, Tableau, Excel",https://example.com/bi-analyst
4,Data Engineer,"Python, SQL, Spark, Airflow, Snowflake",https://example.com/data-engineer


In [55]:
import chromadb
import uuid
chroma_client = chromadb.PersistentClient('vectorstore')
collection_names = chroma_client.list_collections()
if "skills" not in collection_names:
    collection = chroma_client.create_collection(name="skills")
else:
    collection = chroma_client.get_collection(name="skills")

if not collection.count():
    for _, row in df.iterrows():
        collection.add(documents=row["Skills"],
                        metadatas={"links":row["Links"]},
                        ids=[str(uuid.uuid4())])

In [71]:
job= json_res[0]
job['skills']

['Expert level fluency with Snowflake, SQL, Tableau, and Python',
 'Experience with data modeling in Snowflake, performance tuning the queries and optimization',
 'Computational analysis using Snowflake, mySQL, Teradata, Python, Tableau, Business Objects, JMP, R, Matlab, SPSS and working knowledge of SAP/S4 data structures']

In [67]:
links = collection.query(query_texts=job['skills'],n_results=2).get('metadatas')
links

[[{'links': 'https://example.com/data-engineer'},
  {'links': 'https://example.com/data-architect'}],
 [{'links': 'https://example.com/data-architect'},
  {'links': 'https://example.com/data-engineer'}],
 [{'links': 'https://example.com/data-architect'},
  {'links': 'https://example.com/statistician'}]]

In [69]:
prompt_email= PromptTemplate.from_template(
    """
###JOB DESCRIPTION:
{job_description}

### INSTRUCTION:
You are Bikramjeet Singh, an experienced Data Engineer at TCS(an IT Consultant company) with 3 years of experience. 
You job is to write mail to the Hiring manager for the job at their company to consider you for the the role at their company.
Do not provide preamble.
### EMAIL (NO PREAMBLE

"""
)
chain_email = prompt_email | llm
res = chain_email.invoke({"job_description":str(job),"link_list":links})
print(res.content)

Subject: Application for Data Engineer Role at Apple

Dear Hiring Manager,

I am writing to express my interest in the Data Engineer position at Apple's Operations team. As a seasoned Data Engineer with 3 years of experience at TCS, I am confident that my skills and expertise align with the requirements of the role.

With a strong background in data modeling, performance tuning, and optimization using Snowflake, SQL, Tableau, and Python, I am well-equipped to deliver value-added solutions and automation to the Operations team at Apple. My experience in computational analysis using various tools such as Snowflake, mySQL, Teradata, Python, Tableau, Business Objects, JMP, R, Matlab, and SPSS has provided me with a unique understanding of data structures and their applications.

Although I may not meet the minimum 5 years of experience requirement, I am a self-driven individual with a strong passion for learning and growth. I am confident that my enthusiasm, business acumen, and process kn