In [1]:
# conda create --name cold_email python=3.10

In [4]:
from langchain_groq import ChatGroq

In [5]:
import os

# Read the Groq API key from the environment instead of embedding it in the
# notebook, where anyone who can view the file (or its history) could copy it.
# NOTE(review): the key that used to be hardcoded in this cell should be
# treated as compromised and revoked.
groq_api_key = os.environ.get("GROQ_API_KEY")
if not groq_api_key:
    raise RuntimeError(
        "GROQ_API_KEY is not set; export it before starting the notebook kernel."
    )

llm = ChatGroq(
    temperature=0,  # minimise sampling randomness so repeated runs give stable output
    groq_api_key=groq_api_key,
    model_name="llama-3.3-70b-versatile",
)

# Smoke test: send one prompt to confirm the credentials and model name work.
response = llm.invoke("The first person to land on moon was ...")
print(response.content)

The first person to land on the moon was Neil Armstrong. He stepped out of the lunar module Eagle and onto the moon's surface on July 20, 1969, during the Apollo 11 mission. Armstrong famously declared, "That's one small step for man, one giant leap for mankind," as he became the first human to set foot on the moon.


In [6]:
# conda install beautifulsoup4
# conda install requests
# conda install lxml

In [21]:
from langchain_community.document_loaders import WebBaseLoader

# Job posting to scrape; replace with a current Amazon job link when this one expires.
job_url = "https://www.amazon.jobs/en/jobs/2888725/sde-ii-amazon"
loader = WebBaseLoader(job_url)

# Keep only the text of the last document the loader returns.
loaded_docs = loader.load()
page_data = loaded_docs[-1].page_content
print(page_data)

SDE II, Amazon - Job ID: 2888725 | Amazon.jobs
Skip to main contentHomeTeamsLocationsJob categoriesMy careerMy applicationsMy profileAccount securitySettingsSign outResourcesDisability accommodationsBenefitsDiversity, equity, and inclusionInterview tipsLeadership principlesWorking at AmazonFAQ×SDE II, AmazonJob ID: 2888725 | ADCI HYD 13 SEZApply nowDESCRIPTIONWould you like to work on one of the world's largest transactional distributed systems? How about working with customers and peers from the entire range of Amazon's business on cool new features? Whether you're passionate about building highly scalable and reliable systems or a software developer who likes to solve business problems, Tax Services is the place for you.We are also responsible for the tax invoicing platform. We provide the core services that generate tax invoicing at Amazon. We thrive on providing the correct tax amounts to the customer at order time, and make sure audit records are stored safely to meet tax law requ

In [22]:
# define a prompt template 
from langchain_core.prompts import PromptTemplate

# Prompt text for the extraction step; {page_data} is filled in with the scraped page.
extract_template = """
        ### SCRAPED TEXT FROM WEBSITE:
        {page_data}
        ### INSTRUCTION:
        The scraped text is from the career's page of a website.
        Your job is to extract the job postings and return them in JSON format containing the 
        following keys: `role`, `experience`, `skills` and `description`.
        Only return the valid JSON.
        ### VALID JSON (NO PREAMBLE):    
        """
prompt_extract = PromptTemplate.from_template(extract_template)

# Pipe the prompt into the LLM: the `|` operator composes the two into one chain.
chain_extract = prompt_extract | llm

# Run the chain on the scraped text and inspect the type of the reply's content.
res = chain_extract.invoke({'page_data': page_data})
type(res.content)

str

In [23]:
from langchain_core.output_parsers import JsonOutputParser
# parse() method parses the JSON output and returns a Python dictionary representation of the JSON data.

# Turn the model's JSON text into a Python object.
json_parser = JsonOutputParser()
raw_json_text = res.content
json_res = json_parser.parse(raw_json_text)
json_res

{'role': 'SDE II',
 'experience': '3+ years of non-internship professional software development experience, 2+ years of non-internship design or architecture experience',
 'skills': ['Java',
  'Linux',
  'software programming language',
  'design patterns',
  'reliability and scaling',
  'coding standards',
  'code reviews',
  'source control management',
  'build processes',
  'testing',
  'operations'],
 'description': 'We are looking for software engineers who thrive on complex problems and relish the challenge of operating complex and mission critical systems under extreme loads. Our systems manage hundreds of millions of records, and responds to millions of service requests per minute.'}

In [24]:
# Check what kind of Python object the parser produced (a dict in this run).
type(json_res)

dict

In [25]:
import pandas as pd
import os

# Locate the projects CSV under <working directory>/app/resource.
current_dir = os.getcwd()
csv_parts = ('app', 'resource', 'my_projects.csv')
file_path = os.path.join(current_dir, *csv_parts)

# Load the CSV into a DataFrame and print it as text.
df = pd.read_csv(file_path)
print(df)

                               Techstack  \
0                React, Node.js, MongoDB   
1               Angular,.NET, SQL Server   
2      Vue.js, Ruby on Rails, PostgreSQL   
3                  Python, Django, MySQL   
4              Java, Spring Boot, Oracle   
5             Flutter, Firebase, GraphQL   
6                  WordPress, PHP, MySQL   
7                    Magento, PHP, MySQL   
8         React Native, Node.js, MongoDB   
9                  iOS, Swift, Core Data   
10       Android, Java, Room Persistence   
11             Kotlin, Android, Firebase   
12       Android TV, Kotlin, Android NDK   
13                     iOS, Swift, ARKit   
14        Cross-platform, Xamarin, Azure   
15          Backend, Kotlin, Spring Boot   
16         Frontend, TypeScript, Angular   
17    Full-stack, JavaScript, Express.js   
18  Machine Learning, Python, TensorFlow   
19               DevOps, Jenkins, Docker   

                                        Project_Links  
0                 h

In [26]:
import uuid  # used to generate unique identifiers (UUIDs)
import chromadb

In [27]:
# Open (or create) an on-disk ChromaDB store in ./vectorstore.
client = chromadb.PersistentClient('vectorstore')
collection = client.get_or_create_collection(name="my_projects.csv")

# Populate the collection only when it is still empty, so re-running this cell
# does not insert the same projects again under fresh random ids.
if not collection.count() and not df.empty:
    # A single batched add() replaces one add() call per DataFrame row.
    collection.add(
        documents=df["Techstack"].tolist(),
        metadatas=[{"links": link} for link in df["Project_Links"]],
        ids=[str(uuid.uuid4()) for _ in range(len(df))],
    )

In [28]:
# Sample lookup: fetch the two closest projects for each example query text.
sample_hits = collection.query(
    query_texts=["Experience in Python", "Expertise in React"],
    n_results=2,
)
links = sample_hits.get('metadatas', [])
links

[[{'links': 'https://example.com/ml-python-portfolio'},
  {'links': 'https://example.com/python-portfolio'}],
 [{'links': 'https://example.com/react-portfolio'},
  {'links': 'https://example.com/react-native-portfolio'}]]

In [29]:
# Select the single posting to write the email for. The extraction prompt asks
# for "job postings" (plural), so the parsed result may be a JSON array rather
# than one object; in that case use the first posting so that the later
# job['skills'] lookup works on a dict.
if isinstance(json_res, list):
    if not json_res:
        raise ValueError("No job postings were extracted from the scraped page.")
    job = json_res[0]
else:
    job = json_res
job

{'role': 'SDE II',
 'experience': '3+ years of non-internship professional software development experience, 2+ years of non-internship design or architecture experience',
 'skills': ['Java',
  'Linux',
  'software programming language',
  'design patterns',
  'reliability and scaling',
  'coding standards',
  'code reviews',
  'source control management',
  'build processes',
  'testing',
  'operations'],
 'description': 'We are looking for software engineers who thrive on complex problems and relish the challenge of operating complex and mission critical systems under extreme loads. Our systems manage hundreds of millions of records, and responds to millions of service requests per minute.'}

In [30]:
# Skills extracted for this posting; these are used below as the query texts
# for the portfolio lookup.
job['skills']

['Java',
 'Linux',
 'software programming language',
 'design patterns',
 'reliability and scaling',
 'coding standards',
 'code reviews',
 'source control management',
 'build processes',
 'testing',
 'operations']

Now pass the skills required by the job as the query texts to `collection.query`.

In [31]:
# For every required skill, fetch the two closest portfolio projects and keep their metadata.
skill_hits = collection.query(query_texts=job['skills'], n_results=2)
links = skill_hits.get('metadatas', [])
links

[[{'links': 'https://example.com/java-portfolio'},
  {'links': 'https://example.com/android-portfolio'}],
 [{'links': 'https://example.com/java-portfolio'},
  {'links': 'https://example.com/magento-portfolio'}],
 [{'links': 'https://example.com/ml-python-portfolio'},
  {'links': 'https://example.com/magento-portfolio'}],
 [{'links': 'https://example.com/android-portfolio'},
  {'links': 'https://example.com/flutter-portfolio'}],
 [{'links': 'https://example.com/android-portfolio'},
  {'links': 'https://example.com/ml-python-portfolio'}],
 [{'links': 'https://example.com/magento-portfolio'},
  {'links': 'https://example.com/wordpress-portfolio'}],
 [{'links': 'https://example.com/full-stack-js-portfolio'},
  {'links': 'https://example.com/wordpress-portfolio'}],
 [{'links': 'https://example.com/devops-portfolio'},
  {'links': 'https://example.com/wordpress-portfolio'}],
 [{'links': 'https://example.com/devops-portfolio'},
  {'links': 'https://example.com/ml-python-portfolio'}],
 [{'links

Cold email prompt: build the prompt and generate the email for the extracted job.

In [32]:
# Prompt text for the email step; {job_description} and {link_list} are filled in at call time.
email_template = """
    ### JOB DESCRIPTION:
    {job_description}
    
    ### INSTRUCTION:
    You are Sudhir Sude, a pass out B.Tech CSE student from the Indian Institute of Information Technology, Pune (IIIT Pune). 
    You are looking for job/internship opportunities. Your job is to write a cold email to the client regarding the job mentioned above, 
    highlighting your capabilities and demonstrating how you can fulfill their needs. 
    Additionally, include the most relevant project links from the following list to showcase your portfolio and relevant skills: {link_list}. 
    Maintain a professional and enthusiastic tone throughout. 
    Remember, you are Sudhir Sude.
    Do not provide a preamble.
    ### EMAIL (NO PREAMBLE):
    
    """
prompt_email = PromptTemplate.from_template(email_template)

# Compose the prompt with the LLM into a single chain.
chain_email = prompt_email | llm

# Fill the template with the stringified job and the matched links, then print the email.
email_inputs = {"job_description": str(job), "link_list": links}
response = chain_email.invoke(email_inputs)
print(response.content)

Subject: Application for SDE II Position - Bringing Passion and Expertise to Complex Problem-Solving

Dear Hiring Manager,

I am thrilled to apply for the SDE II position at your esteemed organization, where I can leverage my skills and experience to tackle complex problems and contribute to the development of mission-critical systems. With a strong foundation in computer science and a passion for software development, I am confident that I can make a significant impact at your company.

As a recent B.Tech CSE graduate from the Indian Institute of Information Technology, Pune (IIIT Pune), I have developed a solid understanding of software programming languages, design patterns, and software development principles. My experience with Java, in particular, has equipped me with the skills to design and develop efficient, scalable, and reliable software systems. I am excited about the opportunity to work with a talented team to build and operate complex systems that manage hundreds of milli