<a href="https://colab.research.google.com/github/4NI5H/ai-outreach-email-geneator/blob/main/cold_email_gen.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install langchain  langchain-community langchain-groq

In [None]:
!pip install chromadb streamlit

In [None]:
from google.colab import userdata
import os

In [None]:
# Copy the Groq API key from Colab's `userdata` store into the process environment.
os.environ['GROQ_API_KEY']=userdata.get('GROQ_API_KEY')
# NOTE(review): presumably read by the web loader to identify its HTTP requests — confirm.
os.environ['USER_AGENT'] = 'email-generator'

In [None]:
from langchain_core.prompts import PromptTemplate
from langchain_groq import ChatGroq

In [None]:
from langchain_community.document_loaders import WebBaseLoader

In [None]:
# Shared chat model: extract_job_info reads it as a global and it is passed to write_mail.
llm = ChatGroq(temperature=0.5, model_name="llama-3.3-70b-versatile")

In [None]:
def extract_job_info(job_urls):
  """Scrape the given careers page(s) and ask the LLM to extract job postings.

  Args:
    job_urls: URL string or list of URL strings handed to ``WebBaseLoader``.

  Returns:
    The raw response of the ``prompt | llm`` chain. Its ``content`` is
    expected to hold JSON text with the keys ``role``, ``experience``,
    ``skills`` and ``description`` (see ``parse_output_to_json``).

  Raises:
    ValueError: If the loader returns no documents to extract from.
  """
  documents = WebBaseLoader(job_urls).load()
  if not documents:
    raise ValueError("No page content could be loaded from the given job URL(s).")

  # The loader may return several documents (e.g. one per URL). Use the text
  # of all of them instead of silently keeping only the last one; with a
  # single document this is exactly that document's text.
  content = "\n\n".join(doc.page_content for doc in documents)

  prompt_template = PromptTemplate.from_template(
      """
              ### SCRAPED TEXT FROM WEBSITE:
              {page_data}
              ### INSTRUCTION:
              The scraped text is from the career's page of a website.
              Your job is to extract the job postings and return them in JSON format containing the following keys: `role`, `experience`, `skills` and `description`.
              Only return the valid JSON.
              ### VALID JSON (NO PREAMBLE):
              """
  )
  # Relies on the module-level `llm` defined in an earlier cell.
  chain = prompt_template | llm
  return chain.invoke(input={"page_data": content})

In [None]:
from langchain_core.output_parsers import JsonOutputParser

def parse_output_to_json(response):
  """Decode the JSON payload carried in an LLM response.

  Args:
    response: Object whose ``content`` attribute holds the model's text.

  Returns:
    Whatever ``JsonOutputParser.parse`` produces for that text.
  """
  raw_text = response.content
  return JsonOutputParser().parse(raw_text)

In [None]:
# Sample run: scrape one job posting page and let the LLM extract its fields.
job_urls = ['https://www.talentica.com/jobdescription/senior-software-developer-python/']
job_info = extract_job_info(job_urls)

# Decode the model's reply; later cells index the result as a list (job[0]).
job =parse_output_to_json(job_info)



In [None]:
job

# type(job)

[{'role': 'Senior Software Developer- Python',
  'experience': '3.6 to 5.5 years',
  'skills': ['Python',
   'Django',
   'web development',
   'data structures',
   'algorithms',
   'problem-solving'],
  'description': 'Develop web applications using Python and Django. Integrate back-end systems for seamless functionality. Deploy scalable infrastructure solutions.'}]

In [None]:
import pandas as pd
# Portfolio table; the seeding cell below reads its 'Techstack' and 'Links' columns.
df = pd.read_csv('my_portfolio.csv')

df

Unnamed: 0,Techstack,Links
0,"React, Node.js, MongoDB",https://example.com/react-portfolio
1,"Angular,.NET, SQL Server",https://example.com/angular-portfolio
2,"Vue.js, Ruby on Rails, PostgreSQL",https://example.com/vue-portfolio
3,"Python, Django, MySQL",https://example.com/python-portfolio
4,"Java, Spring Boot, Oracle",https://example.com/java-portfolio
5,"Flutter, Firebase, GraphQL",https://example.com/flutter-portfolio
6,"WordPress, PHP, MySQL",https://example.com/wordpress-portfolio
7,"Magento, PHP, MySQL",https://example.com/magento-portfolio
8,"React Native, Node.js, MongoDB",https://example.com/react-native-portfolio
9,"iOS, Swift, Core Data",https://example.com/ios-portfolio


In [None]:
import chromadb
import uuid
# Chroma client rooted at the local "db" path, with one collection for the portfolio.
client = chromadb.PersistentClient(path="db")
collection = client.get_or_create_collection('portfolio')

# Seed only when the collection is empty so re-running the cell does not
# insert the portfolio again. An empty frame is skipped because there is
# nothing to add.
if not collection.count() and not df.empty:
  # One batched add instead of one call per row: same documents, metadata
  # and random ids, without the per-row round trips of iterrows().
  collection.add(
      documents=df['Techstack'].tolist(),
      metadatas=[{"links": link} for link in df['Links'].tolist()],
      ids=[str(uuid.uuid4()) for _ in range(len(df))],
  )

/root/.cache/chroma/onnx_models/all-MiniLM-L6-v2/onnx.tar.gz: 100%|██████████| 79.3M/79.3M [00:01<00:00, 81.0MiB/s]


In [None]:
def get_relevant_links(skills, n_results=5):
  """Query the portfolio collection for entries matching the given skills.

  Args:
    skills: Skill string or list of skill strings used as the query texts.
    n_results: Number of matches requested from the collection (default 5,
      the value previously hard-coded).

  Returns:
    The ``metadatas`` entry of the query result, or an empty list when
    ``skills`` is empty or ``None`` (nothing to search for).
  """
  # Nothing to query with: skip the collection call and report no links.
  if not skills:
    return []
  return collection.query(query_texts=skills, n_results=n_results).get('metadatas')


In [None]:
# Portfolio matches for the skills of the first extracted job.
relevant_links = get_relevant_links(job[0]['skills'])

In [None]:
def write_mail(llm, job, links):
        """Generate a cold outreach email for a job posting.

        Args:
            llm: Chat model the prompt is piped into.
            job: Extracted job data; it is converted with ``str()`` and
                inserted into the prompt as the job description.
            links: Portfolio links offered to the model for inclusion.

        Returns:
            The ``content`` of the model's response (the email text).
        """
        # The persona is named twice in the prompt; both mentions must agree,
        # otherwise the model receives contradictory sender names.
        prompt_email = PromptTemplate.from_template(
            """
            ### JOB DESCRIPTION:
            {job_description}

            ### INSTRUCTION:
            You are Jason, a business development executive at TCS. TCS is an AI & Software Consulting company dedicated to facilitating
            the seamless integration of business processes through automated tools.
            Over our experience, we have empowered numerous enterprises with tailored solutions, fostering scalability,
            process optimization, cost reduction, and heightened overall efficiency.
            Your job is to write a cold email to the client regarding the job mentioned above describing the capability of TCS
            in fulfilling their needs.
            Also add the most relevant ones from the following links to showcase TCS's portfolio: {link_list}
            Remember you are Jason, BDE at TCS.
            Do not provide a preamble.
            ### EMAIL (NO PREAMBLE):

            """
        )
        chain_email = prompt_email | llm
        res = chain_email.invoke({"job_description": str(job), "link_list": links})
        return res.content
# Draft an email for the job data extracted above, offering the matched portfolio links.
res = write_mail(llm=llm, job=job, links=relevant_links)

In [None]:
# Interactive end-to-end run: URL -> extracted postings -> one email per posting.
job_url_input = input("Enter the job URL: ")
job_info = extract_job_info([job_url_input])
parsed_jobs = parse_output_to_json(job_info)

# The parsed reply can be a single posting (dict) or several (list);
# normalise to a list so the loop below handles both.
jobs = []

if isinstance(parsed_jobs, dict):
    jobs.append(parsed_jobs)
elif isinstance(parsed_jobs, list):
    jobs = parsed_jobs
else:
    raise ValueError("Unexpected job_info format. Expected a dict or list.")


for job in jobs:
  # The model does not always emit every key; a posting without skills still
  # gets an email, just without portfolio links, instead of a KeyError.
  skills = job.get('skills') if isinstance(job, dict) else None
  relevant_links = get_relevant_links(skills) if skills else []

  email = write_mail(llm=llm, job=job, links=relevant_links)

  print(email)

Enter the job URL: https://www.talentica.com/jobdescription/senior-software-developer-java/
retrived job info {'role': 'Senior Software Developer Java', 'experience': '6 to 8 years of relevant experience', 'skills': ['Java 8 and above', 'RESTful API development', 'Spring Boot', 'Microservices architectures', 'Object-Relational Mappers (ORMs) like JPA and Hibernate', 'SQL and NoSQL databases', 'CI/CD practices', 'JavaScript or Python', 'Cloud infrastructure'], 'description': "We're seeking a talented Java Developer to join our dynamic team! The ideal candidate will strive to transform customer ideas into successful products, work on critical business problems, and be a star performer in a high-performance product-dev team."}
jobs after [{'role': 'Senior Software Developer Java', 'experience': '6 to 8 years of relevant experience', 'skills': ['Java 8 and above', 'RESTful API development', 'Spring Boot', 'Microservices architectures', 'Object-Relational Mappers (ORMs) like JPA and Hiberna

Use Streamlit for the UI.

In [None]:
%%writefile app.py
import streamlit as st
from helper import extract_job_info, parse_output_to_json, get_relevant_links, write_mail

# Streamlit app
st.title("Job Application Email Generator")

job_url = st.text_input("Enter the job URL:")

if st.button("Generate Email"):
    if job_url:
        try:
            job_urls = [job_url]  # Make job_urls a list
            job_info = extract_job_info(job_urls)

            job = parse_output_to_json(job_info)

            relevant_links = get_relevant_links(job[0]['skills'])
            res = write_mail(llm=llm, job=job, links=relevant_links)
            st.write(res)

        except Exception as e:
            st.error(f"An error occurred: {e}")
    else:
        st.warning("Please enter a job URL.")

Overwriting app.py


In [None]:
!npm install localtunnel

[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K
up to date, audited 23 packages in 976ms
[1G[0K⠦[1G[0K
[1G[0K⠦[1G[0K3 packages are looking for funding
[1G[0K⠦[1G[0K  run `npm fund` for details
[1G[0K⠦[1G[0K
2 [33m[1mmoderate[22m[39m severity vulnerabilities

To address all issues (including breaking changes), run:
  npm audit fix --force

Run `npm audit` for details.
[1G[0K⠦[1G[0K

In [None]:
!streamlit run app.py &>/content/logs.txt &

In [None]:
!npx localtunnel --port 8501

[1G[0K⠙[1G[0Kyour url is: https://slick-planets-listen.loca.lt
^C
