In [1]:
import os
from dotenv import load_dotenv

load_dotenv()

groq_api_key = os.getenv("GROQ_API_KEY")

In [None]:
from langchain_groq import ChatGroq


llm = ChatGroq(
    temperature=1, 
    groq_api_key=groq_api_key, 
    model_name="llama-3.3-70b-versatile",
   
    )

In [None]:
import chromadb
chroma_client = chromadb.Client()
collection = chroma_client.create_collection(name="test_collection") 
# collection is like a table for inserting records

In [21]:
# adding records to the db
collection.add(
    documents=[
        "This is a document about pineapple",
        "This is a document about oranges",
        "This is a document about Thailand",
        "This is a document about Bangalore",
        
    ],
    ids=["id1", "id2","id3","id4" ]
)

Add of existing embedding ID: id1
Add of existing embedding ID: id2
Add of existing embedding ID: id3
Add of existing embedding ID: id4
Insert of existing embedding ID: id1
Insert of existing embedding ID: id2
Insert of existing embedding ID: id3
Insert of existing embedding ID: id4


In [18]:
all_docs = collection.get()
all_docs

{'ids': ['id1', 'id2', 'id3', 'id4'],
 'embeddings': None,
 'documents': ['This is a document about pineapple',
  'This is a document about oranges',
  'This is a document about Thailand',
  'This is a document about Bangalore'],
 'uris': None,
 'data': None,
 'metadatas': [None, None, None, None],
 'included': [<IncludeEnum.documents: 'documents'>,
  <IncludeEnum.metadatas: 'metadatas'>]}

In [None]:
doc1 = collection.get(ids=["id1"])
doc1

In [None]:
# collection.query does a semantic search on the query text.

result = collection.query(
    query_texts=["This is a query document about Phi Phi"], # Chroma will embed this for you
    n_results=2 # how many results to return
)
result

In [None]:
collection.delete(ids=all_docs['ids'])
collection.get()

In [24]:
# adding records with metadata to the db
collection.add(
    documents=[
        "This is a document about pineapple",
        "This is a document about oranges",
        "This is a document about Bangkok",
        "This is a document about Bangalore",
        
    ],
    ids=["id1", "id2","id3","id4" ],
    metadatas=[
        {"url":"https://en.wikipedia.org/wiki/Pineapple"},
        {"url":"https://en.wikipedia.org/wiki/Orange"},
        {"url":"https://en.wikipedia.org/wiki/Bangkok"},
        {"url":"https://en.wikipedia.org/wiki/Bengaluru"},

        ]
)

In [25]:
result = collection.query(
    query_texts=["This is a query document about Phi Phi"],
    n_results=2
)
result

{'ids': [['id3', 'id4']],
 'embeddings': None,
 'documents': [['This is a document about Bangkok',
   'This is a document about Bangalore']],
 'uris': None,
 'data': None,
 'metadatas': [[{'url': 'https://en.wikipedia.org/wiki/Bangkok'},
   {'url': 'https://en.wikipedia.org/wiki/Bengaluru'}]],
 'distances': [[1.217174768447876, 1.3561327457427979]],
 'included': [<IncludeEnum.distances: 'distances'>,
  <IncludeEnum.documents: 'documents'>,
  <IncludeEnum.metadatas: 'metadatas'>]}

In [36]:
from langchain_community.document_loaders import WebBaseLoader

loader = WebBaseLoader("https://jobs.lever.co/atlan/12b574ba-d65f-4cdc-8ac7-9aca0a7eecc9")

page_data = loader.load().pop().page_content
print(page_data)

Atlan - Partner Sales EngineerPartner Sales EngineerUnited StatesSales – Sales Engineering /Full Time /RemoteApply for this jobWhy is this role important for us? 🔗As our first Partner Sales Engineer at Atlan, you're at the heart of our mission to empower data teams worldwide through strategic partnerships with leading cloud/tech partners and System Integrators (SI's). Our sales and go-to-market team plays a pivotal role in helping customers solve problems and navigate their data journey. As our GTM team rapidly expands to meet growing demand, we're seeking a talented Partner Sales Engineer to help build and mature our Partner Sales Engineering function. This role is critical in fostering collaboration with our tech partners and SI's to accelerate growth and drive customer success.Your mission at Atlan ⭐️As a Partner Sales Engineer, your mission is to build strong relationships with key stakeholders within our partner ecosystem including partners such as Snowflake and Databricks.Your ro

In [38]:
from langchain_core.prompts import PromptTemplate

prompt_extract = PromptTemplate.from_template(
    """
### SCRAPED TEXT FROM WEBSITE:
{page_data}
### INSTRUCTION
The scraped text is from the career's page of a website.
Your job is to extract the job posting and return them in JSON format containing the following keys: 'role', 'experience', 'skills', and 'description'

Only return the valid JSON.

### VALID JSON (NO PREAMBLE):
"""
)

chain_extract = prompt_extract | llm
response = chain_extract.invoke(input ={ "page_data" : page_data})

response.content

'```\n{\n  "role": "Partner Sales Engineer",\n  "experience": "A minimum of 7 years working experience in Sales Engineering + Partnerships",\n  "skills": [\n    "Sales Engineering Experience",\n    "Data Background",\n    "Technical Proficiency",\n    "Strategic Accounts Experience",\n    "Problem-Solving Skills",\n    "Strong Communication and Stakeholder Management",\n    "Entrepreneurial Mindset",\n    "Python",\n    "SQL",\n    "Rest APIs"\n  ],\n  "description": "As our first Partner Sales Engineer at Atlan, you\'re at the heart of our mission to empower data teams worldwide through strategic partnerships with leading cloud/tech partners and System Integrators (SI\'s). Your mission is to build strong relationships with key stakeholders within our partner ecosystem, evangelise Atlan’s value and competitive differentiation to partner field teams, and influence and create interest in the early stages of the partnership."\n}\n```'

In [40]:
# the content is string, we need to parse it now
from langchain_core.output_parsers import JsonOutputParser

json_parser = JsonOutputParser()
json_response = json_parser.parse(response.content)

json_response 


{'role': 'Partner Sales Engineer',
 'experience': 'A minimum of 7 years working experience in Sales Engineering + Partnerships',
 'skills': ['Sales Engineering Experience',
  'Data Background',
  'Technical Proficiency',
  'Strategic Accounts Experience',
  'Problem-Solving Skills',
  'Strong Communication and Stakeholder Management',
  'Entrepreneurial Mindset',
  'Python',
  'SQL',
  'Rest APIs'],
 'description': "As our first Partner Sales Engineer at Atlan, you're at the heart of our mission to empower data teams worldwide through strategic partnerships with leading cloud/tech partners and System Integrators (SI's). Your mission is to build strong relationships with key stakeholders within our partner ecosystem, evangelise Atlan’s value and competitive differentiation to partner field teams, and influence and create interest in the early stages of the partnership."}

In [41]:
# We are uploading the test data in chroma db. 
# So whenerver there's a job opening, our program will match the skill metioned and add the mentioned url in the email.

import pandas as pd

df = pd.read_csv("test_data.csv")
df

Unnamed: 0,Techstack,Links
0,"React, Node.js, MongoDB",https://example.com/react-portfolio
1,"Angular,.NET, SQL Server",https://example.com/angular-portfolio
2,"Vue.js, Ruby on Rails, PostgreSQL",https://example.com/vue-portfolio
3,"Python, Django, MySQL",https://example.com/python-portfolio
4,"Java, Spring Boot, Oracle",https://example.com/java-portfolio
5,"Flutter, Firebase, GraphQL",https://example.com/flutter-portfolio
6,"WordPress, PHP, MySQL",https://example.com/wordpress-portfolio
7,"Magento, PHP, MySQL",https://example.com/magento-portfolio
8,"React Native, Node.js, MongoDB",https://example.com/react-native-portfolio
9,"iOS, Swift, Core Data",https://example.com/ios-portfolio


In [None]:
import uuid
import chromadb

client = chromadb.PersistentClient("vector_store")
collection = client.get_or_create_collection(name="portfolio")

if not collection.count():
    for _, row in df.iterrows():
        collection.add(
            documents=row["Techstack"], 
            metadatas={"links" : row["Links"]},
            ids=[str(uuid.uuid4())]
            )        

In [47]:
job = json_response #coming after parsing the webpage
job["skills"]

['Sales Engineering Experience',
 'Data Background',
 'Technical Proficiency',
 'Strategic Accounts Experience',
 'Problem-Solving Skills',
 'Strong Communication and Stakeholder Management',
 'Entrepreneurial Mindset',
 'Python',
 'SQL',
 'Rest APIs']

In [48]:
links = collection.query(query_texts=job["skills"], n_results=2).get('metadatas', [])

links

[[{'links': 'https://example.com/vue-portfolio'},
  {'links': 'https://example.com/ml-python-portfolio'}],
 [{'links': 'https://example.com/ios-portfolio'},
  {'links': 'https://example.com/android-portfolio'}],
 [{'links': 'https://example.com/ml-python-portfolio'},
  {'links': 'https://example.com/typescript-frontend-portfolio'}],
 [{'links': 'https://example.com/java-portfolio'},
  {'links': 'https://example.com/vue-portfolio'}],
 [{'links': 'https://example.com/ml-python-portfolio'},
  {'links': 'https://example.com/python-portfolio'}],
 [{'links': 'https://example.com/devops-portfolio'},
  {'links': 'https://example.com/react-portfolio'}],
 [{'links': 'https://example.com/devops-portfolio'},
  {'links': 'https://example.com/vue-portfolio'}],
 [{'links': 'https://example.com/ml-python-portfolio'},
  {'links': 'https://example.com/python-portfolio'}],
 [{'links': 'https://example.com/magento-portfolio'},
  {'links': 'https://example.com/wordpress-portfolio'}],
 [{'links': 'https://e

In [None]:
# prompt template for creating email
prompt_email = PromptTemplate.from_template(
        """
        ### JOB DESCRIPTION:
        {job_description}
        
        ### INSTRUCTION:
        You are Ashley, a business development executive at Random Consulting. Random Consulting is an AI & Software Consulting company dedicated to facilitating
        the seamless integration of business processes through automated tools. 
        Over our experience, we have empowered numerous enterprises with tailored solutions, fostering scalability, process optimization, cost reduction, and heightened overall efficiency.

        Your job is to write a cold email to the client regarding the job mentioned above describing the capability of Random Consulting in fulfilling their needs.

        Also add the most relevant ones from the following links to showcase Atliq's portfolio: {links_list}

        Remember you are Ashley, BDE at Random Consulting. 
        Do not provide a preamble.
        ### EMAIL (NO PREAMBLE):
        
        """
        )

chain_email = prompt_email | llm
response_email = chain_email.invoke({"job_description": str(job), "links_list" : links})

print(response_email.content)

Subject: Enhancing Partnership Ecosystem with United Random's Expertise

Dear Hiring Manager at Atlan,

I came across the Partner Sales Engineer role at Atlan and was impressed by the company's mission to empower data teams worldwide. As a Business Development Executive at United Random, an AI & Software Consulting company, I believe our expertise can complement your strategic partnerships and System Integrators (SI's) ecosystem.

With our experience in facilitating seamless integration of business processes through automated tools, we can help Atlan build strong relationships with key stakeholders within the partner ecosystem. Our team of experts possesses the necessary skills to evangelize Atlan's value and competitive differentiation to partner field teams, influencing and creating interest in the early stages of the partnership.

Our technical proficiency includes Python, SQL, and Rest APIs, which align with the required skills for the Partner Sales Engineer role. We have a proven 