In [4]:
from langchain_groq import ChatGroq
import os
from dotenv import load_dotenv


# Load environment settings via python-dotenv before the key lookup below.
load_dotenv()

# The Groq credential comes from the API_KEY environment variable, so the
# key itself never appears in the notebook.
api_key = os.environ.get("API_KEY")

# Chat model used by the cells that follow; temperature is pinned to 0.
llm = ChatGroq(
    groq_api_key=api_key,
    model="llama-3.1-8b-instant",
    temperature=0,
)


In [5]:
# The conversation is a list of (role, text) pairs: a system instruction
# followed by the human sentence to translate.
system_instruction = "You are a helpful assistant that translates English to French. Translate the user sentence."
user_sentence = "I love programming."
messages = [("system", system_instruction), ("human", user_sentence)]

ai_msg = llm.invoke(messages)

In [8]:
# Display the `content` attribute of the reply stored in `ai_msg`.
ai_msg.content

'Je adore programmer.'

In [9]:
# Here the prompt is a bare string rather than a list of (role, text) pairs.
moon_reply = llm.invoke("The first person to land on the moon was...")
moon_reply.content

'The first person to land on the moon was Neil Armstrong. He stepped out of the lunar module Eagle and onto the moon\'s surface on July 20, 1969, during the Apollo 11 mission. Armstrong famously declared, "That\'s one small step for man, one giant leap for mankind," as he became the first human to set foot on the moon.'

In [12]:
import chromadb 
# Chroma client built with default arguments; it is used to create the
# "my_collection" collection.
chroma_client = chromadb.Client()

In [13]:
# get_or_create_collection (instead of create_collection) keeps this cell
# re-runnable: executing it again in the same kernel reuses the existing
# "my_collection" rather than failing because the name is already taken.
collection = chroma_client.get_or_create_collection(name="my_collection")


In [14]:
# Two sample documents keyed by their ids; only text and ids are supplied.
fruit_docs = {
    "id1": "This is a document about pineapple",
    "id2": "This is a document about oranges",
}
collection.add(
    documents=list(fruit_docs.values()),
    ids=list(fruit_docs.keys()),
)


C:\Users\biswa\.cache\chroma\onnx_models\all-MiniLM-L6-v2\onnx.tar.gz: 100%|██████████| 79.3M/79.3M [03:05<00:00, 448kiB/s]   


In [18]:
query_text = "This is a query document about hawaii"

# Chroma embeds the query text itself; request the two closest documents.
results = collection.query(query_texts=[query_text], n_results=2)
print(results)


{'ids': [['id1', 'id2']], 'embeddings': None, 'documents': [['This is a document about pineapple', 'This is a document about oranges']], 'uris': None, 'data': None, 'metadatas': [[None, None]], 'distances': [[1.0404009819030762, 1.2430799007415771]], 'included': [<IncludeEnum.distances: 'distances'>, <IncludeEnum.documents: 'documents'>, <IncludeEnum.metadatas: 'metadatas'>]}


In [20]:
# Remove both sample documents, then show what the collection still holds.
sample_ids = ['id1', 'id2']
collection.delete(ids=sample_ids)
collection.get()


{'ids': [],
 'embeddings': None,
 'documents': [],
 'uris': None,
 'data': None,
 'metadatas': [],
 'included': [<IncludeEnum.documents: 'documents'>,
  <IncludeEnum.metadatas: 'metadatas'>]}

In [None]:
from langchain_community.document_loaders import WebBaseLoader

# Job posting to scrape.
JOB_URL = "https://www.google.com/about/careers/applications/jobs/results/137734004324344518-staff-software-engineer-core-machine-learning-google-cloud?tag=ai-spotlight"

loader = WebBaseLoader(JOB_URL)

# Keep the text of the last document the loader returns.
documents = loader.load()
page_data = documents[-1].page_content
print(page_data)













Python Developer(AIML/RAG) Job Details | Wipro Limited



























Press Tab to Move to Skip to Content Link
Skip to main content























Login

































Home
Life at Wipro


                Careers 


Early Careers
Experienced Professionals


Join Our Talent Network




























Login



































Home
Life at Wipro


                Careers 


Early Careers
Experienced Professionals


Join Our Talent Network









































Home
Life at Wipro


                Careers 


Early Careers
Experienced Professionals


Join Our Talent Network














Login










































Search by ‘Skills’ or ‘Keywords’ or 'Requisition ID’




Search by Country







                                 
                            



















Select how often (in days) to receive an alert:



 Create Alert







×



Select how often (in days)

In [79]:
from langchain_core.prompts import PromptTemplate

# Prompt text for pulling one job posting out of the scraped page as JSON.
# `{page_data}` is the only placeholder.
EXTRACTION_TEMPLATE = """
        ### SCRAPED TEXT FROM WEBSITE:
        {page_data}
        ### INSTRUCTION:
        The scraped text is from the career's page of a website.
        Your job is to extract a single job posting and return it in JSON format containing the 
        following keys: `role`, `experience`, `skills`, and `description`.
        skills value is not a dictionary, rather just contains the skills. 
        Only return a valid JSON object without wrapping it in a list and no examples required.
        ### VALID JSON (NO PREAMBLE):
    """

extraction_prompt = PromptTemplate.from_template(EXTRACTION_TEMPLATE)

# Pipe the filled-in prompt into the chat model, then run it on the page text.
extraction_chain = extraction_prompt | llm
res = extraction_chain.invoke({"page_data": page_data})
print(res.content)


{
  "role": "Senior Machine Learning Applied Researcher",
  "experience": "Ph.D. in Machine Learning, Computer Science, Mathematics, Statistics, or a related field, or equivalent work experience",
  "skills": "Python, Scala, Spark, Hadoop, Machine Learning, Deep Learning, Data Mining, Reinforcement Learning, Optimization Techniques, Personalization Systems, Recommendation Engines, Ranking Algorithms, Large Language Models, Marketing Analytics, User Segmentation, Customer Lifetime Value Modeling",
  "description": "We are seeking a talented and experienced Senior Applied Researcher to lead and contribute to high-visibility projects. This role requires deep expertise in machine learning, a passion for research and development, and the ability to effectively communicate complex ideas to cross-functional teams and partners. If you are a self-motivated, energetic individual with a track record of delivering impactful results, we want to hear from you!"
}


In [80]:
from langchain_core.output_parsers import JsonOutputParser

# Parse the model's reply text with LangChain's JSON output parser.
json_parser = JsonOutputParser()
raw_reply_text = res.content
json_res = json_parser.parse(raw_reply_text)
json_res

{'role': 'Senior Machine Learning Applied Researcher',
 'experience': 'Ph.D. in Machine Learning, Computer Science, Mathematics, Statistics, or a related field, or equivalent work experience',
 'skills': 'Python, Scala, Spark, Hadoop, Machine Learning, Deep Learning, Data Mining, Reinforcement Learning, Optimization Techniques, Personalization Systems, Recommendation Engines, Ranking Algorithms, Large Language Models, Marketing Analytics, User Segmentation, Customer Lifetime Value Modeling',
 'description': 'We are seeking a talented and experienced Senior Applied Researcher to lead and contribute to high-visibility projects. This role requires deep expertise in machine learning, a passion for research and development, and the ability to effectively communicate complex ideas to cross-functional teams and partners. If you are a self-motivated, energetic individual with a track record of delivering impactful results, we want to hear from you!'}

In [45]:
# Show the Python type of the parsed result.
type(json_res)

dict

In [75]:
import pandas as pd

# Location of the portfolio CSV, relative to the notebook's working directory.
PORTFOLIO_CSV = "./app/resource/my_portfolio.csv"

df = pd.read_csv(PORTFOLIO_CSV)
df

Unnamed: 0,Techstack,Links
0,"React, Node.js, MongoDB",https://example.com/react-portfolio
1,"Angular,.NET, SQL Server",https://example.com/angular-portfolio
2,"Vue.js, Ruby on Rails, PostgreSQL",https://example.com/vue-portfolio
3,"Python, Django, MySQL",https://example.com/python-portfolio
4,"Java, Spring Boot, Oracle",https://example.com/java-portfolio
5,"Flutter, Firebase, GraphQL",https://example.com/flutter-portfolio
6,"WordPress, PHP, MySQL",https://example.com/wordpress-portfolio
7,"Magento, PHP, MySQL",https://example.com/magento-portfolio
8,"React Native, Node.js, MongoDB",https://example.com/react-native-portfolio
9,"iOS, Swift, Core Data",https://example.com/ios-portfolio


In [81]:
# `job` is a second name bound to the same parsed object (no copy is made).
job = json_res
# Display the value stored under the 'skills' key.
job['skills']

'Python, Scala, Spark, Hadoop, Machine Learning, Deep Learning, Data Mining, Reinforcement Learning, Optimization Techniques, Personalization Systems, Recommendation Engines, Ranking Algorithms, Large Language Models, Marketing Analytics, User Segmentation, Customer Lifetime Value Modeling'

In [82]:
import uuid
import chromadb

# Open the Chroma store at the 'vectorstore' path and fetch the "portfolio"
# collection, creating it if it does not exist yet.
client = chromadb.PersistentClient('vectorstore')
collection = client.get_or_create_collection(name="portfolio")

# Seed only when the collection is empty: ids are fresh random UUIDs, so
# seeding again would store every portfolio row a second time.
# All rows go in with a single add() call instead of one call per row.
# The `df.empty` guard keeps an empty frame a no-op, as the row loop was.
if not collection.count() and not df.empty:
    collection.add(
        documents=df["Techstack"].tolist(),
        metadatas=[{"links": link} for link in df["Links"]],
        ids=[str(uuid.uuid4()) for _ in range(len(df))],
    )

In [83]:
# Query the collection with the job's skills for the two closest entries and
# keep only the 'metadatas' part of the result (empty list if absent).
portfolio_matches = collection.query(query_texts=job['skills'], n_results=2)
links = portfolio_matches.get('metadatas', [])
links

[[{'links': 'https://example.com/ml-python-portfolio'},
  {'links': 'https://example.com/python-portfolio'}]]

In [68]:
# Prompt text for the cold email. Placeholders: `{job_description}` and
# `{link_list}`.
EMAIL_TEMPLATE = """
        ### JOB DESCRIPTION:
        {job_description}
        
        ### INSTRUCTION:
        You are Mohan, a business development executive at AtliQ. AtliQ is an AI & Software Consulting company dedicated to facilitating
        the seamless integration of business processes through automated tools. 
        Over our experience, we have empowered numerous enterprises with tailored solutions, fostering scalability, 
        process optimization, cost reduction, and heightened overall efficiency. 
        Your job is to write a cold email to the client regarding the job mentioned above describing the capability of AtliQ 
        in fulfilling their needs.
        Also add the most relevant ones from the following links to showcase Atliq's portfolio: {link_list}
        Remember you are Mohan, BDE at AtliQ. 
        Do not provide a preamble.
        ### EMAIL (NO PREAMBLE):
        
        """

prompt_email = PromptTemplate.from_template(EMAIL_TEMPLATE)

# Pipe the prompt into the chat model and fill it with the parsed job
# (as a string) and the portfolio links found for it.
chain_email = prompt_email | llm
email_inputs = {"job_description": str(job), "link_list": links}
res = chain_email.invoke(email_inputs)
print(res.content)

Subject: Unlock Your AI Potential with AtliQ

Dear Hiring Manager,

I came across the job description for a Staff Software Engineer, Core Machine Learning, Google Cloud, and I was impressed by the requirements and responsibilities outlined. As a business development executive at AtliQ, an AI & Software Consulting company, I believe our expertise and capabilities align perfectly with your needs.

AtliQ has a proven track record of empowering enterprises with tailored solutions that foster scalability, process optimization, cost reduction, and heightened overall efficiency. Our team of experts has a deep understanding of machine learning algorithms and tools, artificial intelligence, deep learning, and natural language processing, which are essential skills for this role.

I'd like to highlight a few examples of our work that demonstrate our capabilities:

* Our team has developed a robust machine learning pipeline using Python and TensorFlow, which can be viewed on our portfolio page: h