In [1]:
import chromadb
import os

In [5]:
# In-memory Chroma client: nothing created through it is written to disk.
client = chromadb.Client()
# get_or_create_collection keeps this cell re-runnable; create_collection raises
# when "my_collection" already exists from an earlier execution in this kernel.
collection = client.get_or_create_collection(name="my_collection")

In [3]:
# Seed the collection with two short documents. No embeddings are supplied, so
# Chroma embeds the text itself (the first run downloads its default model).
seed_documents = [
    "This document is about New York",
    "This document is about Delhi",
]
seed_ids = ['id1', 'id2']
collection.add(documents=seed_documents, ids=seed_ids)

C:\Users\lenovo\.cache\chroma\onnx_models\all-MiniLM-L6-v2\onnx.tar.gz: 100%|██████████| 79.3M/79.3M [00:38<00:00, 2.15MiB/s]


In [4]:
# Fetch every record in the collection. The result is kept in `all_docs`
# because a later cell reuses its ids to empty the collection.
all_docs = collection.get()
all_docs

{'ids': ['id1', 'id2'],
 'embeddings': None,
 'metadatas': [None, None],
 'documents': ['This document is about New York',
  'This document is about Delhi'],
 'uris': None,
 'data': None,
 'included': ['metadatas', 'documents']}

In [5]:
# Look up a single record by its id (a direct fetch, no query text involved).
documents = collection.get(ids=['id1'])
documents

{'ids': ['id1'],
 'embeddings': None,
 'metadatas': [None],
 'documents': ['This document is about New York'],
 'uris': None,
 'data': None,
 'included': ['metadatas', 'documents']}

In [7]:
# Ask for the two nearest documents to a query that shares no words with
# either stored text; ranking comes from the embeddings, not keyword overlap.
food_query = ['Query is about chole Bhature']
results = collection.query(query_texts=food_query, n_results=2)
results

{'ids': [['id2', 'id1']],
 'distances': [[1.443028450012207, 1.8624567985534668]],
 'metadatas': [[None, None]],
 'embeddings': None,
 'documents': [['This document is about Delhi',
   'This document is about New York']],
 'uris': None,
 'data': None,
 'included': ['metadatas', 'documents', 'distances']}

In [8]:
# Semantic search: the query names a landmark rather than a city, yet the two
# closest documents are still returned ordered by distance.
landmark_query = ['Query is about Brooklyn bridge']
results = collection.query(query_texts=landmark_query, n_results=2)
results

{'ids': [['id1', 'id2']],
 'distances': [[1.095342755317688, 1.571172833442688]],
 'metadatas': [[None, None]],
 'embeddings': None,
 'documents': [['This document is about New York',
   'This document is about Delhi']],
 'uris': None,
 'data': None,
 'included': ['metadatas', 'documents', 'distances']}

In [11]:
# Empty the collection using the ids captured earlier in `all_docs`, then
# fetch again so the cell output shows what is left.
ids_to_remove = all_docs['ids']
collection.delete(ids=ids_to_remove)
collection.get()

{'ids': [],
 'embeddings': None,
 'metadatas': [],
 'documents': [],
 'uris': None,
 'data': None,
 'included': ['metadatas', 'documents']}

In [12]:
# Re-insert the same two documents, this time attaching a metadata dict to
# each one (metadatas[i] belongs to documents[i] / ids[i]).
city_documents = [
    "This document is about New York",
    "This document is about Delhi",
]
city_ids = ['id1', 'id2']
city_metadatas = [
    {"url": "https://en.wikipedia.org/wiki/New_York_City"},
    {"url": "https://en.wikipedia.org/wiki/Delhi"},
]
collection.add(documents=city_documents, ids=city_ids, metadatas=city_metadatas)

In [13]:
# Same query as before the re-insert; the point of this cell is that each hit
# now comes back with the metadata stored alongside it.
query_with_metadata = ['Query is about chole Bhature']
results = collection.query(query_texts=query_with_metadata, n_results=2)
results

{'ids': [['id2', 'id1']],
 'distances': [[1.443028450012207, 1.8624567985534668]],
 'metadatas': [[{'url': 'https://en.wikipedia.org/wiki/Delhi'},
   {'url': 'https://en.wikipedia.org/wiki/New_York_City'}]],
 'embeddings': None,
 'documents': [['This document is about Delhi',
   'This document is about New York']],
 'uris': None,
 'data': None,
 'included': ['metadatas', 'documents', 'distances']}

In [6]:
from langchain_groq import ChatGroq

In [7]:
# Groq-hosted chat model. temperature=0 keeps answers as repeatable as the
# service allows; the API key is read from the environment so it never appears
# in the notebook (a missing GROQ_API_KEY raises KeyError right here).
llm = ChatGroq(
    temperature=0,
    groq_api_key=os.environ['GROQ_API_KEY'],
    # "llama-3.1-70b-versatile" has been decommissioned by Groq; this is the
    # replacement named in Groq's deprecation notice.
    model_name="llama-3.3-70b-versatile"
)
# Smoke test: confirms the key and the model id are accepted.
response = llm.invoke("The first person to land on moon was...")
print(response.content)

The first person to land on the moon was Neil Armstrong. He stepped onto the lunar surface on July 20, 1969, as part of the Apollo 11 mission.


In [9]:
from langchain_community.document_loaders import WebBaseLoader

In [10]:
# Download the job posting and keep only the text of the loaded page.
loader = WebBaseLoader("https://jobs.nike.com/job/R-32222")
loaded_docs = loader.load()
# pop() takes the last loaded document (and raises IndexError if none loaded).
page_data = loaded_docs.pop().page_content
print(page_data)

Apply for Senior Machine Learning Engineer

Search JobsSkip navigationSearch JobsNIKE, INC. JOBSContract JobsJoin The Talent CommunityLife @ NikeOverviewBenefitsBrandsOverviewJordanConverseTeamsOverviewAdministrative SupportAdvanced InnovationAir Manufacturing InnovationAviationCommunicationsCustomer ServiceDesignDigitalFacilitiesFinance & AccountingGovernment & Public AffairsHuman ResourcesInsights & AnalyticsLegalManufacturing & EngineeringMarketingMerchandisingPlanningPrivacyProcurementProduct Creation, Development & ManagementRetail CorporateRetail StoresSalesSocial & Community ImpactSports MarketingStrategic PlanningSupply Chain, Distribution & LogisticsSustainabilityTechnologyLocationsOverviewNike WHQNike New York HQEHQ: Hilversum, The NetherlandsELC: Laakdal, BelgiumGreater China HQDiversity, Equity & InclusionOverviewMilitary InclusionDisability InclusionIndigenous InclusionInternshipsData & AnalyticsSenior Machine Learning EngineerGdansk, PomeranianBecome a Part of the NIKE, I

In [11]:
from langchain_core.prompts import PromptTemplate

In [12]:
# Prompt that asks the model to turn the scraped page text into JSON with the
# keys role / experience / skills / description. {page_data} is the only
# template variable.
prompt_extract = PromptTemplate.from_template(
    """
        ### SCRAPED TEXT FROM WEBSITE:
        {page_data}
        ### INSTRUCTION:
        The scraped text is from the career's page of a website.
        Your job is to extract the job postings and return them in JSON format containing the 
        following keys: `role`, `experience`, `skills` and `description`.
        Only return the valid JSON.
        ### VALID JSON (NO PREAMBLE): 
    """
)

chain_extract = prompt_extract | llm  # pipeline: fill in the prompt, then pass it to the LLM
# `res` holds the raw model reply; its .content is the JSON text printed below.
res = chain_extract.invoke(input={'page_data':page_data})
print(res.content)

```json
{
  "role": "Senior Machine Learning Engineer",
  "experience": "3+ years of experience in the field of ML Engineering or Software Engineering",
  "skills": [
    "Understanding of Machine Learning, its applications, and the lifecycle of an ML application in production",
    "Ability to write robust, maintainable, and extendable code in Python",
    "Experience working in and/or collaborating with a partial or fully distributed team",
    "Strong experiential understanding of data structures, algorithms, and data solutions",
    "Familiarity with frameworks such as Scikit-learn, PyTorch, Tensorflow, Spark, FastAPI or similar platforms and frameworks",
    "Experience with cloud architecture and technologies (preferably Amazon Web Services: ECR, SageMaker, Lambda, API Gateway)",
    "Familiarity with pipeline orchestration tools such as AirFlow",
    "Awareness of CI/CD pipelines and containerization"
  ],
  "description": "As a Sr. Machine Learning Engineer within the AIML team

In [13]:
from langchain_core.output_parsers import JsonOutputParser

# Turn the model's reply text into Python objects. `res` must still be the
# extraction response at this point (a later cell rebinds `res`).
json_parser = JsonOutputParser()
json_res = json_parser.parse(res.content)

print(json_res)



{'role': 'Senior Machine Learning Engineer', 'experience': '3+ years of experience in the field of ML Engineering or Software Engineering', 'skills': ['Understanding of Machine Learning, its applications, and the lifecycle of an ML application in production', 'Ability to write robust, maintainable, and extendable code in Python', 'Experience working in and/or collaborating with a partial or fully distributed team', 'Strong experiential understanding of data structures, algorithms, and data solutions', 'Familiarity with frameworks such as Scikit-learn, PyTorch, Tensorflow, Spark, FastAPI or similar platforms and frameworks', 'Experience with cloud architecture and technologies (preferably Amazon Web Services: ECR, SageMaker, Lambda, API Gateway)', 'Familiarity with pipeline orchestration tools such as AirFlow', 'Awareness of CI/CD pipelines and containerization'], 'description': 'As a Sr. Machine Learning Engineer within the AIML team, you will be developing sophisticated analytics syst

In [14]:
import pandas as pd

In [15]:
# Portfolio table: a `Techstack` column and a `Links` column, both read by the
# vector-store cell further down. The path is relative to the notebook's
# working directory.
df = pd.read_csv("portfolio.csv")
df

Unnamed: 0,Techstack,Links
0,React Node.js MongoDB,https://example.com/react-portfolio
1,Angular.NET SQL Server,https://example.com/angular-portfolio
2,Vue.js Ruby on Rails PostgreSQL,https://example.com/vue-portfolio
3,Python Django MySQL,https://example.com/python-portfolio
4,Java Spring Boot Oracle,https://example.com/java-portfolio
5,Flutter Firebase GraphQL,https://example.com/flutter-portfolio
6,WordPress PHP MySQL,https://example.com/wordpress-portfolio
7,Magento PHP MySQL,https://example.com/magento-portfolio
8,React Native Node.js MongoDB,https://example.com/react-native-portfolio
9,iOS Swift Core Data,https://example.com/ios-portfolio


In [16]:
import chromadb
import uuid

# On-disk Chroma store rooted at ./vectorstore, so the indexed portfolio is
# reused across runs instead of being rebuilt each time.
client = chromadb.PersistentClient('vectorstore')
collection = client.get_or_create_collection(name="portfolio")

# Index the portfolio only when the collection has no records yet; otherwise
# every run would insert the same rows again under fresh random ids.
# All rows go in through a single add() call so they are embedded as one batch
# rather than one call per row. The `df.empty` check keeps an empty frame a
# no-op instead of handing Chroma empty lists.
if not collection.count() and not df.empty:
    collection.add(
        documents=df["Techstack"].tolist(),
        metadatas=[{"links": link} for link in df["Links"]],
        ids=[str(uuid.uuid4()) for _ in range(len(df))],
    )


In [17]:
# Find the two closest portfolio entries for each query text and keep only
# their metadata (one inner list of {"links": ...} dicts per query).
# NOTE(review): the query texts are hard-coded here; presumably they should
# come from the extracted job skills — confirm.
skill_queries = ['Experience in Python', 'Experience in React']
portfolio_hits = collection.query(query_texts=skill_queries, n_results=2)
links = portfolio_hits.get('metadatas', [])
links

[[{'links': 'https://example.com/ml-python-portfolio'},
  {'links': 'https://example.com/python-portfolio'}],
 [{'links': 'https://example.com/react-portfolio'},
  {'links': 'https://example.com/react-native-portfolio'}]]

In [18]:
# `job` is another name for the parsed posting (same object as `json_res`, not
# a copy); show just its skills list.
job = json_res
job['skills']

['Understanding of Machine Learning, its applications, and the lifecycle of an ML application in production',
 'Ability to write robust, maintainable, and extendable code in Python',
 'Experience working in and/or collaborating with a partial or fully distributed team',
 'Strong experiential understanding of data structures, algorithms, and data solutions',
 'Familiarity with frameworks such as Scikit-learn, PyTorch, Tensorflow, Spark, FastAPI or similar platforms and frameworks',
 'Experience with cloud architecture and technologies (preferably Amazon Web Services: ECR, SageMaker, Lambda, API Gateway)',
 'Familiarity with pipeline orchestration tools such as AirFlow',
 'Awareness of CI/CD pipelines and containerization']

In [19]:
# Display the full parsed posting that is passed to the email prompt.
job

{'role': 'Senior Machine Learning Engineer',
 'experience': '3+ years of experience in the field of ML Engineering or Software Engineering',
 'skills': ['Understanding of Machine Learning, its applications, and the lifecycle of an ML application in production',
  'Ability to write robust, maintainable, and extendable code in Python',
  'Experience working in and/or collaborating with a partial or fully distributed team',
  'Strong experiential understanding of data structures, algorithms, and data solutions',
  'Familiarity with frameworks such as Scikit-learn, PyTorch, Tensorflow, Spark, FastAPI or similar platforms and frameworks',
  'Experience with cloud architecture and technologies (preferably Amazon Web Services: ECR, SageMaker, Lambda, API Gateway)',
  'Familiarity with pipeline orchestration tools such as AirFlow',
  'Awareness of CI/CD pipelines and containerization'],
 'description': 'As a Sr. Machine Learning Engineer within the AIML team, you will be developing sophisticat

In [22]:
# Prompt for the cold email. Template variables:
#   {job_description} - the parsed job posting, passed as a string
#   {link_list}       - portfolio links retrieved from the vector store
# The company is called "xyz" throughout; the earlier wording mixed in a
# different company name, which contradicted the persona the model is given.
prompt_email = PromptTemplate.from_template(
        """
        ### JOB DESCRIPTION:
        {job_description}
        
        ### INSTRUCTION:
        You are xG, a business development executive at xyz. xyz is an AI & Software Consulting company dedicated to facilitating
        the seamless integration of business processes through automated tools. 
        Over our experience, we have empowered numerous enterprises with tailored solutions, fostering scalability, 
        process optimization, cost reduction, and heightened overall efficiency. 
        Your job is to write a cold email to the client regarding the job mentioned above describing the capability of xyz 
        in fulfilling their needs.
        Also add the most relevant ones from the following links to showcase xyz's portfolio: {link_list}
        Remember you are xG, BDE at xyz. 
        Do not provide a preamble.
        ### EMAIL (NO PREAMBLE):
        
        """
        )

# Pipeline: fill in the prompt, then send it to the LLM.
chain_email = prompt_email | llm
# NOTE: this rebinds `res`, replacing the extraction response from the earlier
# cell; re-running the JSON-parsing cell after this one would receive the email
# text instead of the JSON.
res = chain_email.invoke({"job_description": str(job), "link_list": links})
print(res.content)

Subject: Expert Machine Learning Solutions for Nike's AIML Team

Dear Hiring Manager,

I came across the Senior Machine Learning Engineer role at Nike and was impressed by the company's commitment to leveraging AI and machine learning to drive business growth. As a Business Development Executive at xyz, I'd like to introduce you to our team of experts who can help fulfill your requirements.

At xyz, we specialize in providing AI and software consulting services that empower businesses to streamline their processes, reduce costs, and enhance overall efficiency. Our team of seasoned machine learning engineers has a proven track record of developing sophisticated analytics systems that drive data-driven decision-making.

Our expertise aligns perfectly with the requirements outlined in the job description. We have extensive experience in:

* Developing robust, maintainable, and extendable code in Python
* Working with frameworks such as Scikit-learn, PyTorch, and TensorFlow
* Designing and