In [None]:
!pip install langchain
!pip install langchain-groq
!pip install langchain-community
!pip install chromadb

In [2]:
from langchain_groq import ChatGroq

In [3]:
import getpass
import os

# Never embed the API key in the notebook: take it from the environment and,
# when it is missing, prompt for it once without echoing it to the output.
if not os.environ.get("GROQ_API_KEY"):
    os.environ["GROQ_API_KEY"] = getpass.getpass("Enter your Groq API key: ")

# Chat model shared by every chain in this notebook.
llm = ChatGroq(
    temperature=0,
    groq_api_key=os.environ["GROQ_API_KEY"],
    model="llama-3.3-70b-versatile",
)

## Web Scraping

In [4]:
from langchain_community.document_loaders import WebBaseLoader

# Load the job posting page and keep the text content of the last document returned.
posting_url = "https://jobs.nike.com/job/R-47136?from=job%20search%20funnel"
loader = WebBaseLoader(posting_url)
loaded_docs = loader.load()
page_data = loaded_docs[-1].page_content
print(page_data)



Apply for Software Engineer

Search JobsSkip navigationSearch JobsNIKE, INC. JOBSContract JobsJoin The Talent CommunityLife @ NikeOverviewBenefitsBrandsOverviewJordanConverseTeamsOverviewAdministrative SupportAdvanced InnovationAir Manufacturing InnovationAviationCommunicationsCustomer ServiceDesignDigitalFacilitiesFinance & AccountingGovernment & Public AffairsHuman ResourcesInsights & AnalyticsLegalManufacturing & EngineeringMarketingMerchandisingPlanningPrivacyProcurementProduct Creation, Development & ManagementRetail CorporateRetail StoresSalesSocial & Community ImpactSports MarketingStrategic PlanningSupply Chain, Distribution & LogisticsSustainabilityTechnologyLocationsOverviewNike WHQNike New York HQEHQ: Hilversum, The NetherlandsELC: Laakdal, BelgiumGreater China HQDiversity, Equity & InclusionOverviewMilitary InclusionDisability InclusionIndigenous InclusionInternshipsTechnologySoftware EngineerBeaverton, OregonBecome a Part of the NIKE, Inc. TeamNIKE, Inc. does more than out

## Converting the scraped text into JSON format

In [5]:
from langchain_core.prompts import PromptTemplate

# Template instructing the model to return the posting as JSON with the keys
# 'role', 'experience', 'skills' and 'description'; {page_data} is filled at invoke time.
extract_template = """

    ### SCRAPED TEXT FROM WEBSITE:
    {page_data}
    ### INSTRUCTION:
    The scrapped text is the careers's page of a website.
    Your job is to extract the job postings and return them in JSON format containing the
    following keys: 'role', 'experience', 'skills', and 'description'.
    Only return the valid JSON.
    ### VALID JSON (NO PREAMBLE):

    """
prompt_extract = PromptTemplate.from_template(extract_template)

# Pipe the prompt into the chat model and run it on the scraped page text.
chain_extract = prompt_extract | llm
extract_inputs = {'page_data': page_data}
res = chain_extract.invoke(extract_inputs)
print(res.content)  # In the recorded run this printed a well-formatted JSON string

```json
{
  "role": "Software Engineer",
  "experience": "5 years of progressive post-baccalaureate experience in the job offered or in a Engineering-related occupation",
  "skills": [
    "Java",
    "Javascript",
    "NodeJS",
    "Groovy",
    "AWS Cloud platform",
    "REST API",
    "Automation scripting frameworks and technologies such as Testcafe, Selenium or Postman/Rest-assured",
    "Performance tools such as Jmeter or Gatling",
    "Build Management tool such as Gradle",
    "Version Control tools such as Github",
    "SAP modules order management",
    "Dev-ops tools such as Splunk, signalFX, AWS Cloudwatch, and Jenkins",
    "Database tools such as Mysql, PostgreSQL, and Teradata",
    "Cloud testing platform such as browserstack"
  ],
  "description": "Develop, code, configure, and test programs and systems and solutions problem in order to meet designed digital product specification and direction. Manage the team on gathering detail design and solving complex problems. E

In [6]:
# Show the Python type of the model's reply content before it is parsed.
type(res.content)

str

In [7]:
from langchain_core.output_parsers import JsonOutputParser

# Parse the model's text reply into native Python data.
json_parser = JsonOutputParser()
raw_reply_text = res.content
json_res = json_parser.parse(raw_reply_text)
json_res

{'role': 'Software Engineer',
 'experience': '5 years of progressive post-baccalaureate experience in the job offered or in a Engineering-related occupation',
 'skills': ['Java',
  'Javascript',
  'NodeJS',
  'Groovy',
  'AWS Cloud platform',
  'REST API',
  'Automation scripting frameworks and technologies such as Testcafe, Selenium or Postman/Rest-assured',
  'Performance tools such as Jmeter or Gatling',
  'Build Management tool such as Gradle',
  'Version Control tools such as Github',
  'SAP modules order management',
  'Dev-ops tools such as Splunk, signalFX, AWS Cloudwatch, and Jenkins',
  'Database tools such as Mysql, PostgreSQL, and Teradata',
  'Cloud testing platform such as browserstack'],
 'description': 'Develop, code, configure, and test programs and systems and solutions problem in order to meet designed digital product specification and direction. Manage the team on gathering detail design and solving complex problems. Ensure tech design driven by the Engineering Mana

In [8]:
# Show the Python type the JSON parser produced.
type(json_res)

dict

In [9]:
import pandas as pd

# Load the portfolio table; later cells read its "Techstack" and "Links" columns.
portfolio_csv_path = "/content/my_portfolio.csv"
df = pd.read_csv(portfolio_csv_path)
df

Unnamed: 0,Techstack,Links
0,"React, Node.js, MongoDB",https://example.com/react-portfolio
1,"Angular,.NET, SQL Server",https://example.com/angular-portfolio
2,"Vue.js, Ruby on Rails, PostgreSQL",https://example.com/vue-portfolio
3,"Python, Django, MySQL",https://example.com/python-portfolio
4,"Java, Spring Boot, Oracle",https://example.com/java-portfolio
5,"Flutter, Firebase, GraphQL",https://example.com/flutter-portfolio
6,"WordPress, PHP, MySQL",https://example.com/wordpress-portfolio
7,"Magento, PHP, MySQL",https://example.com/magento-portfolio
8,"React Native, Node.js, MongoDB",https://example.com/react-native-portfolio
9,"iOS, Swift, Core Data",https://example.com/ios-portfolio


In [10]:
import chromadb
import uuid

# Open (or create) the Chroma store at the path 'vectorstore' and its "portfolio" collection.
client = chromadb.PersistentClient('vectorstore')
collection = client.get_or_create_collection(name="portfolio")

# Populate only when the collection is empty so that re-running this cell does not
# insert duplicates. All rows are added in one batched call instead of one call per
# row; the empty-frame guard keeps the "nothing to add" case a no-op.
# NOTE(review): the portfolio is assumed small enough for a single add() batch.
if not collection.count() and not df.empty:
    collection.add(
        documents=df["Techstack"].tolist(),
        metadatas=[{"links": link} for link in df["Links"]],
        ids=[str(uuid.uuid4()) for _ in range(len(df))],
    )

/root/.cache/chroma/onnx_models/all-MiniLM-L6-v2/onnx.tar.gz: 100%|██████████| 79.3M/79.3M [00:01<00:00, 46.5MiB/s]


In [11]:
# Look up the two closest portfolio entries for each sample skill query and keep
# only the metadata of the matches.
skill_queries = ["Experience in Python", "Expertise in React Native"]
query_result = collection.query(query_texts=skill_queries, n_results=2)
links = query_result.get('metadatas')
links

[[{'links': 'https://example.com/ml-python-portfolio'},
  {'links': 'https://example.com/python-portfolio'}],
 [{'links': 'https://example.com/react-native-portfolio'},
  {'links': 'https://example.com/react-portfolio'}]]

In [12]:
# The extraction prompt asks for "job postings", so the parsed JSON may be either a
# single object or a list of objects. Normalise to a list and work with the first
# posting so that the key lookup below does not fail on a list.
postings = json_res if isinstance(json_res, list) else [json_res]
if not postings:
    raise ValueError("No job postings were extracted from the scraped page.")
job = postings[0]
job['skills']

['Java',
 'Javascript',
 'NodeJS',
 'Groovy',
 'AWS Cloud platform',
 'REST API',
 'Automation scripting frameworks and technologies such as Testcafe, Selenium or Postman/Rest-assured',
 'Performance tools such as Jmeter or Gatling',
 'Build Management tool such as Gradle',
 'Version Control tools such as Github',
 'SAP modules order management',
 'Dev-ops tools such as Splunk, signalFX, AWS Cloudwatch, and Jenkins',
 'Database tools such as Mysql, PostgreSQL, and Teradata',
 'Cloud testing platform such as browserstack']

In [13]:
# Cold-email template; {job_description} and {link_list} are filled at invoke time.
email_template = """
        ### JOB DESCRIPTION:
        {job_description}

        ### INSTRUCTION:
        You are Sarthak, a business development executive at Fealty. Fealty is an AI & Software Consulting company dedicated to facilitating
        the seamless integration of business processes through automated tools.
        Over our experience, we have empowered numerous enterprises with tailored solutions, fostering scalability,
        process optimization, cost reduction, and heightened overall efficiency.
        Your job is to write a cold email to the client regarding the job mentioned above describing the capability of Fealty
        in fulfilling their needs.
        Also add the most relevant ones from the following links to showcase Fealty's portfolio: {link_list}
        Remember you are Sarthak, BDE at Fealty.
        Do not provide a preamble.
        ### EMAIL (NO PREAMBLE):

        """
prompt_email = PromptTemplate.from_template(email_template)

# Pipe the prompt into the chat model, then generate the email from the parsed
# job posting and the portfolio links retrieved from the vector store.
chain_email = prompt_email | llm
email_inputs = {"job_description": str(job), "link_list": links}
res = chain_email.invoke(email_inputs)
print(res.content)

Subject: Expert Software Engineering Solutions for Nike's Digital Transformation

Dear Hiring Manager,

I came across the job description for a Software Engineer at Nike, and I'm excited to introduce Fealty, an AI & Software Consulting company that can help fulfill your requirements. With our expertise in developing, coding, configuring, and testing programs and systems, we're confident in our ability to support Nike's digital product specification and direction.

Our team of skilled engineers has experience in working with a range of technologies, including Java, Javascript, NodeJS, Groovy, AWS Cloud platform, REST API, and more. We've also worked with automation scripting frameworks and technologies such as Testcafe, Selenium, and Postman/Rest-assured, as well as performance tools like Jmeter and Gatling. Our proficiency in build management tools like Gradle, version control tools like Github, and database tools like Mysql, PostgreSQL, and Teradata can help streamline your developmen