In [8]:
from langchain_groq import ChatGroq

import os

# Chat model used by every chain in this notebook.
# The API key is read from the GROQ_API_KEY environment variable instead of
# being written into the notebook; a missing variable raises KeyError here.
llm = ChatGroq(
    groq_api_key=os.environ["GROQ_API_KEY"],
    model="llama3-8b-8192",
    temperature=0.2,
)

In [None]:
from langchain_community.document_loaders import WebBaseLoader

# Load the job-posting page and keep the text content of the last item
# that the loader returns.
loader = WebBaseLoader("JOB_POSTING_LINK")
loaded_docs = loader.load()
page_data = loaded_docs.pop().page_content
print(page_data)

In [4]:
from langchain_core.prompts import PromptTemplate

# Prompt that asks the model to turn scraped page text into JSON job postings
# with the keys `role`, `experience`, `skills` and `description`.
prompt_extract = PromptTemplate.from_template(
        """
        ### SCRAPED TEXT FROM WEBSITE:
        {page_data}
        ### INSTRUCTION:
        The scraped text is from the career's page of a website.
        Your job is to extract the job postings and return them in JSON format containing the 
        following keys: `role`, `experience`, `skills` and `description`.
        Only return the valid JSON.
        ### VALID JSON (NO PREAMBLE):    
        """
)

# Pipe the prompt into the chat model and run it on the scraped page text.
chain_extract = prompt_extract | llm
res = chain_extract.invoke(input={'page_data': page_data})

# Keep the raw reply text under its own name; `res` itself is reassigned by a
# later cell in this notebook.
job_description = res.content
print(job_description)

Here is the extracted job posting in JSON format:

```
[
  {
    "role": "Javascript Developer",
    "experience": "Fresher",
    "skills": ["React/Angular", "JavaScript, CSS, HTML, and front-end languages", "RESTful services", "Git"],
    "description": "Experience on React/Angular, In-depth knowledge of JavaScript, CSS, HTML, and front-end languages, Familiarity with JavaScript frameworks such as Angular or React, Experience with browser-based debugging and performance testing software, Excellent troubleshooting skills, Excellent verbal communication skills, Experience with RESTful services, Git knowledge is a plus"
  }
]
```

Note that there is only one job posting in the provided text, so the output is a single JSON object. If there were multiple job postings, the output would be an array of JSON objects.


In [24]:
from langchain_core.output_parsers import JsonOutputParser

json_parser = JsonOutputParser()

# Parse the saved extraction reply (`job_description`) rather than
# `res.content`: `res` is reassigned by the email-generation cell, so
# re-running this cell after that one would otherwise parse the email text.
json_res = json_parser.parse(job_description)

# A reply holding exactly one posting inside a list is unwrapped so the
# following cells can index the posting by key. Any other shape is left as is.
if isinstance(json_res, list) and len(json_res) == 1:
    json_res = json_res[0]

print(type(json_res))
print(json_res)

<class 'dict'>
{'role': 'PHP Developer', 'experience': 'Fresher', 'skills': ['PHP', 'Laravel', 'CodeIgniter', 'MVC design patterns', 'AJAX/JavaScript/Json/Jquery', 'Creating API and CSS', 'relational databases', 'Apache', 'MySql', 'Git'], 'description': 'Good Experience of PHP web frameworks like Laravel, Symphony, CodeIgniter is must. Understanding of MVC design patterns. Knowledge for AJAX/JavaScript/Json/Jquery/Creating API and CSS is preferable. Good knowledge of relational databases of Apache, MySql and their declarative query languages. Proficient understanding of code versioning tools, such as Git. Experience in common third party APIs (Google, Facebook, eBay) and developing web services. Should be good at PHP language and basic understanding of OOPS. Should have strong fundamentals, good coding standards and desire to develop new bold ideas. Strong Technical and Debugging Skills. Strong Communicator, in verbal and written forms for international projects. Ability to create an e

In [25]:
# Display the type of the parsed extraction result.
type(json_res)

dict

In [26]:
import pandas as pd

# Load the portfolio table (tech stacks and their showcase links).
# The file's first column is a saved row index that otherwise loads as a
# data column named "Unnamed: 0", so it is used as the DataFrame index.
df = pd.read_csv("my_portfolio.csv", index_col=0)
df

Unnamed: 0,Techstack,Links
0,"React, Node.js, MongoDB",https://example.com/react-portfolio
1,"Angular,.NET, SQL Server",https://example.com/angular-portfolio
2,"Vue.js, Ruby on Rails, PostgreSQL",https://example.com/vue-portfolio
3,"Python, Django, MySQL",https://example.com/python-portfolio
4,"Java, Spring Boot, Oracle",https://example.com/java-portfolio
5,"Flutter, Firebase, GraphQL",https://example.com/flutter-portfolio
6,"WordPress, PHP, MySQL",https://example.com/wordpress-portfolio
7,"Magento, PHP, MySQL",https://example.com/magento-portfolio
8,"React Native, Node.js, MongoDB",https://example.com/react-native-portfolio
9,"iOS, Swift, Core Data",https://example.com/ios-portfolio


In [27]:
import uuid
import chromadb

# Open the on-disk store under 'vectorstore' and fetch (or create) the
# "portfolio1" collection.
client = chromadb.PersistentClient('vectorstore')
collection = client.get_or_create_collection(name="portfolio1")

# Index the portfolio only when the collection holds no entries yet: one entry
# per row, with the tech stack as the document, the link as metadata, and a
# random UUID as the id.
# NOTE(review): because of this guard, later edits to the CSV are not indexed
# while the persisted collection is non-empty.
if collection.count() == 0:
    for techstack, link in zip(df["Techstack"], df["Links"]):
        collection.add(
            documents=techstack,
            metadatas={"links": link},
            ids=[str(uuid.uuid4())],
        )

In [28]:
# Work with the parsed posting under the name `job` and show its skills.
job = json_res
print(job['skills'])

# Query the collection with the skills (two results requested per query text)
# and keep only the 'metadatas' part of the response, defaulting to [].
query_result = collection.query(query_texts=job['skills'], n_results=2)
links = query_result.get('metadatas', [])
links

['PHP', 'Laravel', 'CodeIgniter', 'MVC design patterns', 'AJAX/JavaScript/Json/Jquery', 'Creating API and CSS', 'relational databases', 'Apache', 'MySql', 'Git']


[[{'links': 'https://example.com/magento-portfolio'},
  {'links': 'https://example.com/wordpress-portfolio'}],
 [{'links': 'https://example.com/magento-portfolio'},
  {'links': 'https://example.com/wordpress-portfolio'}],
 [{'links': 'https://example.com/magento-portfolio'},
  {'links': 'https://example.com/wordpress-portfolio'}],
 [{'links': 'https://example.com/kotlin-backend-portfolio'},
  {'links': 'https://example.com/android-portfolio'}],
 [{'links': 'https://example.com/full-stack-js-portfolio'},
  {'links': 'https://example.com/magento-portfolio'}],
 [{'links': 'https://example.com/typescript-frontend-portfolio'},
  {'links': 'https://example.com/full-stack-js-portfolio'}],
 [{'links': 'https://example.com/magento-portfolio'},
  {'links': 'https://example.com/wordpress-portfolio'}],
 [{'links': 'https://example.com/magento-portfolio'},
  {'links': 'https://example.com/wordpress-portfolio'}],
 [{'links': 'https://example.com/magento-portfolio'},
  {'links': 'https://example.com/

In [91]:
# Bind the parsed posting to `job` and display its 'skills' entry.
job=json_res
job['skills']

'Hands-on expertise with security platforms such as CrowdStrike, Palo Alto Networks, or similar systems, with the capability to translate complex security requirements into clear action plans for technical teams.'

In [None]:
# Cold-email prompt: takes the job description and a list of portfolio links
# and instructs the model to answer with the email text only.
prompt_email = PromptTemplate.from_template(
        """
        ### JOB DESCRIPTION:
        {job_description}
        
        ### INSTRUCTION:
        You are Dhirav, a business development executive at XYZ company. XYZ company is an AI & Software Consulting company dedicated to facilitating
        the seamless integration of business processes through automated tools. 
        Over our experience, we have empowered numerous enterprises with tailored solutions, fostering scalability, 
        process optimization, cost reduction, and heightened overall efficiency. 
        Your job is to write a cold email to the client regarding the job mentioned above describing the capability of XYZ company 
        in fulfilling their needs.
        Also add the most relevant ones from the following links to showcase XYZ company's portfolio: {link_list}
        Remember you are Dhirav, BDE at XYZ company. 
        Do not provide a preamble.
        ### EMAIL (NO PREAMBLE):
        
        """
)

# Pipe the prompt into the chat model, then fill it with the stringified
# posting and the link metadata from the vector-store query.
chain_email = prompt_email | llm
email_inputs = {"job_description": str(job), "link_list": links}
res = chain_email.invoke(email_inputs)
print(res.content)