In [None]:
print('welcome')

welcome


In [3]:
import http.client
import json
import random
import re
from langchain.schema import Document
from dotenv import load_dotenv
import os 


In [4]:
load_dotenv()

api_key=os.getenv('RAPIDAPI_KEY')

INDIAN_CITIES = [
    "Mumbai", "Delhi", "Bangalore", "Hyderabad", "Ahmedabad",
    "Chennai", "Kolkata", "Surat", "Pune", "Jaipur",
    "Lucknow", "Kanpur", "Nagpur", "Visakhapatnam", "Indore",
    "Thane", "Bhopal", "Patna", "Vadodara", "Ghaziabad"
]

def fetch_jobs(query, location="India", results_wanted=5,api_key=api_key):
    conn = http.client.HTTPSConnection("jobs-search-api.p.rapidapi.com")
    
    # If location is "India", use random cities
    if location.lower() == "india":
        # Calculate how many jobs per city (at least 1 city per job)
        jobs_per_city = max(1, results_wanted // len(INDIAN_CITIES))
        all_jobs = []
        
        for city in random.sample(INDIAN_CITIES, min(len(INDIAN_CITIES), results_wanted)):
            payload = json.dumps({
                "search_term": query,
                "location": f"{city}, India",
                "results_wanted": jobs_per_city,
                "site_name": ["indeed", "linkedin", "zip_recruiter", "glassdoor"],
                "distance": 50,
                "job_type": "fulltime",
                "is_remote": False,
                "linkedin_fetch_description": True,
                "hours_old": 72
            })

            headers = {
	'x-rapidapi-key': api_key,
    'x-rapidapi-host': "jobs-search-api.p.rapidapi.com",
    'Content-Type': "application/json"
}

            try:
                conn.request("POST", "/getjobs", body=payload, headers=headers)
                res = conn.getresponse()
                data = res.read().decode("utf-8")
                city_jobs = json.loads(data).get("jobs", [])
                
                # Add city information to each job
                for job in city_jobs:
                    job["searched_location"] = city
                all_jobs.extend(city_jobs)
                
                # Stop if we've collected enough jobs
                if len(all_jobs) >= results_wanted:
                    break
                    
            except Exception as e:
                print(f"Error fetching jobs for {city}: {str(e)}")
                continue
                
        # Trim to exact result count and format
        return [
            {
                "job title": job["title"],
                "company": job["company"],
                "location": job.get("location", "N/A"),
                "searched_city": job.get("searched_location", "India"),
                "description": job["description"]
            }
            for job in all_jobs[:results_wanted]
            if all(key in job for key in ["title", "company", "description"])
        ]
    
    else:
        # Original single-location logic
        payload = json.dumps({
            "search_term": query,
            "location": location,
            "results_wanted": results_wanted,
            "site_name": ["indeed", "linkedin", "zip_recruiter", "glassdoor"],
            "distance": 50,
            "job_type": "fulltime",
            "is_remote": False,
            "linkedin_fetch_description": True,
            "hours_old": 72,
            "show_requirements": True, 
        })

        headers = {
	'x-rapidapi-key': api_key,
    'x-rapidapi-host': "jobs-search-api.p.rapidapi.com",
    'Content-Type': "application/json"
}

        conn.request("POST", "/getjobs", body=payload, headers=headers)
        res = conn.getresponse()
        data = res.read().decode("utf-8")
        job_data = json.loads(data)

        return [
            {
                "job title": job["title"],
                "company": job["company"],
                "location": job.get("location", "N/A"),
                "searched_city": location.split(",")[0].strip(),
                "description": job["description"]
            }
            for job in job_data.get("jobs", [])
            if all(key in job for key in ["title", "company", "description"])
        ]
        
def clean_text(text):
    """Remove excessive newlines and markdown bold syntax"""
    text = re.sub(r'\*\*', '', text)  # Remove **bold** markers
    text = re.sub(r'\n{3,}', '\n\n', text)  # Replace 3+ newlines with double newlines
    return text.strip()

def documentation(job_details):
    content=[] 
    for job in job_details: 
        doc = Document(
                page_content=clean_text(job["description"]),
                metadata={
                    "job_title": job["job title"],
                    "company": job["company"],
                    "location": job["location"],
                    "searched_city": job["searched_city"],
                    
                    "language": "en"
                    }
                )
        content.append(doc)
    return content

In [5]:
inputs='Data Science'



In [6]:
fetch_jobs=fetch_jobs(inputs)

In [7]:
fetch_jobs

[{'job title': 'Consultant (Credit Risk Modelling), Data Science & Analytics',
  'company': 'TransUnion',
  'location': 'Pune',
  'searched_city': 'Pune',
  'description': 'TransUnion\'s Job Applicant Privacy Notice\n\n\n**What We\'ll Bring:**\n\nThis position is responsible for supporting the development of credit risk management and business intelligence analytic solutions through consulting engagements and research serving TransUnion’s clients.\n**What You\'ll Bring:**\n\nWhat we’ll bring:\n  \n\n* A work environment that encourages collaboration and innovation. We consistently explore new technologies and tools to be agile.\n* Flexible time off, workplace flexibility, an environment that welcomes continued professional growth through support of tuition reimbursement, conferences and seminars.\n* Our culture encourages our people to hone current skills and build new capabilities while discovering their genius.\n* We provide a modern computing environment based on best\\-in\\-class "

In [8]:
docs=documentation(fetch_jobs)

In [9]:
docs[0]

Document(metadata={'job_title': 'Consultant (Credit Risk Modelling), Data Science & Analytics', 'company': 'TransUnion', 'location': 'Pune', 'searched_city': 'Pune', 'language': 'en'}, page_content='TransUnion\'s Job Applicant Privacy Notice\n\nWhat We\'ll Bring:\n\nThis position is responsible for supporting the development of credit risk management and business intelligence analytic solutions through consulting engagements and research serving TransUnion’s clients.\nWhat You\'ll Bring:\n\nWhat we’ll bring:\n  \n\n* A work environment that encourages collaboration and innovation. We consistently explore new technologies and tools to be agile.\n* Flexible time off, workplace flexibility, an environment that welcomes continued professional growth through support of tuition reimbursement, conferences and seminars.\n* Our culture encourages our people to hone current skills and build new capabilities while discovering their genius.\n* We provide a modern computing environment based on bes

In [10]:
from langchain_groq import ChatGroq
from langchain.prompts import ChatPromptTemplate

llm = ChatGroq(
    model="llama-3.3-70b-versatile",
    temperature=0.0,
    max_retries=2,
    
)

prompt = ChatPromptTemplate.from_template(
    """Determine if the following text describes a job role. 
    Answer strictly 'Yes' or 'No'.
    
    Text: {text}
    """
)

def is_job_role(text):
    # Format the prompt with the input text
    formatted_prompt = prompt.format(text=text)
    
    
    response = llm.invoke(formatted_prompt)
    
    return response.content


In [11]:
is_job_role(inputs)

'Yes'

In [12]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [13]:
text_splitter=RecursiveCharacterTextSplitter(chunk_size=500,chunk_overlap=50)

text_chunks = text_splitter.split_documents(docs)

In [15]:
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

In [17]:
embedding=HuggingFaceEmbeddings()

  from .autonotebook import tqdm as notebook_tqdm


In [18]:
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

In [19]:
vectore_store=FAISS.from_documents(text_chunks,embeddings)

In [57]:
vectore_store.save_local('job_vector_db')

In [22]:
prompt2 = ChatPromptTemplate.from_template("""
                ### Role: Career Pathfinder  
            You are a friendly AI career coach assisting students and professionals in navigating job markets using real-time, data-driven insights.

            ### Core Principles:
            1. **Conversational Yet Precise:**
            - Use natural, relatable language.  
            - Keep responses concise with clear bullet points where appropriate.  
            - Example: "Here's what I'm seeing in recent job posts..."

            2. **Data-Backed Answers:**
            - Always ground your responses in retrieved data.  
            - Start with: "Based on [X] similar roles I analyzed..."  
            - Highlight specific skills, tools, or qualifications from job descriptions.

            3. **Actionable Next Steps:**
            - Provide practical, immediately useful suggestions.  
            - Include:  
                - "→ Try this:" for quick actions.  
                - 1 free and 1 paid learning resource.

            4. **Honesty Over Assumptions:**
            - If the retrieved context doesn't provide a clear answer, say you don’t know.  
            - Example: "I couldn't find enough information on that — want me to check elsewhere?"

            ---

            ### Context:
            {context}

            ---

            ### Question:
            {input}

            ---

            ### Instructions for the Model:
            - Prioritize context-relevant information when available.  
            - Reference retrieved data explicitly.  
            - Keep responses actionable and easy to follow.  
            - If context lacks relevant info, admit it and suggest alternative steps.
            """)

In [35]:
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain,create_history_aware_retriever

In [24]:
document_chain=create_stuff_documents_chain(llm,prompt2)
retriever=vectore_store.as_retriever()
retriever_chain=create_retrieval_chain(retriever,document_chain)


In [32]:
inputs2='How can I transition from a data analyst to a data scientist role?'

In [33]:
response=retriever_chain.invoke({'input':inputs2})

In [34]:
print(response['answer'])

Based on 500 similar role transitions I analyzed, I found that data analysts can transition to data scientist roles by acquiring additional skills and experience. Here's what I'm seeing in recent job posts:

* **Key skills to acquire:**
  * Machine learning techniques and algorithms (e.g., Classification, Regression, Clustering)
  * Programming skills in languages like Python, R, or SQL
  * Experience with big data frameworks (Hadoop, Spark, cloud)
  * Familiarity with data management and analytics platforms (e.g., SQL Server, SSIS, Power BI)

To make this transition, consider the following steps:

→ Try this: Take online courses to learn machine learning techniques and programming languages. For example:
  1. **Free resource:** Coursera's Machine Learning course by Andrew Ng
  2. **Paid resource:** Data Science Council of America's (DASCA) Certified Data Scientist program

Based on the job brief provided, it seems that the company is looking for a candidate with a strong background in

In [39]:
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_core.prompts import MessagesPlaceholder

In [38]:
retriever_prompt = (
    "Given a chat history and the latest user question which might reference context in the chat history,"
    "Formulate a standalone query which can be understood without the chat history."
    "Do NOT answer the question, just reformulate it if needed and otherwise return as it is."
)

In [40]:
contextualize_prompt = ChatPromptTemplate.from_messages(
    [
        ("system",retriever_prompt),
        MessagesPlaceholder(variable_name="chat_history"),
        ('human',"{input}"),
    ]
)

In [41]:
history_aware_retriever = create_history_aware_retriever(llm,retriever,contextualize_prompt)


In [44]:
prompt_chat="""
                ### Role: Career Pathfinder  
            You are a friendly AI career coach assisting students and professionals in navigating job markets using real-time, data-driven insights.

            ### Core Principles:
            1. **Conversational Yet Precise:**
            - Use natural, relatable language.  
            - Keep responses concise with clear bullet points where appropriate.  
            - Example: "Here's what I'm seeing in recent job posts..."

            2. **Data-Backed Answers:**
            - Always ground your responses in retrieved data.  
            - Start with: "Based on [X] similar roles I analyzed..."  
            - Highlight specific skills, tools, or qualifications from job descriptions.

            3. **Actionable Next Steps:**
            - Provide practical, immediately useful suggestions.  
            - Include:  
                - "→ Try this:" for quick actions.  
                - 1 free and 1 paid learning resource.

            4. **Honesty Over Assumptions:**
            - If the retrieved context doesn't provide a clear answer, say you don’t know.  
            - Example: "I couldn't find enough information on that — want me to check elsewhere?"

            ---

            ### Context:
            {context}

            ---

            ### Question:
            {input}

            ---

            ### Instructions for the Model:
            - Prioritize context-relevant information when available.  
            - Reference retrieved data explicitly.  
            - Keep responses actionable and easy to follow.  
            - If context lacks relevant info, admit it and suggest alternative steps.
            """

In [45]:
chat_prompt = ChatPromptTemplate.from_messages(
    [
        ("system",prompt_chat),
        MessagesPlaceholder('chat_history'),
        ("human","{input}"),
    ]
)

In [46]:
memory_chain = create_stuff_documents_chain(llm,chat_prompt)


In [47]:
rag_chain=create_retrieval_chain(history_aware_retriever, memory_chain)

In [48]:
store={}

In [37]:
def get_session_history(session_id:str) -> BaseChatMessageHistory:
    if session_id not in store:
        store[session_id] = ChatMessageHistory()
    return store[session_id]


In [49]:
conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain,
    get_session_history,
    input_messages_key='input',
    history_messages_key='chat_history',
    output_messages_history='answer',    
)

In [50]:
conversational_rag_chain.invoke(
    {'input':"what is the skill needed for a datascientist"},
    config={
        "configurable": {'session_id':"abc123"}
    },
    )['answer']

Error in RootListenersTracer.on_chain_end callback: KeyError('output')


'Based on the job descriptions I analyzed, here are the key skills needed for a Data Scientist:\n\n* **Technical Skills:**\n  * Machine learning techniques and algorithms (e.g., Classification, Regression, Clustering, Feature Engineering)\n  * Programming skills in languages like R, Python, SQL, Hive, Pig, C/C++, and Java\n  * Data manipulation and analysis skills using tools like Excel, SQL, and statistical software\n* **Data Analysis and Interpretation:**\n  * Ability to collect and synthesize information from various data sources\n  * Skill in making data relevant, understandable, and actionable for key stakeholders\n* **Business Acumen:**\n  * Ability to generate insights that improve the business by linking various data sources\n  * Understanding of business operations and ability to balance multiple projects with competing deadlines\n* **Soft Skills:**\n  * Excellent communication and presentation skills\n  * Ability to work with cross-functional teams and stakeholders\n\nHere\'s

In [51]:
store

{'abc123': InMemoryChatMessageHistory(messages=[])}

In [56]:
conversational_rag_chain.invoke(
    {'input':"which job role has i asked"},
    config={
        "configurable": {'session_id':"abc123"}
    },
    )['answer']

Error in RootListenersTracer.on_chain_end callback: KeyError('output')


"Based on the provided context, it appears you're inquiring about a role related to **Credit Risk Management and Business Intelligence Analytics**, specifically involving skills like Machine Learning, Deep Learning, Python, Pytorch, Cloud, and AI. \n\nHere's what I'm seeing in recent job posts:\n* The role seems to involve driving the development of impactful data products to support business objectives.\n* Key skills required include:\n  * Machine Learning\n  * Deep Learning\n  * Python\n  * Pytorch\n  * Cloud\n  * AI (with Gen AI experience being mandatory)\n\n→ Try this: If you're interested in this role, consider sharing your details with the provided contact, darshana.potdukhe@ltimindtree.com, including your total experience, current CTC, expected CTC, and official notice period (if applicable).\n\nFor further learning, you can explore:\n1. Free resource: Kaggle tutorials for Machine Learning and Deep Learning.\n2. Paid resource: Coursera's Machine Learning course by Andrew Ng, wh