In [1]:
import os
import asyncio
from dotenv import load_dotenv
# Load variables from a local .env file into the process environment; later
# cells read the LinkedIn and OpenAI credentials from os.environ.
load_dotenv()
import logging
# Only let WARNING and above through from the "httpx" logger so request logs
# do not flood the cell output.
logging.getLogger("httpx").setLevel(logging.WARNING)

In [2]:
from linkedin_api import Linkedin
# Shared LinkedIn client used by the job-search helpers in later cells.
# Credentials come from the LINKEDIN_EMAIL / LINKEDIN_PASS environment variables.
# NOTE(review): presumably this authenticates when constructed — confirm against linkedin_api.
api = Linkedin(os.environ["LINKEDIN_EMAIL"], os.environ["LINKEDIN_PASS"])

In [3]:
from langchain_community.document_loaders import PyPDFLoader

def load_cv(path="data/Touhidul Alam - Resume.pdf"):
    """Read a PDF CV and return its text as one string.

    Args:
        path: Location of the PDF to read. Defaults to the CV this notebook
            was written for, so existing ``load_cv()`` calls behave as before.

    Returns:
        The ``page_content`` of every page produced by the loader,
        concatenated in page order with no separator between pages.
    """
    pages = PyPDFLoader(path).load()
    # join() assembles the text in a single pass instead of re-copying the
    # accumulated string once per page.
    return ''.join(page.page_content for page in pages)

In [4]:
def get_job_type(job_type):
    """Translate a job type name into its one-letter code.

    The lookup ignores letter case. A name that is not in the table
    yields ``None``.
    """
    code_by_name = dict(
        [
            ("full-time", "F"),
            ("contract", "C"),
            ("part-time", "P"),
            ("temporary", "T"),
            ("internship", "I"),
            ("volunteer", "V"),
            ("other", "O"),
        ]
    )

    try:
        return code_by_name[job_type.lower()]
    except KeyError:
        return None

def _as_filter_list(value):
    """Wrap a non-empty string in a list; pass ``None``, ``""`` and sequences through."""
    if isinstance(value, str) and value:
        return [value]
    return value


def get_job_ids(keywords, location_name, job_type=None, limit=10, companies=None, industries=None, remote=None):
    """Search LinkedIn jobs and return the ids of the matching postings.

    Args:
        keywords: Search terms, forwarded to ``api.search_jobs``.
        location_name: Location to search in, forwarded unchanged.
        job_type: Optional job type name (e.g. "full-time"); translated to a
            code with ``get_job_type`` before the search.
        limit: Maximum number of postings to request.
        companies: Optional company filter, a string or a list of strings.
        industries: Optional industry filter, a string or a list of strings.
        remote: Optional remote-work filter, a string or a list of strings.

    Returns:
        A list with, for each posting, the text that follows "jobPosting:" in
        its ``trackingUrn``. Postings without such a value are skipped.
    """
    if job_type is not None:
        job_type = get_job_type(job_type)
    print("Arguments are:", keywords, location_name, job_type, limit, companies, industries, remote)

    # linkedin_api documents these filters as lists of strings and joins their
    # items; a bare string such as "1441" would be joined character by
    # character. Wrap lone strings so they reach the API as a single value.
    job_postings = api.search_jobs(
        keywords=keywords,
        job_type=_as_filter_list(job_type),
        location_name=location_name,
        companies=_as_filter_list(companies),
        industries=_as_filter_list(industries),
        remote=_as_filter_list(remote),
        limit=limit,
    )

    # Keep only the part after "jobPosting:" of each trackingUrn. A posting
    # that lacks the field (or the marker) is skipped instead of aborting the
    # whole search with a KeyError/IndexError.
    job_ids = []
    for job in job_postings:
        urn_parts = (job.get('trackingUrn') or '').split('jobPosting:')
        if len(urn_parts) > 1:
            job_ids.append(urn_parts[1])
    return job_ids
#job_ids

In [5]:
import nest_asyncio
# Patch asyncio so that asyncio.run() can be used from code running inside the
# notebook's event loop; job_pipeline (defined in a later cell) calls asyncio.run().
nest_asyncio.apply()

# Keys of the record returned by get_job_details, with the values used when a
# lookup fails entirely.
_EMPTY_JOB_DETAILS = {
    "company_name": '',
    "company_url": '',
    "job_desc_text": '',
    "work_remote_allowed": '',
    "job_title": '',
    "company_apply_url": '',
    "job_location": ''
}

_COMPANY_KEY = 'com.linkedin.voyager.deco.jobs.web.shared.WebCompactJobPostingCompany'
_OFFSITE_APPLY_KEY = 'com.linkedin.voyager.jobs.OffsiteApply'


def _dig(data, *path, default=''):
    """Follow ``path`` through nested dicts; return ``default`` if a step is missing or not a dict."""
    current = data
    for key in path:
        if not isinstance(current, dict) or key not in current:
            return default
        current = current[key]
    return current


async def get_job_details(job_id):
    """Fetch one job posting and flatten the fields of interest.

    Args:
        job_id: Id of the posting, passed to ``api.get_job``.

    Returns:
        A dict with the keys ``company_name``, ``company_url``,
        ``job_desc_text``, ``work_remote_allowed``, ``job_title``,
        ``company_apply_url`` and ``job_location``. A field that is absent
        from the posting is ``''``. If the fetch itself raises, the error is
        printed and every field is ``''``.
    """
    try:
        # api.get_job is a regular (non-awaited) call, so it blocks the event
        # loop until it returns.
        job_data = api.get_job(job_id)

        # Each field is looked up independently: a missing or null nested
        # object only blanks the fields below it, not the whole record.
        company = _dig(job_data, 'companyDetails', _COMPANY_KEY, 'companyResolutionResult', default={})
        job_data_dict = {
            "company_name": _dig(company, 'name'),
            "company_url": _dig(company, 'url'),
            "job_desc_text": _dig(job_data, 'description', 'text'),
            "work_remote_allowed": _dig(job_data, 'workRemoteAllowed'),
            "job_title": _dig(job_data, 'title'),
            # Only read from the OffsiteApply apply method; other apply
            # methods leave this field empty.
            "company_apply_url": _dig(job_data, 'applyMethod', _OFFSITE_APPLY_KEY, 'companyApplyUrl'),
            "job_location": _dig(job_data, 'formattedLocation')
        }
    except Exception as e:
        # Best effort: report the failure and return an all-empty record so
        # one bad posting does not abort the whole batch.
        print(f"Error fetching job details for job ID {job_id}: {str(e)}")
        job_data_dict = dict(_EMPTY_JOB_DETAILS)

    return job_data_dict

async def fetch_all_jobs(job_ids, batch_size=10):
    """Fetch the details of every job id, one after another.

    Args:
        job_ids: Iterable of job ids to look up.
        batch_size: Accepted, but not used by this implementation.

    Returns:
        A list holding the ``get_job_details`` result for each id, in the
        same order as ``job_ids``.
    """
    # Each lookup is awaited before the next one starts.
    return [await get_job_details(one_id) for one_id in job_ids]

async def job_threads(job_ids):
    """Fetch details for ``job_ids`` through ``fetch_all_jobs`` with a batch size of 10."""
    all_details = await fetch_all_jobs(job_ids, 10)
    return all_details

In [6]:
# Define the chat model used by the supervisor chain and the worker agents.
from langchain_openai import ChatOpenAI
# The OpenAI key is read from the environment rather than hardcoded.
llm = ChatOpenAI(model_name="gpt-4-0125-preview", openai_api_key=os.environ["OPENAI_API_KEY"])

# Alternative model (Groq-hosted Mixtral), currently disabled:
#from langchain_groq import ChatGroq
#llm = ChatGroq(temperature=0, groq_api_key=os.environ["GROQ_API_KEY"], model_name="mixtral-8x7b-32768")

In [7]:
%%time
#define tools
from langchain.agents import tool

@tool
def job_pipeline(keywords: str, location_name:str, job_type:str=None, limit:int=10, companies:str=None, industries:str=None, remote:str=None) -> dict:
    """Given a query identify job role, location and other optional arguments: job type, search/hit limit, companies, industries, remote job 
    and returns a list of Linkedin job posting title, company url, job location and detailed job description"""
    # NOTE(review): the docstring above is presumably what @tool exposes to the
    # LLM as the tool description, so its wording is deliberately left untouched.
    # NOTE(review): the return annotation says dict, but the value is whatever
    # job_threads produces — confirm and align.
    matching_ids = get_job_ids(
        keywords,
        location_name,
        job_type,
        limit,
        companies,
        industries,
        remote,
    )
    print(matching_ids)
    # Drive the async detail fetch to completion from this synchronous tool.
    return asyncio.run(job_threads(matching_ids))

@tool
def extract_cv() -> str:
    """From the CV text, extract relevant skill, experience, previous job responsibility, project experience etc.
    Returns the unstructured CV text in a structured format later to analyzer. Consider only job relevant skill, not any personal information"""
    # Hand the text produced by load_cv() straight back to the caller.
    return load_cv()

@tool
def generate_letter_for_specific_job() -> str:
    """Given the CV and highest relevant job, write a cover letter according to CV and matching with the job description. \
       Return the letter containing contact info, proper addresser, some description of the career and motivation for this job."""
    # This tool takes no arguments, so the only material it can supply is the
    # CV text; return it so the calling agent can ground the letter (contact
    # info, career summary) in it. Returning the undefined name `text` here
    # would raise NameError on every call.
    # NOTE(review): confirm that supplying the CV text is the intended behaviour.
    return load_cv()


# All tools defined in this cell, collected in one list (bound to the LLM in a later cell).
tools = [job_pipeline, extract_cv, generate_letter_for_specific_job]

CPU times: user 359 ms, sys: 35.7 ms, total: 395 ms
Wall time: 434 ms


In [8]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.agents import AgentExecutor, create_openai_tools_agent
from langchain_core.messages import BaseMessage, HumanMessage
from langchain_openai import ChatOpenAI


def create_agent(llm: ChatOpenAI, tools: list, system_prompt: str):
    """Build the executor for one worker.

    Args:
        llm: Chat model that drives the agent.
        tools: Tools the agent is allowed to call.
        system_prompt: Text of the system message that opens the agent's prompt.

    Returns:
        An ``AgentExecutor`` wrapping an OpenAI-tools agent built from the
        arguments.
    """
    # Prompt layout: the system message first, followed by two placeholders
    # named "messages" and "agent_scratchpad".
    message_templates = [
        ("system", system_prompt),
        MessagesPlaceholder(variable_name="messages"),
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ]
    worker_prompt = ChatPromptTemplate.from_messages(message_templates)
    worker_agent = create_openai_tools_agent(llm, tools, worker_prompt)
    return AgentExecutor(agent=worker_agent, tools=tools)

In [9]:
def agent_node(state, agent, name):
    """Run ``agent`` on ``state`` and wrap its output as one named message.

    Args:
        state: Input handed to ``agent.invoke``.
        agent: Object exposing ``invoke(state)`` that returns a mapping with
            an ``"output"`` entry.
        name: Name attached to the resulting message.

    Returns:
        ``{"messages": [message]}`` where ``message`` is a ``HumanMessage``
        whose content is the agent's output and whose name is ``name``.
    """
    outcome = agent.invoke(state)
    reply = HumanMessage(content=outcome["output"], name=name)
    return {"messages": [reply]}

In [10]:
from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# Names of the worker nodes the supervisor can hand work to.
members = ["Searcher", "Analyzer", "Generator"]
# System message for the supervisor; {members} is filled in by .partial() below.
system_prompt = (
    "You are a supervisor tasked with managing a conversation between the"
    " following workers:  {members}. Given the following user request,"
    " respond with the worker to act next. Each worker will perform a"
    " task and respond with their results and status." 
    #" Task will include, extracting cv content, searching jobs given user query, write modified cv according to best matched jobs."
    " When finished, respond with FINISH."
)

# Our team supervisor is an LLM node. It just picks the next agent to process
# and decides when the work is completed.
# Valid routing answers: "FINISH" or one of the worker names.
options = ["FINISH"] + members
# Using OpenAI function calling makes output parsing easier for us: the
# "route" function has a single required argument, "next", restricted to `options`.
function_def = {
    "name": "route",
    "description": "Select the next role.",
    "parameters": {
        "title": "routeSchema",
        "type": "object",
        "properties": {
            "next": {
                "title": "Next",
                "anyOf": [
                    {"enum": options},
                ],
            }
        },
        "required": ["next"],
    },
}

# Supervisor prompt: system instructions, then the conversation so far, then a
# closing system message asking who should act next (or whether to FINISH).
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        MessagesPlaceholder(variable_name="messages"),
        (
            "system",
            "Given the conversation above, who should act next?"
            " Or should we FINISH? Select one of: {options}",
        ),
    ]
).partial(options=str(options), members=", ".join(members))

from langchain.agents.output_parsers.openai_tools import OpenAIToolsAgentOutputParser
# NOTE(review): llm_with_tools is not referenced anywhere else in the visible
# notebook — confirm whether it is still needed.
llm_with_tools = llm.bind_tools(tools)

# Pipeline: prompt -> LLM bound to the "route" function -> parsed JSON
# arguments of that call (e.g. {'next': 'Searcher'}).
supervisor_chain = (
    prompt
    | llm.bind_functions(functions=[function_def], function_call="route") 
    | JsonOutputFunctionsParser() #OpenAIToolsAgentOutputParser()
)

In [11]:
import operator
from typing import Annotated, Sequence, TypedDict
import functools
from langgraph.graph import StateGraph, END


# The agent state is the input to each node in the graph.
class AgentState(TypedDict):
    """State dictionary handed to every node of the graph."""
    # Conversation so far. The operator.add annotation tells the graph that
    # messages returned by a node are always added to the current ones
    # rather than replacing them.
    messages: Annotated[Sequence[BaseMessage], operator.add]
    # Where to route to next: a worker name or "FINISH".
    next: str


# One worker per role. Each worker is an executor built by create_agent and
# wrapped with agent_node, which runs it and returns its output as a message
# named after the role.
search_agent = create_agent(llm, [job_pipeline], "Given user queries of searching a role with necessary parameters, show the most relevant jobs with title, company url, job location and detailed job description")
search_node = functools.partial(agent_node, agent=search_agent, name="Searcher")

analyzer_agent = create_agent(llm, [extract_cv], "Analyzer has access to the CV and most relevant jobs from the Searcher. Analyzer extract necessary information to be able to match with the jobs. Then analyze all the searched jobs and reason which one is highest matching.")
analyzer_node = functools.partial(agent_node, agent=analyzer_agent, name="Analyzer")

generator_agent = create_agent(llm, [generate_letter_for_specific_job], "Given the extracted CV and highest matching job from the Analyzer, write a cover letter motivated with the job.")
# The partial must wrap agent_node, exactly like the two workers above. Using
# the executor itself as the callable would call it with unexpected `agent=`
# and `name=` keyword arguments and never produce a {"messages": [...]} update.
generator_node = functools.partial(agent_node, agent=generator_agent, name="Generator")

# Register the three workers and the supervisor chain as nodes of the graph.
workflow = StateGraph(AgentState)
workflow.add_node("Searcher", search_node)
workflow.add_node("Analyzer", analyzer_node)
workflow.add_node("Generator", generator_node)
workflow.add_node("supervisor", supervisor_chain)

In [12]:
# Every worker hands control back to the supervisor once it is done.
for member in members:
    workflow.add_edge(member, "supervisor")

# The supervisor fills in the "next" field of the state; that value picks the
# worker to run, and "FINISH" ends the graph.
conditional_map = dict(zip(members, members))
conditional_map.update(FINISH=END)
workflow.add_conditional_edges("supervisor", lambda x: x["next"], conditional_map)

# The supervisor is the entry point of the graph.
workflow.set_entry_point("supervisor")

graph = workflow.compile()

In [13]:
from IPython.display import Image
# Print the compiled graph as an ASCII diagram.
graph.get_graph().print_ascii()
# Alternative, currently disabled: render the graph as a PNG image.
#Image(graph.get_graph().draw_png())

                                         +-----------+                                           
                                         | __start__ |                                           
                                         +-----------+                                           
                                                *                                                
                                                *                                                
                                                *                                                
                                        +------------+                                           
                                      **| supervisor |****                                       
                               *******  +------------+*   *******                                
                        *******          ***           ***       *******                         
                 ***

In [14]:
# Run the graph on one request and print each streamed step as it arrives.
for s in graph.stream(
    {"messages": [HumanMessage(content="Find data science job for me in Germany maximum 5 relevant one. Then analyze my CV and write me a cover letter according to the best matching job.")]},
    {"recursion_limit": 100},
):
    # Steps that carry the "__end__" key are not printed.
    if "__end__" in s:
        continue
    print(s)
    print("----")

{'supervisor': {'next': 'Searcher'}}
----
Arguments are: data science Germany None 5 None None None
['3803763473', '3703401972', '3888041228', '3860328669', '3808181363']
{'Searcher': {'messages': [HumanMessage(content="Here are the top 5 relevant data science jobs found in Germany:\n\n1. **Volunteer: Microfinance Project in South Africa**\n   - **Company:** Global Nomadic\n   - **Location:** Munich, Bavaria, Germany\n   - **Description:** This internship involves setting up and implementing a microfinance initiative to address poverty and unemployment. You'll collaborate with neighbourhood care points to help vulnerable and orphaned children. [Apply here](https://globalnomadic.com/project/swaziland-microfinance-pilot-project-internship/)\n   - [Company Website](https://www.linkedin.com/company/global-nomadic)\n\n2. **Machine Learning Engineer**\n   - **Company:** Celonis\n   - **Location:** Darmstadt, Hesse, Germany\n   - **Description:** You'll be designing and developing product fea

In [15]:
# Another request to try: "Find data engineering jobs for me in Germany, maximum 10. Then choose the most relevant one and write me a slightly modified CV for that job."