In [1]:
import json
import os
from typing import List, Dict, Any, Optional

from langchain_core.prompts import PromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import StateGraph, START, END
from tavily import TavilyClient
from typing_extensions import TypedDict

In [2]:
# Chat model used by the `generate_careers` node. The key is read from the
# environment; `api_key` is None when GEMINI_API_KEY is not set.
api_key = os.environ.get("GEMINI_API_KEY")
gemini_llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", api_key=api_key, temperature=0.2)

In [3]:
# Shared Tavily client used by `tavily_search`. Indexing `os.environ` directly
# means this cell raises KeyError immediately if TAVILY_API_KEY is not set.
tavily = TavilyClient(api_key=os.environ["TAVILY_API_KEY"])

def tavily_search(query: str, max_results: int = 5) -> list:
    """
    Look up learning resources (articles, tutorials, docs) on Tavily.

    Args:
        query: Free-text search query.
        max_results: Upper bound on hits; forwarded to Tavily and also
            enforced locally by slicing the response.

    Returns:
        A list of ``{"title": ..., "url": ...}`` dicts, one per hit. Any
        other fields in the Tavily response are dropped.
    """
    response = tavily.search(query=query, max_results=max_results)
    hits = response["results"][:max_results]

    trimmed = []
    for hit in hits:
        trimmed.append({"title": hit["title"], "url": hit["url"]})
    return trimmed


In [4]:
class State(TypedDict):
    """Shared state dict passed between the nodes of the career-advisor graph."""

    # Raw user text; `parse_input` splits it on commas and it is also
    # interpolated into the LLM prompt by `generate_careers`.
    user_input: str
    # Stripped, lowercased skill names produced by `parse_input`.
    user_skills: List[str]
    # `response.content` of the LLM call made in `generate_careers`.
    raw_llm_output: str
    # JSON object parsed from `raw_llm_output`; its "jobs" entries are enriched
    # by `parse_and_match` with "match_percent" and "resources".
    jobs_data: Dict[str, Any]
    # Text summary of all suggested jobs, built by `format_display`.
    final_display: str
    # Job title the user picked; `choose_career` defaults it to the first job.
    chosen_career: Optional[str]
    # Text listing of resources per missing skill, built by `show_resources`.
    resources_display: Optional[str]
    # Text learning schedule, built by `generate_roadmap`.
    roadmap_display: Optional[str]
    roadmap_display: Optional[str]


In [5]:
def parse_input(state: State) -> State:
    """Split the raw comma-separated input into normalized skill names.

    Each token is stripped and lowercased. Blank tokens (from trailing or
    doubled commas, or an empty input) are dropped and duplicates are removed
    while keeping first-seen order, so they cannot inflate the match score
    computed later from ``user_skills``.

    Args:
        state: Graph state; reads ``state["user_input"]``.

    Returns:
        The same state object with ``state["user_skills"]`` set.
    """
    tokens = (token.strip().lower() for token in state["user_input"].split(","))
    # dict.fromkeys de-duplicates while preserving insertion order.
    state["user_skills"] = list(dict.fromkeys(token for token in tokens if token))
    return state

In [6]:
# Prompt for the `generate_careers` node. The downstream nodes index each job
# by the literal keys "job_title", "description" and "missing_skills", so the
# prompt spells those key names out instead of leaving them to the model.
# The key list avoids curly braces on purpose: braces in the template are
# treated as placeholders.
prompt_template = PromptTemplate(
    template="""
You are CareerSage, an AI career advisor.

Input: {input}.

Task:
1) Suggest 3-5 suitable career paths.
2) For each, provide:
   - Job title
   - Short description
   - Missing skills
   - Free online learning resources (provide at least one link per skill)
3) Return ONLY JSON with key "jobs" (list of job objects). Do NOT include extra text.
   Each job object must use exactly these keys:
   - "job_title": string
   - "description": string
   - "missing_skills": list of strings, one skill name per entry
   - "resources": object mapping each missing skill to a list of links
""",
    input_variables=["input"]
)

In [None]:
def generate_careers(state: State) -> State:
    """Ask the LLM for career suggestions based on the raw user input.

    Renders ``prompt_template`` with ``state["user_input"]``, sends it to
    ``gemini_llm`` as a single user message and stores the reply's
    ``content`` in ``state["raw_llm_output"]``.

    Returns:
        The same state object, updated in place.
    """
    rendered_prompt = prompt_template.format(input=state["user_input"])
    user_message = {"role": "user", "content": rendered_prompt}
    reply = gemini_llm.invoke([user_message])
    state["raw_llm_output"] = reply.content
    return state

In [8]:
def calculate_match(job_and_skills: dict) -> float:
    """
    Calculate how well user skills match a job.

    The score is ``matched / (matched + missing) * 100`` where ``missing`` is
    the job's distinct missing skills and ``matched`` is the number of
    distinct user skills that are not listed as missing. Note that every user
    skill not flagged as missing counts as a match, whether or not it is
    relevant to the job.

    Skill names are compared after stripping whitespace and lowercasing;
    blank entries and duplicates are ignored so they cannot skew the score.

    Args:
        job_and_skills: Dict with keys ``"job"`` (a job dict, optionally with
            ``"missing_skills"`` given as strings or ``{"skill": ...}`` dicts)
            and ``"user_skills"`` (list of skill names).

    Returns:
        Percentage in the range 0-100, rounded to two decimals; 0.0 when
        there are no skills at all.
    """
    job = job_and_skills["job"]

    missing_skills = set()
    # `or []` tolerates an explicit null for "missing_skills".
    for entry in job.get("missing_skills") or []:
        if isinstance(entry, dict):
            entry = entry.get("skill")
        if isinstance(entry, str) and entry.strip():
            missing_skills.add(entry.strip().lower())

    user_skills = {
        skill.strip().lower()
        for skill in job_and_skills["user_skills"]
        if isinstance(skill, str) and skill.strip()
    }

    matched_skills = len(user_skills - missing_skills)
    total_skills = matched_skills + len(missing_skills)
    if total_skills == 0:
        return 0.0
    return round((matched_skills / total_skills) * 100, 2)

In [None]:
def _normalize_missing_skills(raw_missing) -> list:
    """Flatten missing-skill entries (strings or {"skill": ...} dicts) to lowercase strings."""
    normalized = []
    for entry in raw_missing or []:
        if isinstance(entry, str):
            normalized.append(entry.lower())
        elif isinstance(entry, dict) and "skill" in entry:
            normalized.append(entry["skill"].lower())
    return normalized


def parse_and_match(state: State) -> State:
    """Parse the LLM's JSON reply and enrich every suggested job.

    The JSON object is taken as the span from the first ``{`` to the last
    ``}`` of ``state["raw_llm_output"]``, so surrounding prose or code
    fences are ignored. Each job in ``"jobs"`` then gets:

    * ``"missing_skills"`` normalized to a list of lowercase strings,
    * ``"match_percent"`` from ``calculate_match``,
    * ``"resources"``: a dict mapping each missing skill to the results of
      ``tavily_search``.

    A skill that appears in several jobs is searched only once per call.

    Returns:
        The same state object with ``state["jobs_data"]`` set. ``"jobs"`` is
        always present (an empty list if the model omitted it).

    Raises:
        json.JSONDecodeError: If no JSON object is found in the output or the
            extracted span is not valid JSON.
    """
    text = state["raw_llm_output"]
    start, end = text.find("{"), text.rfind("}") + 1
    if start == -1 or end <= start:
        # Without this guard the slice is empty and json.loads reports an
        # unhelpful "Expecting value" error.
        raise json.JSONDecodeError("No JSON object found in LLM output", text, 0)
    jobs_data = json.loads(text[start:end])

    # Later nodes index jobs_data["jobs"] directly, so make sure the key exists.
    jobs = jobs_data.setdefault("jobs", [])

    search_cache = {}
    for job in jobs:
        missing_skills = _normalize_missing_skills(job.get("missing_skills", []))
        job["missing_skills"] = missing_skills

        job["match_percent"] = calculate_match(
            {"job": job, "user_skills": state["user_skills"]}
        )

        job["resources"] = {}
        for skill in missing_skills:
            if skill not in search_cache:
                search_cache[skill] = tavily_search(f"free learning resources for {skill}")
            # Copy so jobs never share (and accidentally co-mutate) one list.
            job["resources"][skill] = list(search_cache[skill])

    state["jobs_data"] = jobs_data
    return state

In [None]:
def format_display(state: State) -> State:
    """Render the suggested jobs as a numbered plain-text summary.

    Reads ``state["jobs_data"]["jobs"]`` and, for each job, its
    ``job_title``, ``match_percent``, ``description`` and (optionally)
    ``missing_skills``. The text is stored in ``state["final_display"]``.

    Returns:
        The same state object, updated in place.
    """
    pieces = ["\nCareer Suggestions:\n"]
    for position, job in enumerate(state["jobs_data"]["jobs"], start=1):
        heading = f"\n{position}) {job['job_title']} — Match: {job['match_percent']}%"
        description = f"\n   Description: {job['description']}"
        missing = ", ".join(job.get("missing_skills", []))
        pieces.extend((heading, description, f"\n   Missing Skills: {missing}\n"))
    state["final_display"] = "".join(pieces)
    return state

In [None]:
def choose_career(state: State) -> State:
    """Default ``chosen_career`` to the first suggested job's title.

    A non-empty ``chosen_career`` already present in the state is kept
    as-is; otherwise the title of ``state["jobs_data"]["jobs"][0]`` is used.

    Returns:
        The same state object, updated in place.
    """
    if state.get("chosen_career"):
        return state

    first_job = state["jobs_data"]["jobs"][0]
    state["chosen_career"] = first_job["job_title"]
    return state


In [12]:
def show_resources(state: State) -> State:
    """Build the text listing of learning resources for the chosen career.

    Looks up the job whose ``job_title`` equals ``state["chosen_career"]``
    and lists at most five resources for each skill in its ``resources``
    mapping. The text is stored in ``state["resources_display"]``.

    Returns:
        The same state object, updated in place.

    Raises:
        ValueError: If no suggested job has the chosen title. (A bare
            ``next()`` would leak ``StopIteration`` here instead.)
    """
    chosen = state["chosen_career"]
    job = next(
        (j for j in state["jobs_data"]["jobs"] if j.get("job_title") == chosen),
        None,
    )
    if job is None:
        raise ValueError(f"Chosen career {chosen!r} is not among the suggested jobs.")

    max_per_skill = 5  # keep the listing short
    parts = [f"\n📘 Learning Resources for {chosen}:\n"]
    for skill, resources in job.get("resources", {}).items():
        parts.append(f"\n🔹 {skill.capitalize()}:\n")
        # Slicing already copes with lists shorter than the limit.
        for r in resources[:max_per_skill]:
            parts.append(f"   - {r['title']} ({r['url']})\n")
    state["resources_display"] = "".join(parts)
    return state

In [13]:
def generate_roadmap(state: State) -> State:
    """Build a day-by-day learning roadmap for the chosen career.

    Looks up the job whose ``job_title`` equals ``state["chosen_career"]``
    and gives each of its missing skills a consecutive four-day window
    (Day 1-4, Day 5-8, ...), listing the resources found for that skill.
    The text is stored in ``state["roadmap_display"]``.

    Returns:
        The same state object, updated in place.

    Raises:
        ValueError: If no suggested job has the chosen title. (A bare
            ``next()`` would leak ``StopIteration`` here instead.)
    """
    chosen = state["chosen_career"]
    job = next(
        (j for j in state["jobs_data"]["jobs"] if j.get("job_title") == chosen),
        None,
    )
    if job is None:
        raise ValueError(f"Chosen career {chosen!r} is not among the suggested jobs.")

    missing_skills = job.get("missing_skills", [])
    resources = job.get("resources", {})

    days_per_skill = 4  # length of the window allotted to each skill
    roadmap_text = f"\n📅 Learning Roadmap for {chosen}:\n"
    day = 1
    for skill in missing_skills:
        last_day = day + days_per_skill - 1
        roadmap_text += f"\n🗓 Day {day}-{last_day}: Focus on {skill.capitalize()}\n"
        for idx, r in enumerate(resources.get(skill, []), start=1):
            roadmap_text += f"   {idx}. {r['title']} ({r['url']})\n"
        day += days_per_skill

    roadmap_text += "\nContinue sequentially for all missing skills. Adjust pace based on your comfort level."
    state["roadmap_display"] = roadmap_text
    return state

In [14]:
# The agent is a straight pipeline: every node receives the shared State and
# hands it to the next node in this list.
_pipeline = [
    ("parse_input", parse_input),
    ("generate_careers", generate_careers),
    ("parse_and_match", parse_and_match),
    ("format_display", format_display),
    ("choose_career", choose_career),
    ("show_resources", show_resources),
    ("generate_roadmap", generate_roadmap),
]

graph_builder = StateGraph(State)
for _node_name, _node_fn in _pipeline:
    graph_builder.add_node(_node_name, _node_fn)

# START -> first node is the entry point; then chain each node to its
# successor and finish at END.
_route = [START, *(_name for _name, _ in _pipeline), END]
for _source, _target in zip(_route, _route[1:]):
    graph_builder.add_edge(_source, _target)

career_agent = graph_builder.compile()

In [17]:
# `invoke` has no `until=` / `start=` options, so pausing mid-graph is done
# with a checkpointer plus a breakpoint: the run stops before "choose_career",
# the user's pick is written into the saved state, and the same thread resumes.
# A fresh MemorySaver per run of this cell keeps re-execution independent.
interactive_agent = graph_builder.compile(
    checkpointer=MemorySaver(),
    interrupt_before=["choose_career"],
)
run_config = {"configurable": {"thread_id": "career-session"}}

# Step 1: Run until formatted display (execution pauses before choose_career)
partial_state = interactive_agent.invoke(
    {"user_input": "python, sql, machine learning"}, config=run_config
)
print(partial_state["final_display"])

# Step 2: Ask user for choice
jobs = partial_state["jobs_data"]["jobs"]
choice = input("\nWhich career number do you want to pursue? ")
try:
    choice_idx = int(choice) - 1
except ValueError:
    # Non-numeric answer: fall back to the first suggestion.
    choice_idx = 0
if not 0 <= choice_idx < len(jobs):
    # Out-of-range answers also fall back. This includes 0 and negatives,
    # which would otherwise silently index from the end of the list.
    choice_idx = 0
interactive_agent.update_state(
    run_config, {"chosen_career": jobs[choice_idx]["job_title"]}
)

# Step 3: Resume the paused run; a None input continues from the checkpoint
final_state = interactive_agent.invoke(None, config=run_config)
print(final_state["resources_display"])
print(final_state["roadmap_display"])



Career Suggestions:

1) Machine Learning Engineer — Match: 60.0%
   Description: Develops and implements machine learning models for various applications, requiring strong programming skills, statistical knowledge, and experience with machine learning frameworks.
   Missing Skills: deep learning, cloud computing (aws, azure, gcp)

2) Data Scientist — Match: 60.0%
   Description: Analyzes large datasets to extract insights and develop data-driven solutions, requiring strong analytical, statistical, and programming skills.
   Missing Skills: data visualization (tableau, power bi), big data technologies (spark, hadoop)

3) Data Engineer — Match: 60.0%
   Description: Builds and maintains data pipelines and infrastructure to support data analysis and machine learning, requiring strong programming, database, and system administration skills.
   Missing Skills: etl processes, devops principles

4) Business Intelligence Analyst — Match: 60.0%
   Description: Analyzes business data to identif