In [12]:
# Keeping the api key safe :)
import os
from dotenv import load_dotenv

# Load environment variables from a .env file (recommended)
load_dotenv()
api_key = os.getenv("OPENROUTER_API_KEY")

In [13]:
# CELL: SIMPLE CHAIN — MODERN LANGCHAIN (v0.2+), NO AGENT, NO STUCK

import pandas as pd
import json
import re
from langchain_openai import ChatOpenAI
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser

MODEL = "mistralai/mistral-7b-instruct"  # or just "mistralai/mistral-7b-instruct"
MAX_PROFILES = 15

# === LOAD CSV ===
df = pd.read_csv("employee_skills_dataset.csv").fillna("")
print(f"Loaded {len(df)} employees")

# === LLM SETUP ===
llm = ChatOpenAI(
    model=MODEL,
    openai_api_key=api_key,  # Replace!
    openai_api_base="https://openrouter.ai/api/v1",
    temperature=0.1,
    max_tokens=1024
)

# === STEP 1: EXTRACT NEED ===
extract_prompt = PromptTemplate.from_template("""
Summarize this request in 3 bullets:
• Role & seniority
• Skills + years
• Domain

Request: {query}

Bullets:
""")

extract_chain = extract_prompt | llm | StrOutputParser()

# === STEP 2: LOAD PROFILES ===
def load_profiles(pm_need):
    profiles = []
    for _, row in df.head(MAX_PROFILES).iterrows():
        profiles.append({
            "id": int(row["ID"]),
            "name": row["Name"],
            "role": row["Role"],
            "seniority": row["Seniority"],
            "skills": row["Skills"],
            "years": int(row["Years_Experience"]),
            "domain": row["Last_Project_Domain"],
            "availability": int(row["Availability_Score"]),
            "bio": row["Bio"][:500]
        })
    return json.dumps(profiles, ensure_ascii=False, indent=2)

# === STEP 3: RANK TOP 5 ===
rank_prompt = PromptTemplate.from_template("""
PM Need:
{need}

All Candidates (JSON):
{profiles}

**TASK**: Return **ONLY** valid JSON array of **TOP 5** best matches.
Score 0-100. 2-sentence justification each.

Format exactly:
[
  {{"id": 101, "name": "Alice", "fit_score": 92, "justification": "Sentence 1. Sentence 2."}},
  ...
]

TOP 5 JSON ONLY. No other text.
""")

rank_chain = rank_prompt | llm | StrOutputParser()

# === RUN PIPELINE ===
def find_talent(query: str):
    print(f"\nQUERY: {query}")

    # Step 1: Extract
    print("\n1. Extracting need...")
    need = extract_chain.invoke({"query": query}).strip()
    print(f"   Need: {need}")

    # Step 2: Load
    print("\n2. Loading profiles...")
    profiles = load_profiles(need)

    # Step 3: Rank
    print("\n3. Ranking top 5...")
    result = rank_chain.invoke({"need": need, "profiles": profiles})

    # Parse JSON
    match = re.search(r'\[.*\]', result, re.DOTALL)
    if match:
        try:
            candidates = json.loads(match.group(0))
            print(f"\nTOP {len(candidates)} MATCHES:\n")
            for i, c in enumerate(candidates[:5], 1):
                print(f"{i}. {c['name']} | {c['fit_score']}/100")
                print(f"   {c['justification']}\n")
            return candidates
        except json.JSONDecodeError as e:
            print("JSON Parse Error:", e)
            print("Raw output:", result)
            return []
    else:
        print("No JSON array found. Raw result:", result)
        return []

# === TEST ===
query = input("PM Request: ") or "Staff with Python skills in Fintech domain, at least 3 years experience"
find_talent(query)

Loaded 15 employees

QUERY: I need a senior with Java Experience


1. Extracting need...
   Need: • Senior Java Developer
• 5+ years of Java experience
• Software development or related domain

2. Loading profiles...

3. Ranking top 5...
JSON Parse Error: Expecting value: line 1 column 2 (char 1)
Raw output:  <s> [OUT] [
  {"id": 104, "name": "Diana", "fit_score": 95, "justification": "Diana has 10 years of Java experience and is a Senior Backend Developer, perfectly matching the role requirements. Her expertise in Spring Boot and microservices aligns with modern software development practices."},
  {"id": 115, "name": "Oscar", "fit_score": 90, "justification": "Oscar is a Senior Backend Developer with 8 years of Java experience, specializing in Django and PostgreSQL, which are highly relevant to software development. His strong background in scalable API design and CI/CD pipelines makes him a strong candidate."},
  {"id": 101, "name": "Alice", "fit_score": 85, "justification": "Alice 

[]