In [None]:
import os
from dotenv import load_dotenv
import langgraph
from typing import Dict, TypedDict, Annotated, Optional
from langchain.document_loaders import PyPDFLoader, UnstructuredWordDocumentLoader
from langchain_ollama.embeddings import OllamaEmbeddings
from langchain.vectorstores import FAISS
from langchain_google_genai.llms import GoogleGenerativeAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langgraph.graph import StateGraph

In [None]:
# Load variables from a local .env file into the process environment.
# NOTE(review): presumably this is where the API key for the Google LLM
# client created in the next cell comes from — confirm.
load_dotenv()

In [None]:
# Text-generation model; used by summarize_candidates to write the summaries.
llm = GoogleGenerativeAI(model="gemini-2.0-flash")

# Embedding model; used by rank_candidates to vectorise the job description
# and the resume texts.
embeddings_model = OllamaEmbeddings(model="nomic-embed-text:latest")

In [None]:
class ResumeState(TypedDict):
    """State schema handed to ``StateGraph`` and passed to every node.

    The caller supplies ``folder_path`` and ``job_description``; the nodes in
    this notebook return the remaining keys (``resumes``,
    ``ranked_candidates``, ``summaries``).
    """

    # Directory that extract_resumes scans for resume files.
    # NOTE(review): the "static" metadata is just a string label here;
    # presumably it marks inputs that no node overwrites — confirm it has no
    # meaning to LangGraph.
    folder_path: Annotated[str, "static"]  
    # Free-text job description that rank_candidates embeds as the query.
    job_description: Annotated[str, "static"]  
    # Resume file name -> extracted text; returned by extract_resumes.
    resumes: Optional[Dict[str, str]]  
    # List of {"file": ..., "score": ...} dicts; returned by rank_candidates.
    ranked_candidates: Optional[list]  
    # Resume file name -> LLM-written summary; returned by summarize_candidates.
    summaries: Optional[Dict[str, str]] 

In [None]:
def extract_resumes(state: ResumeState) -> Dict:
    """Load the text of every PDF and DOCX resume in ``state["folder_path"]``.

    Extensions are matched case-insensitively (``.pdf``/``.PDF``,
    ``.docx``/``.DOCX``). Sub-directories and files with any other extension
    are skipped. The folder is scanned in sorted file-name order so repeated
    runs build the mapping in the same order.

    Args:
        state: Graph state; only ``folder_path`` is read.

    Returns:
        ``{"resumes": {file_name: text}}`` where ``text`` is the
        ``page_content`` of every loaded document joined with newlines.

    Raises:
        OSError: If ``folder_path`` cannot be listed (propagated from
            ``os.listdir``).
    """
    folder_path = state["folder_path"]
    resumes = {}

    # sorted(): os.listdir returns entries in arbitrary order.
    for file in sorted(os.listdir(folder_path)):
        file_path = os.path.join(folder_path, file)
        # A directory named e.g. "archive.pdf" must not be handed to a loader.
        if not os.path.isfile(file_path):
            continue

        lowered_name = file.lower()
        if lowered_name.endswith(".pdf"):
            loader = PyPDFLoader(file_path)
        elif lowered_name.endswith(".docx"):
            loader = UnstructuredWordDocumentLoader(file_path)
        else:
            continue

        docs = loader.load()
        resumes[file] = "\n".join(doc.page_content for doc in docs)

    return {"resumes": resumes}

In [None]:
def rank_candidates(state: ResumeState) -> Dict:
    """Rank every resume by embedding distance to the job description.

    All resume texts are indexed in a FAISS store, tagged with their file
    name. The job description is embedded once and the store is queried once
    for as many hits as there are resumes, so each resume gets its own score.

    The score is the raw value FAISS returns. With the default index built
    here that is an L2 distance, so a lower score means a closer match; the
    result is therefore sorted ascending, best candidate first.

    Args:
        state: Graph state; ``job_description`` and ``resumes`` are read.

    Returns:
        ``{"ranked_candidates": [{"file": str, "score": float}, ...]}``,
        best match first. The list is empty when ``resumes`` is empty or
        ``None``.
    """
    job_description = state["job_description"]
    resumes = state["resumes"]

    # An empty index cannot be built or queried, so return early.
    if not resumes:
        return {"ranked_candidates": []}

    files = list(resumes)
    vector_store = FAISS.from_texts(
        [resumes[file] for file in files],
        embeddings_model,
        # Tag each vector with its file name; this stays correct even when
        # two resumes have identical text.
        metadatas=[{"file": file} for file in files],
    )

    job_embedding = embeddings_model.embed_query(job_description)
    # The *_by_vector variant accepts a pre-computed embedding; the plain
    # similarity_search_with_score expects a query string.
    hits = vector_store.similarity_search_with_score_by_vector(
        job_embedding, k=len(files)
    )

    ranked_candidates = [
        {"file": doc.metadata["file"], "score": float(distance)}
        for doc, distance in hits
    ]
    # Lower distance = more similar, so ascending order puts the best first.
    ranked_candidates.sort(key=lambda candidate: candidate["score"])
    return {"ranked_candidates": ranked_candidates}

In [None]:
def summarize_candidates(state: ResumeState) -> Dict:
    """Ask the LLM for a key-strengths summary of every resume.

    Args:
        state: Graph state; only ``resumes`` is read.

    Returns:
        ``{"summaries": {file_name: summary_text}}``. The mapping is empty
        when ``resumes`` is empty or ``None``, and no LLM call is made in
        that case.
    """
    resumes = state["resumes"]
    if not resumes:
        return {"summaries": {}}

    prompt = PromptTemplate(
        input_variables=["resume"],
        template="Summarize the candidate's key strengths from the following resume:\n{resume}"
    )
    # Runnable pipeline: replaces the deprecated LLMChain / Chain.run pair.
    chain = prompt | llm

    summaries = {
        file: chain.invoke({"resume": text})
        for file, text in resumes.items()
    }
    return {"summaries": summaries}

In [None]:
# Build the workflow graph; ResumeState is the state schema shared by all nodes.
graph = StateGraph(ResumeState)


# Register the three node functions under the names used by the edges below.
graph.add_node("extract_resumes", extract_resumes)
graph.add_node("rank_candidates", rank_candidates)
graph.add_node("summarize_candidates", summarize_candidates)

# extract_resumes runs first; ranking and summarising are both direct
# successors of it and each returns a different state key.
# Neither successor has an outgoing edge.
# NOTE(review): confirm the installed LangGraph version accepts nodes without
# an explicit finish point.
graph.set_entry_point("extract_resumes")
graph.add_edge("extract_resumes", "rank_candidates")
graph.add_edge("extract_resumes", "summarize_candidates")

In [None]:
# Compile the graph definition into the object that is invoked below.
resume_agent = graph.compile()

In [None]:
# Initial graph state: the folder to scan for resumes and the job description
# that rank_candidates embeds as the similarity query.
input_data = {
    "folder_path": "./ENGINEERING",
    # The spelling matters here: this text is embedded and compared to resumes.
    "job_description": "Looking for an autocad professional and who is also well versed with Microsoft excel."
}

In [None]:
# Run the compiled graph on input_data. The cells below read the
# "ranked_candidates" and "summaries" keys from the result.
output = resume_agent.invoke(input_data)

In [None]:
# Print each candidate's file name and score, in the order rank_candidates
# returned them.
print("\n📌 Ranked Candidates:")
for candidate in output["ranked_candidates"]:
    print(f"✅ {candidate['file']} | Score: {candidate['score']}")



In [None]:
# Print the LLM-written summary for each resume file, followed by a blank line.
print("\n📜 Candidate Summaries:")
for file, summary in output["summaries"].items():
    print(f"📝 {file}: {summary}\n")