In [34]:
import os
from dotenv import load_dotenv

# Load API keys from the local .env file so that secrets stay out of the
# notebook; the os.getenv(...) calls in this cell read the loaded values.
load_dotenv()

# Set environment variables
# os.environ accepts only str values, while os.getenv returns None for an
# unset key; assigning that None raises TypeError. Copy only the keys that are
# actually defined and report the ones that are missing.
_missing_keys = []
for _key in ("OPENAI_API_KEY", "LANGCHAIN_API_KEY", "LANGCHAIN_PROJECT"):
    _value = os.getenv(_key)
    if _value is None:
        _missing_keys.append(_key)
    else:
        os.environ[_key] = _value
if _missing_keys:
    print("Warning: environment variables not set:", ", ".join(_missing_keys))

# Tracing flag is a fixed literal, so it is always safe to set.
os.environ["LANGCHAIN_TRACING_V2"] = "true"


In [46]:
from langchain_community.document_loaders import PyMuPDFLoader

# Load and read PDF resume
# The location can be overridden with the RESUME_PDF_PATH environment variable
# (for example in .env) so the notebook is not tied to a single machine; the
# default is the original local path.
RESUME_PDF_PATH = os.getenv(
    "RESUME_PDF_PATH",
    "C://Users//HARSHIT//Downloads//Harshit_Resume.pdf",
)
loader = PyMuPDFLoader(RESUME_PDF_PATH)
doc = loader.load()

In [47]:
# Inspect the loaded pages. NOTE: the repr prints the full resume text,
# including the e-mail address and phone number -- clear this output before
# sharing the notebook.
doc

[Document(metadata={'producer': 'pdfTeX-1.40.26', 'creator': 'LaTeX with hyperref', 'creationdate': '2025-05-25T13:11:13+00:00', 'source': 'C://Users//HARSHIT//Downloads//Harshit_Resume.pdf', 'file_path': 'C://Users//HARSHIT//Downloads//Harshit_Resume.pdf', 'total_pages': 1, 'format': 'PDF 1.5', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': '2025-05-25T13:11:13+00:00', 'trapped': '', 'modDate': 'D:20250525131113Z', 'creationDate': 'D:20250525131113Z', 'page': 0}, page_content='Harshit Khatta\nharshitkhatta7@gmail.com | +91 62807 33152 | LinkedIn | GitHub: HarshitKhatta\nExperience\nData Analyst\nDec 2024 - Feb 2025\nErnst & Young (EY)\nNoida, India\n• Automated data pipelines by building Python scripts for data cleaning, transformation, and validation\nacross multiple sources, improving data quality and reducing processing time by 30%\n• Designed and deployed Alteryx ETL workflows, reducing manual data handling time by 40% and\nincreasing reporting efficiency for

In [48]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Split the loaded resume pages into overlapping chunks for embedding
# (chunk_size=500 with an overlap of 50 between neighbouring chunks).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
)

# The loader cell stores its pages in `doc`; `documents` is never defined in
# this notebook, so the previous call raised NameError on a fresh kernel.
split_docs = text_splitter.split_documents(doc)


In [49]:
# Inspect the chunks produced by the splitter. NOTE: the repr echoes the
# resume text (contact details included) -- clear this output before sharing.
split_docs

[Document(metadata={'producer': 'pdfTeX-1.40.26', 'creator': 'LaTeX with hyperref', 'creationdate': '2025-05-25T13:11:13+00:00', 'source': 'C://Users//HARSHIT//Downloads//Harshit_Resume.pdf', 'file_path': 'C://Users//HARSHIT//Downloads//Harshit_Resume.pdf', 'total_pages': 1, 'format': 'PDF 1.5', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': '2025-05-25T13:11:13+00:00', 'trapped': '', 'modDate': 'D:20250525131113Z', 'creationDate': 'D:20250525131113Z', 'page': 0}, page_content='Harshit Khatta\nharshitkhatta7@gmail.com | +91 62807 33152 | LinkedIn | GitHub: HarshitKhatta\nExperience\nData Analyst\nDec 2024 - Feb 2025\nErnst & Young (EY)\nNoida, India\n• Automated data pipelines by building Python scripts for data cleaning, transformation, and validation\nacross multiple sources, improving data quality and reducing processing time by 30%\n• Designed and deployed Alteryx ETL workflows, reducing manual data handling time by 40% and'),
 Document(metadata={'producer': '

In [None]:
from langchain_community.embeddings import OllamaEmbeddings

# Name of the Ollama embedding model used to vectorise the resume chunks.
EMBEDDING_MODEL_NAME = "nomic-embed-text"
embedding = OllamaEmbeddings(model=EMBEDDING_MODEL_NAME)


In [51]:
from langchain_community.vectorstores import FAISS

# Embed every resume chunk into a FAISS index, then expose that index through
# the retriever interface consumed by the retrieval chain.
vectorstore = FAISS.from_documents(
    documents=split_docs,
    embedding=embedding,
)
retriever = vectorstore.as_retriever()


In [52]:
# Job description the resume is evaluated against. The text is inserted into
# the system prompt when the prompt is built, so edit it before running the
# prompt cell.
job_description = """
We are looking for a Python developer with strong experience in machine learning, 
NLP, Pandas, and Scikit-learn. Familiarity with LLM tools, good communication skills, and 
a strong understanding of data pipelines is preferred.
"""

In [None]:
from langchain_core.prompts import ChatPromptTemplate


# Evaluation prompt.
#   {context}         -- filled by the documents chain with the retrieved resume chunks
#   {input}           -- the user's question, passed at invoke time
#   {job_description} -- bound immediately via .partial(); binding it as a
#                        variable (instead of concatenating it into the template
#                        text) keeps any literal "{" or "}" in the job
#                        description from being parsed as template placeholders.
prompt = ChatPromptTemplate.from_messages([
    (
        "system",
        "You are a professional resume evaluator.\n"
        "You are a recruitment consultant or HR expert.\n"
        "Given the candidate's resume and job description, evaluate the candidate's fit for the role.\n"
        "List strengths, weaknesses, improvement suggestions, top skills, missing skills, and give a fit score out of 100.\n"
        "Be concise and clear.\n"
        "Resume:\n"
        "{context}\n\n"
        "Job Description:\n"
        "{job_description}",
    ),
    ("human", "{input}"),
]).partial(job_description=job_description)



In [54]:
from langchain.llms import Ollama
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain

In [55]:
# Ollama model that writes the evaluation.
llm = Ollama(model="gemma3:1b")

# Documents chain: feeds the retrieved resume chunks to the prompt and the LLM.
doc_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

# Retrieval chain: looks up chunks with the retriever, then runs the
# documents chain on them.
retrieval_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=doc_chain,
)

In [56]:
# Run the evaluation.
# The job description is fixed inside `prompt` when the prompt cell runs, so
# redefining job_description here would have no effect; to evaluate against a
# different description, change it in its own cell and re-run the prompt and
# chain cells.
query = "Evaluate the resume based on the job description above."
evaluation_request = {"input": query}
response = retrieval_chain.invoke(evaluation_request)

# Header first, then the model's answer from the chain's result mapping.
print("\n📝 Evaluation Result:\n")
print(response["answer"])


📝 Evaluation Result:

Okay, let’s break down this candidate’s resume and determine a fit score.

**Overall Fit Score: 85/100**

**Strengths:**

*   **Strong Machine Learning Focus:** The resume directly highlights experience with several key ML techniques: Linear & Logistic Regression, Decision Trees, Random Forest, SVM, KNN, Naive Bayes, and more. This is *critical* for the job description.
*   **NLP Expertise:** The experience with RNNs, sentiment classification (IMDB), text vectorization, and Streamlit demonstrates a solid understanding of Natural Language Processing.
*   **Data Engineering Skills:**  The experience with data pipelines, ETL workflows, and database interaction (MongoDB, SQLite, Oracle) showcases practical data management skills – crucial for a developer role.
*   **Tools & Technologies:**  The inclusion of TensorFlow, Keras, Scikit-learn, Pandas, NumPy, Matplotlib, Seaborn, Plotly, Git, GitHub, and Alteryx are all highly relevant to the job.
*   **Experience with St