In [1]:
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv
from langchain.document_loaders import TextLoader,PyPDFLoader
from langchain_community.docstore.document import Document
from typing import List

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load KEY=VALUE pairs from a .env file into the process environment.
# NOTE(review): presumably this is how the OpenAI credentials reach the
# OpenAIEmbeddings / ChatOpenAI clients created in later cells — confirm.
load_dotenv()

True

In [3]:
def load_document(file_path: str) -> List[Document]:
    """Read the PDF at ``file_path`` and return it as a list of documents.

    The path is handed to ``PyPDFLoader`` and whatever its ``load()`` call
    produces is passed straight back to the caller.
    """
    return PyPDFLoader(file_path).load()

In [4]:
import os

# Location of the résumé PDF to analyse. Set the RESUME_PDF_PATH environment
# variable (for example in the .env file loaded by load_dotenv() above) to run
# the notebook on another file or another machine; when it is unset, the
# original local path is used so existing behaviour is unchanged.
RESUME_PDF_PATH = os.environ.get(
    "RESUME_PDF_PATH",
    r"C:/Users/PMYLS/Downloads/Khizer's Resume (1).pdf",
)
documents = load_document(RESUME_PDF_PATH)
# Bare last expression so the notebook displays the loaded documents.
documents


[Document(metadata={'producer': 'Canva', 'creator': 'Canva', 'creationdate': '2025-05-07T13:27:19+00:00', 'title': 'Purple and White Clean and Professional Resume', 'moddate': '2025-05-07T13:27:18+00:00', 'keywords': 'DAGmw1BIgjw,BAGc12IvLL4,0', 'author': 'Jack Sparrow', 'source': "C:/Users/PMYLS/Downloads/Khizer's Resume (1).pdf", 'total_pages': 2, 'page': 0, 'page_label': '1'}, page_content="KHIZER DAWOOD\nSUMMARY\nI am from AI background, with strong experience in machine learning and data science. I completed a Machine\nLearning internship at Unified Mentor India, where I gained hands-on experience in deploying machine learning\nmodels and implementing CI/CD workflows. I am further enhancing my skills through a Data Science Lab course\nfrom WorldQuant University. Additionally, I have practical experience working with Tableau for data visualization\nand dashboard creation, strengthening my ability to turn data insights into actionable results.\nSKILLS\nWORK EXPERIENCE\nPython Progra

In [5]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
def filter_documents(docs: List[Document]) -> List[Document]:
    """Return copies of ``docs`` whose metadata keeps only the 'source' entry.

    Each returned Document carries the original ``page_content`` unchanged and
    a metadata dict with the single key ``"source"`` (``None`` when the input
    document has no such entry). The input list is not modified.
    """
    return [
        Document(
            page_content=page.page_content,
            metadata={"source": page.metadata.get("source")},
        )
        for page in docs
    ]

In [6]:
# Reduce every loaded document's metadata to just its 'source' entry
# (see filter_documents above); page_content is carried over unchanged.
filter_docs=filter_documents(documents)
# Bare last expression so the notebook displays the filtered list.
filter_docs

[Document(metadata={'source': "C:/Users/PMYLS/Downloads/Khizer's Resume (1).pdf"}, page_content="KHIZER DAWOOD\nSUMMARY\nI am from AI background, with strong experience in machine learning and data science. I completed a Machine\nLearning internship at Unified Mentor India, where I gained hands-on experience in deploying machine learning\nmodels and implementing CI/CD workflows. I am further enhancing my skills through a Data Science Lab course\nfrom WorldQuant University. Additionally, I have practical experience working with Tableau for data visualization\nand dashboard creation, strengthening my ability to turn data insights into actionable results.\nSKILLS\nWORK EXPERIENCE\nPython Programming: Data Analysis , automation , API development (Flask, FlaskAPI)\nMachine Learning: Data Ingestion , Data Transformation, Model Training, Feature Engineering, Feature Selection,\nModel Deployment.\nAI & ML Frameworks: Scikit-learn, pandas, Numpy, Seaborn, Pytorch, pySpark, TensorFlow.\nTableau:

In [7]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
def text_split(filter_docs, chunk_size: int = 300, chunk_overlap: int = 30):
    """Split documents into overlapping chunks.

    Args:
        filter_docs: Documents to split; passed unchanged to
            ``RecursiveCharacterTextSplitter.split_documents``.
        chunk_size: Maximum chunk size given to the splitter. Defaults to 300,
            the value that used to be hard-coded here.
        chunk_overlap: Overlap between consecutive chunks given to the
            splitter. Defaults to 30, the value that used to be hard-coded.

    Returns:
        The chunk documents produced by ``split_documents``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(filter_docs)

In [8]:
# Split the metadata-filtered documents into chunks with text_split and
# report how many chunks were produced.
texts_chunks=text_split(filter_docs)
print(f"number of chunks {len(texts_chunks)}")

number of chunks 15


In [9]:
from langchain_openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
# Embedding client with library defaults (no model name is pinned here).
embeddings=OpenAIEmbeddings()
# Embed every chunk and build a FAISS vector store over the results.
vector_store=FAISS.from_documents(texts_chunks,embeddings)


In [10]:
# Wrap the vector store as a retriever: similarity search, returning the
# k=5 best-matching chunks for each query.
retriever=vector_store.as_retriever(search_type="similarity",search_kwargs={"k":5})

In [16]:
from langchain.prompts import PromptTemplate
# Prompt text for the résumé review. It has two placeholders: {context}
# (the résumé text supplied to the prompt) and {job_description}.
RESUME_REVIEW_TEMPLATE = """
You are a helpful assistant. 
You can answer from the given context text. 
If you don't have sufficient context, just say don't know. 
1. Give the Resume summary
2. job fit Score
3. Suggested improvements (skills, keywords)
{context}\n
Job Description: {job_description}
"""

prompt = PromptTemplate(
    input_variables=["context", "job_description"],
    template=RESUME_REVIEW_TEMPLATE,
)

In [12]:
from langchain_openai import ChatOpenAI
# Chat model created with library defaults; no model name, temperature or
# other option is pinned here.
model=ChatOpenAI()

In [22]:
from langchain.schema.runnable import RunnableLambda,RunnablePassthrough,RunnableParallel
def _join_page_contents(retrieved):
    """Concatenate the page_content of the retrieved documents, separated by blank lines."""
    return "\n\n".join([d.page_content for d in retrieved])


# Fan the single input out into the two variables the prompt needs:
# "context" is the text of the chunks the retriever returns for the input,
# "job_description" is the input itself, passed through unchanged.
parallel_chain = RunnableParallel(
    {
        "context": retriever | RunnableLambda(_join_page_contents),
        "job_description": RunnablePassthrough(),
    }
)


In [None]:
# Job description to score the résumé against.
# NOTE(review): this is currently an empty string — paste the job description
# between the triple quotes before running. parallel_chain sends this same
# string to the retriever as the search query and into the prompt's
# {job_description} slot, so an empty value gives the model nothing to
# compare the résumé with.
job_description = """"""

# retrieval + pass-through -> prompt -> chat model
main_chain = parallel_chain | prompt | model
result = main_chain.invoke(job_description)

# The chat model returns a message object; print only its text so the cell
# shows the answer rather than the message's metadata fields.
print(result.content)

1. Resume Summary:
Khizer Dawood is an AI/Machine Learning Engineer with hands-on experience in machine learning and data science. He completed a Machine Learning internship at Unified Mentor India, where he gained experience in deploying machine learning models using scikit-learn and TensorFlow. He has developed skills in machine learning model deployment and building end-to-end ML pipelines.

2. Job Fit Score:
Based on the provided information, Khizer Dawood seems to be a good fit for the AI/ML Engineer position. He has experience in working with ML frameworks such as scikit-learn and TensorFlow, as well as in developing prediction models and conducting research. He also has skills in database management and web development, which could be useful in this role.

3. Suggested Improvements:
- Enhance skills in PyTorch and other ML frameworks in addition to TensorFlow.
- Increase proficiency in weightage-based algorithms and LLM fine-tuning.
- Gain more experience with clustering and cla