In [1]:
import os
from dotenv import load_dotenv
load_dotenv()
from langchain_google_genai import ChatGoogleGenerativeAI

llm = ChatGoogleGenerativeAI(model="gemini-pro",api_key=os.environ.get("GOOGLE_API_KEY"))

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001",google_api_key=os.environ.get("GOOGLE_API_KEY"))

In [3]:
# Helper function to format and print document content
def pretty_print_docs(docs):
    # Print each document in the list with a separator between them
    print(
        f"\n{'-' * 100}\n".join(  # Separator line for better readability
            [f"Document {i+1}:\n\n" + d.page_content for i, d in enumerate(docs)]  # Format: Document number + content
        )
    )

In [4]:
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

In [5]:
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
def load_pdf(file_path):
    loader = PyPDFLoader(file_path)
    docs = loader.load()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    all_splits = text_splitter.split_documents(docs)
    return all_splits

documents=load_pdf("Minakshee_Narayankar_Resume.pdf")

# Configure text splitter
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)

# Split documents into chunks
texts = text_splitter.split_documents(documents)



# Create FAISS index from documents and set up retriever
retriever = FAISS.from_documents(texts, embeddings).as_retriever(
    search_kwargs={"k": 10}
)

# Define the query
query = "what are the skills present in the resume ?"

# Execute the query and retrieve results
docs = retriever.invoke(query)

# Display the retrieved documents
pretty_print_docs(docs)

Document 1:

• Led a seminar for 200+ junior peers, covering this project with concepts of computer vision and object detection.
TECHNICAL SKILLS
----------------------------------------------------------------------------------------------------
Document 2:

• Led a seminar for 200+ junior peers, covering this project with concepts of computer vision and object detection.
TECHNICAL SKILLS
Software Development: Python, OOPS, Django, Flask, PostgreSQL, c++, HTML, CSS.
Artificial Intelligence: Generative AI, Natural Language Processing, Computer Vision, Machine Learning, Deep Learning.
Developer Tools: HuggingFace, Google Colaboratory, Visual Studio Code, Jupyter Notebook, Pycharm, Github.
Cloud Services: Azure, AWS
ACHIEVEMENTS
----------------------------------------------------------------------------------------------------
Document 3:

Cloud Services: Azure, AWS
ACHIEVEMENTS
• Received Customer Appreciation and team awards for developing a time-series application and demonstrating l

In [6]:
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import CrossEncoderReranker
from langchain_community.cross_encoders import HuggingFaceCrossEncoder

# Initialize the model
model = HuggingFaceCrossEncoder(model_name="cross-encoder/ms-marco-MiniLM-L-6-v2")

# Select the top 3 documents
compressor = CrossEncoderReranker(model=model, top_n=3)

# Initialize the contextual compression retriever
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor, base_retriever=retriever
)



To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


In [7]:
# Retrieve compressed documents
compressed_docs = compression_retriever.invoke("what are the skills present in the resume?")

# Display the documents
pretty_print_docs(compressed_docs)

Document 1:

• Led a seminar for 200+ junior peers, covering this project with concepts of computer vision and object detection.
TECHNICAL SKILLS
----------------------------------------------------------------------------------------------------
Document 2:

• Led a seminar for 200+ junior peers, covering this project with concepts of computer vision and object detection.
TECHNICAL SKILLS
Software Development: Python, OOPS, Django, Flask, PostgreSQL, c++, HTML, CSS.
Artificial Intelligence: Generative AI, Natural Language Processing, Computer Vision, Machine Learning, Deep Learning.
Developer Tools: HuggingFace, Google Colaboratory, Visual Studio Code, Jupyter Notebook, Pycharm, Github.
Cloud Services: Azure, AWS
ACHIEVEMENTS
----------------------------------------------------------------------------------------------------
Document 3:

Cloud Services: Azure, AWS
ACHIEVEMENTS
• Received Customer Appreciation and team awards for developing a time-series application and demonstrating l