In [17]:
from langchain_openai import OpenAIEmbeddings
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain.vectorstores import Chroma
from dotenv import load_dotenv
import os

In [18]:
# Pull the settings defined in the local .env file into the environment.
load_dotenv()

# Gemini API key read from the environment; this is None when the variable is unset.
gemini_api_key = os.environ.get("GEMINI_API_KEY")

#### Step 1: Load and Parse Resume (PDF/Text)

In [19]:
from langchain.document_loaders import PyPDFLoader

# Resume PDF to index (path is relative to the notebook's working directory).
RESUME_PDF_PATH = "Rushikesh_Resume_DS_2025.pdf"

# Read the PDF's contents into `documents`.
loader = PyPDFLoader(RESUME_PDF_PATH)
documents = loader.load()

#You can also allow users to upload .txt files or extract from DOCX if needed.

#### Step 2: Split Resume into Chunks

In [20]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Chunking parameters handed to the splitter; tune these to trade context
# size against retrieval granularity.
CHUNK_SIZE = 500
CHUNK_OVERLAP = 50

splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)

# Break the loaded resume documents into overlapping chunks.
docs = splitter.split_documents(documents)

#### Step 3: Generate Embeddings using Gemini

In [21]:
# Embedding model used to vectorise the resume chunks.
# GoogleGenerativeAIEmbeddings is already imported in the notebook's first
# cell, so it is not re-imported here.
gemini_embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    google_api_key=gemini_api_key,
)


#### Step 4: Store Embeddings in a Vector Store (Chroma)

In [24]:
# Give every chunk a stable, position-based ID. Without explicit IDs each run
# of this cell adds a fresh copy of every chunk to the persisted
# 'rag_resume_db' collection, and those duplicates then crowd the retriever's
# top-k results. With fixed IDs a re-run overwrites the existing entries.
# NOTE(review): an index built before this change used auto-generated IDs;
# delete the 'rag_resume_db' directory once to clear any duplicates it holds.
chunk_ids = [f"resume-chunk-{i}" for i in range(len(docs))]

# Embed the chunks and persist them in a local Chroma collection.
vector_store = Chroma.from_documents(
    documents=docs,
    embedding=gemini_embeddings,
    ids=chunk_ids,
    persist_directory='rag_resume_db',
    collection_name='sample'
)

#### Step 5: Set up Retriever

In [25]:
# Number of chunks fetched from the vector store for each query.
TOP_K = 5
retriever = vector_store.as_retriever(search_kwargs={"k": TOP_K})

#### Step 6: Load Gemini LLM

In [26]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Chat model that generates the answers in the RAG chain.
gemini_model = ChatGoogleGenerativeAI(
    model='gemini-1.5-flash',
    google_api_key=gemini_api_key,
    temperature=0.3,
)


#### Step 7: Create the RAG Chain

In [27]:
from langchain.chains import RetrievalQA

# Wire the retriever and the Gemini chat model into a question-answering chain.
# The chain type is "stuff"; "map_reduce" and "refine" are alternatives to try.
rag_chain = RetrievalQA.from_chain_type(
    chain_type="stuff",
    retriever=retriever,
    llm=gemini_model,
)


#### Step 8: Ask Questions!

In [28]:
query = "What is my experience in natural language processing?"

# `Chain.run` is deprecated in current LangChain releases; `invoke` is the
# supported entry point. RetrievalQA takes the question under the "query" key
# and returns a dict whose "result" entry holds the answer text.
answer = rag_chain.invoke({"query": query})["result"]

In [29]:
# Show the generated answer as plain text.
print(answer)

Your experience in natural language processing (NLP) includes developing an NLP-based classification system using Gemini 1.5 Flash and GPT-4o Mini to automate lost lead categorization.  This system achieved 82% accuracy on 2,000 samples, validated with the call support team.  You also optimized prompting strategies and monitored hallucination/response drift to enhance LLM reliability.
