In [None]:
!pip install langchain langchain-community langchain-text-splitters langchain-google-genai langchain-chroma google-generativeai chromadb

Collecting langchain-community
  Downloading langchain_community-0.3.19-py3-none-any.whl.metadata (2.4 kB)
Collecting langchain-google-genai
  Downloading langchain_google_genai-2.1.0-py3-none-any.whl.metadata (3.6 kB)
Collecting langchain-chroma
  Downloading langchain_chroma-0.2.2-py3-none-any.whl.metadata (1.3 kB)
Collecting chromadb
  Downloading chromadb-0.6.3-py3-none-any.whl.metadata (6.8 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.8.1-py3-none-any.whl.metadata (3.5 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting filetype<2.0.0,>=1.2.0 (from langchain-google-genai)
  Downloading filetype-1.2.0-py2.py3-none-any.whl.metadata (6.5 kB)
Collecting google-ai-generativelanguage<0.7.0,>=0.6.16 (f

In [None]:
!pip install jq

Collecting jq
  Downloading jq-1.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.0 kB)
Downloading jq-1.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (746 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m746.6/746.6 kB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: jq
Successfully installed jq-1.8.0


In [None]:
# Pull the API key from Colab's secret store (secret name: 'google.studio.ai')
# so the key itself never appears in the notebook.
from google.colab import userdata

api_key = userdata.get('google.studio.ai')

In [None]:
# Debug aid: long-format listing of the working directory, dotfiles included
# (`-all` is read by ls as the bundled short flags -a -l -l).
!ls -all

total 20
drwxr-xr-x 1 root root 4096 Mar 14 13:46 .
drwxr-xr-x 1 root root 4096 Mar 14 13:38 ..
drwxr-xr-x 4 root root 4096 Mar 12 13:34 .config
drwx------ 6 root root 4096 Mar 14 13:46 drive
drwxr-xr-x 1 root root 4096 Mar 12 13:35 sample_data


In [None]:
import os
from langchain_community.document_loaders import DirectoryLoader, TextLoader, JSONLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_google_genai.embeddings import GoogleGenerativeAIEmbeddings
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# Expose the key via GOOGLE_API_KEY; the embeddings client below is created
# without an explicit key and relies on this environment variable.
os.environ["GOOGLE_API_KEY"] = api_key

# Folder on the mounted Drive that holds the source material to index.
BUNDLE_DIR = "/content/drive/MyDrive/Bundle"
EXPERIENCE_JSON = os.path.join(BUNDLE_DIR, "experience.json")

# Plain-text sources: files in the bundle folder matching *.txt.
loader = DirectoryLoader(
    path=BUNDLE_DIR,
    glob="*.txt",
    loader_cls=TextLoader
)

# Structured source: iterate the top-level JSON array ('.[]') and use each
# element's "message" field as the document text.
json_loader = JSONLoader(
    file_path=EXPERIENCE_JSON,
    jq_schema='.[]',
    content_key="message"
)

documents = loader.load()

# `json_loader` used to be constructed but never loaded, so experience.json was
# silently left out of the index. Merge it in when the file is present; when it
# is absent, keep the previous text-only behaviour and say so instead of failing.
if os.path.isfile(EXPERIENCE_JSON):
    documents.extend(json_loader.load())
else:
    print(f"Skipping {EXPERIENCE_JSON}: file not found")


# Break the loaded documents into chunks of at most 1000 characters, with a
# 100-character overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100
)
splits = text_splitter.split_documents(documents)

# Deterministic chunk ids of the form "<source>#<n>" (n = position of the chunk
# within its source). Without explicit ids every run of this cell stores the same
# chunks again under fresh random ids in the persisted ./chroma_db, so the
# retriever starts returning duplicates. Stable ids make a re-run overwrite the
# existing entries instead.
_chunks_seen = {}
split_ids = []
for _chunk in splits:
    _source = _chunk.metadata.get("source", "unknown")
    _position = _chunks_seen.get(_source, 0)
    _chunks_seen[_source] = _position + 1
    split_ids.append(f"{_source}#{_position}")

embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=embeddings,
    ids=split_ids,
    persist_directory="./chroma_db"
)

# Retrieve the 5 best-matching chunks for each question.
retriever = vectorstore.as_retriever(search_kwargs={"k": 5})

# Chat model that writes the final answer from the retrieved context.
# The key is read back from the environment variable set earlier in this cell.
llm = ChatGoogleGenerativeAI(
    temperature=0.2,
    model="gemini-1.5-pro",
    google_api_key=os.environ["GOOGLE_API_KEY"],
)

# Instruction prompt for the recruiter Q&A chain. `{context}` receives the
# retrieved chunks and `{question}` the recruiter's question.
# The wording was corrected (missing verb in "try to related information",
# "the my strengths", mixed "my"/"Dip's" while asking for third person) so the
# instructions the model receives are unambiguous.
# NOTE(review): the context string is built from page_content only, so the model
# never sees file names such as "experience.txt" — confirm whether source names
# should be included in the context for that instruction to be actionable.
template = """
You are an AI assistant that helps answer recruiter questions based on Dip's personal information.
Use ONLY the provided context to answer the question. If the information is not explicitly in the context, try to find related information in the context and infer the answer from it.
Be concise, logical, respectful, and highlight Dip's strengths relevant to the question. Please use third-person pronouns. Also, whenever you are asked a moral question, try to find something positive in Dip's text and use it.

If possible, please keep the answer short and concise.

"experience.txt" has all the information about Dip's experience, so whenever a question about skills or experience is asked, refer to that file.

Context:
{context}

Recruiter's Question: {question}

Response:
"""
prompt = ChatPromptTemplate.from_template(template)

def format_docs(docs):
    """Merge retrieved documents into a single context string.

    Reads the ``page_content`` attribute of every item in ``docs`` (in order)
    and joins the texts with one blank line between them. An empty ``docs``
    yields an empty string.
    """
    separator = "\n\n"
    contents = (item.page_content for item in docs)
    return separator.join(contents)

# RAG pipeline: the incoming question is sent to the retriever (whose hits are
# flattened by format_docs into the "context" slot) and also passed through
# unchanged as "question"; the filled prompt goes to the LLM and the reply is
# parsed into a plain string.
_prompt_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}
rag_chain = _prompt_inputs | prompt | llm | StrOutputParser()

def answer_recruiter_question(question):
    """Answer a recruiter's question using the RAG chain.

    Args:
        question: The question text; it is handed unchanged to ``rag_chain``.

    Returns:
        Whatever ``rag_chain.invoke`` produces for that question (the chain
        ends in ``StrOutputParser``).
    """
    answer = rag_chain.invoke(question)
    return answer

In [None]:
# Smoke test: run one sample recruiter question through the chain and show the reply.
question = "Does he have experience in LLMs"
response = answer_recruiter_question(question)
print(response)

He has experience with transformer models, which are the foundation of LLMs. He fine-tuned pre-trained transformer models for clickbait classification and generation, achieving 77.53% accuracy in classification and a 0.299 METEOR score in generation.  While this demonstrates his understanding of transformer architecture, it's not explicitly stated whether he has worked with Large Language Models specifically.
