# Developing an LLM application using LangChain

In [None]:
# Silence every Python warning for the remainder of the session so the
# outputs below stay uncluttered.
import warnings

warnings.filterwarnings(action='ignore')

## Task 1: Import Libraries

In [None]:
import google.generativeai as genai
from IPython.display import display
from IPython.display import Markdown
import textwrap
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA

import os

# Read the Gemini API key from the environment instead of embedding it in
# the notebook: a key committed to source is readable by anyone with access
# to the file.
_api_key = os.environ.get("GOOGLE_API_KEY")
if not _api_key:
    raise RuntimeError(
        "Set the GOOGLE_API_KEY environment variable before running this notebook."
    )

genai.configure(api_key=_api_key)

# Model handle shared by the prompt, chat and generation-parameter cells below.
model = genai.GenerativeModel('gemini-2.0-flash-exp')


## Task 2: Ask The Questions Using Prompts

In [None]:
# Send one stand-alone prompt to the model and render the reply as Markdown.
response = model.generate_content(
    contents="Explain about Generative AI to a 20 year old kid.",
)
Markdown(response.text)

## Task 3: Chat With Gemini And Retrieve The Chat History

In [None]:
# Open a chat session; the session object accumulates the exchanged
# messages in `session.history`.
session = model.start_chat()
response = session.send_message(
    "Hi! Give me a recipe to make a margeritta pizza from scratch."
)
Markdown(response.text)

# Dump every message recorded so far, separated by blank lines.
for message in session.history:
    print(message, end='\n\n\n\n')
# `message` still refers to the last history entry once the loop finishes.
message.parts[0].text

# Count the tokens of a follow-up prompt without sending it to the chat.
model.count_tokens(
    "Now please help me find the nearest supermarket from where I can buy the ingredients."
)

## Task 4: Experiment With The Temperature Parameter

In [None]:
def get_response(prompt, generation_config=None):
    """Send ``prompt`` to the module-level ``model`` and return its response.

    Args:
        prompt: Content forwarded as ``contents`` to ``model.generate_content``.
        generation_config: Optional generation settings (for example a
            ``genai.types.GenerationConfig``). When omitted, an empty
            configuration is sent.

    Returns:
        The object returned by ``model.generate_content``.
    """
    # Create a fresh empty config per call rather than sharing one mutable
    # default dict between every invocation.
    if generation_config is None:
        generation_config = {}
    return model.generate_content(contents=prompt, generation_config=generation_config)

# Ask the same question once per temperature value and show each answer.
for temp in (0.0, 0.25, 0.5):
    config = genai.types.GenerationConfig(temperature=temp)
    result = get_response(
        prompt="Explain the concepts of XGBoost and Random Forest with real-life use cases",
        generation_config=config,
    )

    print(f"\n\nFor temperature value {temp}, the results are: \n\n")
    display(Markdown(result.text))

## Task 5: Experiment With Maximum Output Tokens

In [None]:
def get_response(prompt, generation_config=None):
    """Send ``prompt`` to the module-level ``model`` and return its response.

    Args:
        prompt: Content forwarded as ``contents`` to ``model.generate_content``.
        generation_config: Optional generation settings (for example a
            ``genai.types.GenerationConfig``). When omitted, an empty
            configuration is sent.

    Returns:
        The object returned by ``model.generate_content``.
    """
    # Create a fresh empty config per call rather than sharing one mutable
    # default dict between every invocation.
    if generation_config is None:
        generation_config = {}
    return model.generate_content(contents=prompt, generation_config=generation_config)

# Upper bound on the number of tokens requested for each answer.
max_tokens = 500

# Repeat the question per temperature, now with the output length capped.
for temp in (0.0, 0.25, 0.5):
    config = genai.types.GenerationConfig(
        temperature=temp,
        max_output_tokens=max_tokens,
    )
    result = get_response(
        prompt="Explain the concepts of XGBoost and Random Forest with real-life use cases",
        generation_config=config,
    )
    print(f"\n\nFor temperature value {temp}, the results are: \n\n")
    display(Markdown(result.text))

## Task 6: Experiment With the top_k Parameter

In [None]:
def get_response(prompt, generation_config=None):
    """Send ``prompt`` to the module-level ``model`` and return its response.

    Args:
        prompt: Content forwarded as ``contents`` to ``model.generate_content``.
        generation_config: Optional generation settings (for example a
            ``genai.types.GenerationConfig``). When omitted, an empty
            configuration is sent.

    Returns:
        The object returned by ``model.generate_content``.
    """
    # Create a fresh empty config per call rather than sharing one mutable
    # default dict between every invocation.
    if generation_config is None:
        generation_config = {}
    return model.generate_content(contents=prompt, generation_config=generation_config)

# Generation settings held fixed while the temperature varies.
MAX_TOKENS = 500
TOP_K = 3

# Compare the lowest and a high temperature with top_k applied.
for temp in (0.0, 1.0):
    config = genai.types.GenerationConfig(
        temperature=temp,
        max_output_tokens=MAX_TOKENS,
        top_k=TOP_K,
    )
    result = get_response(
        prompt="Explain the concepts of Class in Python for a 15 year old high school student with real-life use cases",
        generation_config=config,
    )
    print(f"\n\nFor temperature value {temp}, the results are: \n\n")
    display(Markdown(result.text))

## Task 7: Experiment With the top_p Parameter

In [None]:
def get_response(prompt, generation_config=None):
    """Send ``prompt`` to the module-level ``model`` and return its response.

    Args:
        prompt: Content forwarded as ``contents`` to ``model.generate_content``.
        generation_config: Optional generation settings (for example a
            ``genai.types.GenerationConfig``). When omitted, an empty
            configuration is sent.

    Returns:
        The object returned by ``model.generate_content``.
    """
    # Create a fresh empty config per call rather than sharing one mutable
    # default dict between every invocation.
    if generation_config is None:
        generation_config = {}
    return model.generate_content(contents=prompt, generation_config=generation_config)

# Generation settings held fixed while the temperature varies.
MAX_TOKENS = 500
TOP_K = 3
TOP_P = 0.6

# Compare the lowest and a high temperature with top_k and top_p applied.
for temp in (0.0, 1.0):
    config = genai.types.GenerationConfig(
        temperature=temp,
        max_output_tokens=MAX_TOKENS,
        top_k=TOP_K,
        top_p=TOP_P,
    )
    result = get_response(
        prompt="Explain the concepts of Class in Python for a 15 year old high school student with real-life use cases",
        generation_config=config,
    )
    print(f"\n\nFor temperature value {temp}, the results are: \n\n")
    display(Markdown(result.text))

## Task 8: Experiment With the candidate_count Parameter

In [None]:
def get_response(prompt, generation_config=None):
    """Send ``prompt`` to the module-level ``model`` and return its response.

    Args:
        prompt: Content forwarded as ``contents`` to ``model.generate_content``.
        generation_config: Optional generation settings (for example a
            ``genai.types.GenerationConfig``). When omitted, an empty
            configuration is sent.

    Returns:
        The object returned by ``model.generate_content``.
    """
    # Create a fresh empty config per call rather than sharing one mutable
    # default dict between every invocation.
    if generation_config is None:
        generation_config = {}
    return model.generate_content(contents=prompt, generation_config=generation_config)

# Generation settings held fixed while the temperature varies.
MAX_TOKENS = 500
TOP_K = 3
TOP_P = 0.6
CANDIDATE_COUNT = 1

# Compare the lowest and a high temperature with every sampling knob set,
# including the number of candidate responses requested.
for temp in (0.0, 1.0):
    config = genai.types.GenerationConfig(
        temperature=temp,
        max_output_tokens=MAX_TOKENS,
        top_k=TOP_K,
        top_p=TOP_P,
        candidate_count=CANDIDATE_COUNT,
    )
    result = get_response(
        prompt="Explain the concepts of Class in Python for a 15 year old high school student with real-life use cases",
        generation_config=config,
    )
    print(f"\n\nFor temperature value {temp}, the results are: \n\n")
    display(Markdown(result.text))

## Task 9: Introduction to Retrieval Augmented Generation

## Task 10: Load the PDF and Extract the Texts

In [None]:
# Splitter settings: chunk size and the overlap between neighbouring chunks,
# passed to RecursiveCharacterTextSplitter further down.
CHUNK_SIZE, CHUNK_OVERLAP = 700, 100

# Location of the PDF that serves as the knowledge source for retrieval.
pdf_path = "https://www.analytixlabs.co.in/assets/pdfs/Data_Engineering%20&_Other_Job_Roles-AnalytixLabs.pdf"

In [None]:
# Build a loader for the PDF at `pdf_path`, then load it as a list of
# document objects (each exposes `page_content`, which is read below).
pdf_loader = PyPDFLoader(file_path=pdf_path)
split_pdf_document = pdf_loader.load_and_split()

In [None]:
# Splitter configured with the chunk size and overlap chosen above.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)

# Stitch the text of every loaded document into one string (blank line
# between documents), then cut that string into chunks.
context = "\n\n".join(
    str(document.page_content) for document in split_pdf_document
)
texts = text_splitter.split_text(context)

## Task 11: Create the Gemini Model and Create the Embeddings

In [None]:
import os

# Chat model used by the retrieval chain. The API key comes from the
# GOOGLE_API_KEY environment variable so that no credential is stored in
# the notebook; a missing variable fails immediately with a KeyError.
gemini_model = ChatGoogleGenerativeAI(
    model='gemini-2.0-flash-exp',
    google_api_key=os.environ["GOOGLE_API_KEY"],
    temperature=0.0,
)

In [None]:
import os

# Embedding model used to vectorise the text chunks. The API key comes from
# the GOOGLE_API_KEY environment variable so that no credential is stored in
# the notebook; a missing variable fails immediately with a KeyError.
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    google_api_key=os.environ["GOOGLE_API_KEY"],
)


In [None]:
# Embed the text chunks and store them in a Chroma vector store.
vector_index = Chroma.from_texts(texts=texts, embedding=embeddings)

# Retriever configured with k=5 as its search argument.
retriever = vector_index.as_retriever(
    search_kwargs={"k": 5},
)

## Task 12: Create the RAG Chain and Ask Query

In [None]:
# Combine the chat model with the retriever into a question-answering chain
# that also returns the source documents it used.
qa_chain = RetrievalQA.from_chain_type(
    llm=gemini_model,
    retriever=retriever,
    return_source_documents=True,
)


In [None]:
# Run one question through the RAG chain and print the generated answer,
# which the chain returns under the "result" key.
question = "Which tools do Data Engineers primarily work with?"
payload = {"query": question}
result = qa_chain.invoke(payload)
answer = result["result"]
print("Answer:", answer)