In [1]:
import os
from dotenv import load_dotenv

# Pull the variables defined in a local .env file into the process environment.
load_dotenv()

# Model-provider credentials; each value is None when the variable is not set.
HUGGINGFACE_TOKEN = os.environ.get("HUGGINGFACE_TOKEN")
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

# Qdrant connection settings, read the same way.
QDRANT_HOST = os.environ.get("QDRANT_HOST")
QDRANT_API_KEY = os.environ.get("QDRANT_API_KEY")
QDRANT_COLLECTION_NAME = os.environ.get("QDRANT_COLLECTION_NAME")

In [2]:
# LangSmith - used to trace this application.
# The tracing switch is read from the process environment, so binding a Python
# variable alone does not enable it; the flag is exported to os.environ as well.
LANGCHAIN_TRACING_V2 = "true"
os.environ["LANGCHAIN_TRACING_V2"] = LANGCHAIN_TRACING_V2

# API key for LangSmith; None when the variable is not set in the environment/.env.
LANGCHAIN_API_KEY = os.getenv("LANGCHAIN_API_KEY")

In [3]:
# Load every PDF found directly under Data/ as LangChain Document objects
from langchain_community.document_loaders import PyMuPDFLoader, DirectoryLoader

# PyMuPDF is the per-file parser (picked by the author for parsing speed).
# Extracting text from images inside the PDFs would additionally need:
#   pip install rapidocr-onnxruntime
loader = DirectoryLoader(
    'Data/',
    loader_cls=PyMuPDFLoader,
    glob="*.pdf",
)
data = loader.load()  # each Document carries page_content plus metadata

# Keep only the text of each loaded Document; metadata is not carried forward
texts = [loaded.page_content for loaded in data]

In [4]:
# Text splitter: semantic chunking.
# SemanticChunker embeds the text and places chunk boundaries where the
# difference between neighbouring pieces is large, instead of cutting at a
# fixed character count.
from langchain_experimental.text_splitter import SemanticChunker
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Google embedding model used only to decide where the breakpoints go
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

# "percentile": break where the difference exceeds a percentile threshold
text_splitter = SemanticChunker(embeddings, breakpoint_threshold_type="percentile")

# One Document per chunk, built from the raw page texts
docs = text_splitter.create_documents(texts)

In [5]:
# Sanity check: show the first chunk produced by the splitter
first_chunk = docs[0]
print(first_chunk)

page_content='Marvel’s The Avengers\nenglish.com/englishreaders Published by Pearson Education Limited\nMarvel’s The Avengers – Teacher’s notes\u2003 \u2003 1 of 3\nTeacher’s notes\nLEVEL 2\nTeacher Support Programme\n© 2018 MARVEL \nAvengers win and Thor takes Loki and the Tesseract back \nhome to Asgard. Chapter 1: The Other wants to get the Tesseract back \nfrom S.H.I.E.L.D. so he can use its powers to conquer \nmany other worlds. Before taking it to the Other, Loki’s \nplan is to open the portal through space from the \nTesseract, to let the Chitauri fighters enter Earth. After \nconquering Earth, only then will Loki take the Tesseract \nback to the Other. Chapter 2: The Tesseract, which is stored under the \nS.H.I.E.L.D. building, turns on and will not turn off. Agents Fury and Phil Coulson go to Dr.'


In [6]:
# Embeddings from Hugging Face (sentence-transformers), computed on the CPU.
# Imported from langchain_community: the `langchain.embeddings` path is only a
# deprecated re-export of this class.
from langchain_community.embeddings import HuggingFaceEmbeddings

# NOTE: this rebinds `embeddings`, which previously held the Google model used
# for chunking. From here on `embeddings` is the model whose vectors are stored
# in Qdrant, so the collection's vector size must match this model's output.
embeddings = HuggingFaceEmbeddings(
    model_name='sentence-transformers/all-MiniLM-L6-v2',
    model_kwargs={'device': 'cpu'},
)

In [7]:
# Connect to Qdrant with the host and API key read from the environment

import qdrant_client
from qdrant_client import QdrantClient

client = QdrantClient(QDRANT_HOST, api_key=QDRANT_API_KEY)

In [10]:
# Create the Qdrant collection that will hold the chunk vectors

from qdrant_client import QdrantClient, models

# The collection name is published through the environment so later cells can read it
os.environ["QDRANT_COLLECTION_NAME"] = 'Rag-pipeline-5'

# size must equal the dimensionality of the vectors produced by `embeddings`;
# similarity between vectors is measured with cosine distance
vector_params = models.VectorParams(distance=models.Distance.COSINE, size=384)

# recreate_collection replaces any existing collection of the same name with an
# empty one, so re-running this cell discards previously indexed points
client.recreate_collection(
    vectors_config=vector_params,
    collection_name=os.environ["QDRANT_COLLECTION_NAME"],
)

True

In [21]:
# Build the LangChain vector store on top of the Qdrant collection and index the chunks.
# Imported from langchain_community: `langchain.vectorstores` is only a
# deprecated re-export of this class.
from langchain_community.vectorstores import Qdrant

vector_store = Qdrant(
    client=client,
    collection_name=os.environ["QDRANT_COLLECTION_NAME"],
    embeddings=embeddings,
)

# Plain text of every chunk produced by the splitter.
# NOTE(review): this rebinds `texts`, which earlier held the raw page texts;
# re-running the chunking cell after this one would split the chunks again.
texts = [doc.page_content for doc in docs]

# Embed the chunks and store them; the call returns the ids of the stored points.
# NOTE(review): presumably a second run adds the same chunks again under new
# ids - recreate the collection first if that is not wanted (TODO confirm).
vector_store.add_texts(texts)

['eed85b99bead4e4e9e95405b5ddf8487',
 '545e9bc57d2d4721be751bce1e059a1d',
 'e4f0a6d7dc9a4284989a7478c68fe960',
 '9c8dd69346134efe888ba3f78b0e1ae9',
 '5615f2565f2c41cdb21090ab8e1b9f5c',
 '2f471c32134140079612d0121e67253d',
 '1e59e3cf2d4c42b891beff3654b45f06',
 '4de0b503488d48dc9d383215140aefb2',
 '8da3598a9b384e5b84f501bcfe6f4f18',
 '590578bf6bcc48829f97630b8e010971',
 'de85f56121214205828322217c8391c1',
 'bac9f8389eea4cf88c8c72e3981f868e',
 'a7d7127433b74a188022a0c4bcc58cd2',
 'ef1673ff70af466581a8a3622e9327e9',
 '9cd48859c5c84183afe3273eeb7818a6',
 '65f71d48169c486eb6d2958a13185f90',
 'e07b475499964ccc86a044c3693a417e',
 '42fcdf55e5ae41c48b0d286f6985a9b6',
 'ead65e91cf284f87aed708b0581c400c',
 'af4fa3d04c7945ed94a6ce557ebc98d3',
 '2d69287b7b904ec98a6c9811035b2b77',
 'efe7bc0b96434bc690651ce05f7bc4db',
 '8a396b13654645e58eda36f4ecadc06f',
 'e6b4ecc1a1d74c34a03d774b3d1c6c79',
 'd6679088a6af41d9bde617f155a7ac33']

In [28]:
# Chat model that writes the final answer
from langchain_google_genai import ChatGoogleGenerativeAI

# Only the model name is set; convert_system_message_to_human stays at its default
llm = ChatGoogleGenerativeAI(model="gemini-pro")

In [23]:
## Chat prompt: answer strictly from the supplied context
from langchain_core.prompts import ChatPromptTemplate

# Two placeholders are filled at run time: {context} and {input} (the question)
RAG_PROMPT_TEMPLATE = """
Answer the following question based only on the provided context. 
Think step by step before providing a detailed answer. 
I will tip you $1000 if the user finds the answer helpful. 
<context>
{context}
</context>
Question: {input}"""

prompt = ChatPromptTemplate.from_template(RAG_PROMPT_TEMPLATE)

In [29]:
## "Stuff" documents chain: formats the documents into the prompt and calls the LLM

from langchain.chains.combine_documents import create_stuff_documents_chain

document_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

In [30]:

from langchain.chains import create_retrieval_chain

# Expose the vector store as a retriever; `retriever` is this object, not a module.
retriever = vector_store.as_retriever()

# End-to-end chain: fetch documents for the "input" question, then hand them to
# the document chain, which produces the answer.
retrieval_chain = create_retrieval_chain(retriever, document_chain)


In [36]:
# Ask a question through the full retrieval chain and show only the generated answer
query_payload = {"input": "say about Discussion activities"}
response = retrieval_chain.invoke(query_payload)
print(response["answer"])

1. **Discuss:** Talk about Nick Fury, S.H.I.E.L.D., and the Avengers. Ask students to answer these questions:
    * What does Nick Fury think about the Avengers?
    * Will they help him? (Chapters 1–2)
2. **Discuss:** Put students in groups and ask them to discuss these questions:
    * What do you know about the Tesseract?
    * Why does S.H.I.E.L.D. want it? (Chapters 1–2)
3. **Discuss:** Ask students to think about how much danger the Avengers put themselves in. Ask:
    * When did Captain America feel afraid for the first time?
    * What was he afraid of?
    * How did the people in the bank feel?
    * How did Iron Man save Manhattan? (Chapters 6–7)
4. **Discuss:** Write these words on the board: S.H.I.E.L.D., Nick Fury, Loki, Iron Man. Discuss with the whole class how each felt about the Avengers at the beginning and at the end of the story. (Chapter 3)
5. **Discuss:** What is Loki’s plan for Dr. Selvig, the iridium, the Stark Building, Agent Barton, Bruce Banner, and Thor? (Ch