In [25]:
from langchain_community.document_loaders import PyPDFLoader


In [26]:
# Point the loader at the source PDF (path is relative to the notebook's working directory).
loader = PyPDFLoader("BD.pdf")
# Load the PDF into a list of Document objects (the outputs below show one per page, 10 in total).
data = loader.load()

In [27]:
# Preview only the first loaded page; displaying `data` directly dumps the
# full text of every page into the cell output.
data[:1]

[Document(metadata={'source': 'BD.pdf', 'page': 0}, page_content="Bangladesh: An Overview XXI\nBANGLADESH: An Overview\nHistorical\nBackground\nThe history of Bangladesh is an eventful combination of turmoil and peace, as well as prosperity and\ndestitution. It has thrived under the glow of cultural splendor and suffered under the ravages of war. The\nterritory now constituting Bangladesh was under the Muslim rule over five and a half centuries from 1201\nto 1757 A.D. Then it was ruled by the British, after the defeat of the last sovereign ruler of Bengal, Nawab\nSiraj ud-Daulah, at the Battle of Plassey on the fateful day of June 23, 1757. The British ruled over the\nentire Indian sub-continent including this territory for nearly 190 years from 1757 to 1947. During that\nperiod Bangladesh was a part of the British Indian provinces of Bengal and Assam. With the termination\nof the British rule in August, 1947 the sub-continent was partitioned into India and Pakistan. Bangladesh\nthen b

In [28]:
# Number of Document objects produced by the loader.
len(data)

10

In [37]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split the loaded pages into smaller chunks (chunk_size=500) so that each
# piece can be embedded and retrieved independently.
text_spliter = RecursiveCharacterTextSplitter(chunk_size=500)
douc = text_spliter.split_documents(data)

# How many chunks the split produced.
len(douc)

93

In [38]:
# Inspect one chunk (index 3) to sanity-check the split output and its metadata.
douc[3]

Document(metadata={'source': 'BD.pdf', 'page': 0}, page_content="then became a part of Pakistan and was named as East Pakistan. It remained so far about 24 years from\nAugust 14, 1947 to March 25, 1971. It appeared on the world map as an independent and sovereign\nstate named Bangladesh on December 16, 1971 following victory at the War of Liberation (from March\n25 to December 16, 1971).\nGeographical Bangladesh lies in the north eastern part of South Asia between 20o34' and 26 o 38' north latitude")

In [39]:
from langchain_chroma import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from dotenv import load_dotenv
# Load environment variables (e.g. GOOGLE_API_KEY) from a local .env file;
# the cell output is the call's return value.
load_dotenv()

True

In [40]:
import os

In [41]:
# Read the Gemini API key from the process environment.
api_key = os.getenv("GOOGLE_API_KEY")
if not api_key:
    # os.getenv returns None when the variable is missing; fail here with a
    # clear message instead of an opaque authentication error on first use.
    raise RuntimeError(
        "GOOGLE_API_KEY is not set; export it or add it to your .env file."
    )

# Embedding client used to turn text into vectors.
embeddings = GoogleGenerativeAIEmbeddings(api_key=api_key, model="models/embedding-001")

# Smoke test: embed a short query and show the first few vector components.
vector = embeddings.embed_query("do you know Bangladesh!")
vector[:5]

[0.04201357811689377,
 -0.03638027608394623,
 -0.03496973589062691,
 -0.007133544888347387,
 0.0791676938533783]

In [42]:
# Index the chunks in a Chroma vector store. Reuse the `embeddings` client
# configured earlier (explicit API key, same model) instead of constructing a
# second, separately configured instance.
# NOTE(review): no persist_directory is given, so the index presumably lives
# only in memory and re-running this cell may add the chunks again — confirm.
vectorstore = Chroma.from_documents(documents=douc, embedding=embeddings)

In [None]:
# Expose the vector store as a similarity-search retriever returning 7 hits per query.
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 7},
)

# Try the retriever on a sample question before wiring it into a chain.
retrieved_docs = retriever.invoke(
    "Do you know the country how many kilometers of rail-way?"
)

In [44]:
# Number of documents returned for the sample query (the retriever was configured with k=7).
len(retrieved_docs)

7

In [45]:
# Show the text of one retrieved chunk (index 5); print() renders its embedded newlines.
print(retrieved_docs[5].page_content)

The country has a network of radio and television broadcasting. There were two television stations in
Dhaka and Chattogram under government and it increased to 17 stations. Presently 31 television
channels are broadcasting under the government & private ownerships. The television system was
introduced in 1965 and since then sub-stations have been set up in Sylhet, Khulna, Rangpur,
Mymensingh, Natore, Noakhali, Satkhira, Cox’s Bazar, Rangamati, Thakurgaon, Patuakhali,


In [46]:
from langchain_google_genai import ChatGoogleGenerativeAI
# Chat model used to generate answers from the retrieved context.
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro",
    temperature=0.4,
    max_tokens=200,
)

In [17]:
pip install langchain


Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [47]:
import requests

# Scratch lookup: ask PyPI's search page for packages matching `query`.
query = "retrieval chain"

# Pass the query through `params` so requests URL-encodes it, and set a timeout
# so the cell cannot hang indefinitely on a stalled connection. The result is
# stored under its own name so it is not confused with the RAG chain's
# `response` later in the notebook.
search_response = requests.get(
    "https://pypi.org/search/",
    params={"q": query},
    timeout=10,
)

# Show the HTTP status and only the start of the HTML body instead of the whole page.
print(search_response.status_code)
print(search_response.text[:500])


<!DOCTYPE html>
<html>
  <head>
    <meta
      http-equiv="Content-Security-Policy"
      content="default-src 'self'; img-src 'self' data:; media-src 'self' data:; object-src 'none'; style-src 'self' 'sha256-o4vzfmmUENEg4chMjjRP9EuW9ucGnGIGVdbl8d0SHQQ='; script-src 'self' 'sha256-a9bHdQGvRzDwDVzx8m+Rzw+0FHZad8L0zjtBwkxOIz4=';"
    />
    <link
      href="/_fs-ch-1T1wmsGaOgGaSxcX/assets/inter-var.woff2"
      rel="preload"
      as="font"
      type="font/woff2"
      crossorigin
    />
    <link href="/_fs-ch-1T1wmsGaOgGaSxcX/assets/styles.css" rel="stylesheet" />
    <meta
      name="viewport"
      content="width=device-width, initial-scale=1, maximum-scale=1"
    />
    <style>
      #loading-error {
        font-size: 16px;
        font-family: 'Inter', sans-serif;
        margin-top: 10px;
        margin-left: 10px;
        display: none;
      }
    </style>
  </head>
  <body>
    <noscript>
      <div class="noscript-container">
        <div class="noscript-content">
       

In [50]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# System instructions for the answering model: the sentences are joined with
# single spaces, followed by a blank line and the `{context}` placeholder.
system_prompt = " ".join(
    [
        "You are an assistant for question-answering tasks.",
        "Use the following pieces of retrieved context to answer the question.",
        "If you don't know the answer, say that you don't know.",
        "Use three sentences maximum and keep the answer concise.",
    ]
) + "\n\n{context}"

# Two-message chat template: the system instructions (with the `{context}`
# placeholder) followed by the user's question in `{input}`.
prompt_messages = [("system", system_prompt), ("human", "{input}")]
prompt = ChatPromptTemplate.from_messages(prompt_messages)



In [51]:
# Chain that passes the retrieved documents and the prompt to the LLM.
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

# Full RAG pipeline: retrieve documents for the input, then answer from them.
rag_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=question_answer_chain,
)

In [53]:
# Run the RAG chain on a sample question and print only the generated answer.
response = rag_chain.invoke(
    {"input": "What are the major rivers in Bangladesh?"}
)
print(response["answer"])

The Padma, Jamuna, Teesta, Brahmaputra, Surma, Meghna, and Karnaphuli are major rivers in Bangladesh.  These rivers have 230 tributaries, totaling about 24,140 kilometers in length.  The alluvial soil benefits from the heavy silt deposits these rivers bring during the rainy season.

