In [None]:
from PyPDF2 import PdfReader  #  Reads PDF files page by page.
from langchain_community.embeddings import HuggingFaceEmbeddings  #Converts text into embeddings (numerical vectors).
from langchain.text_splitter import CharacterTextSplitter  # Splits long text into smaller chunks (so embeddings don’t cut mid-sentence)
from langchain.vectorstores import FAISS   # Facebook’s Vector DB library → stores embeddings for fast similarity search.
import google.generativeai as genai  #  Google Gemini SDK (Generative AI API).

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import os
from dotenv import load_dotenv
# Load variables from a local .env file (if present) into the environment.
load_dotenv()

# Gemini API key; this is None when GEMINI_KEY is not set.
gemini_key = os.getenv("GEMINI_KEY")
genai.configure(api_key=gemini_key)

# Embedding model shared by the indexing and the retrieval cells.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


In [None]:
# FUNCTION TO READ PDF AND SPLIT INTO PARAGRAPHS

def split_paragraphs(rawText, chunk_size=200, chunk_overlap=20):
    """Split raw text into overlapping chunks along newline boundaries.

    Args:
        rawText: Text to split. ``None`` or an empty string yields ``[]``.
        chunk_size: Target chunk length in characters (default 200).
        chunk_overlap: Number of characters shared between neighbouring
            chunks (default 20). The overlap limits context loss at chunk
            boundaries.

    Returns:
        list[str]: The chunks produced by ``CharacterTextSplitter.split_text``.
    """
    # Nothing to split; also avoids handing None to the splitter.
    if not rawText:
        return []

    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
    )
    return text_splitter.split_text(rawText)


def load_pdfs(pdfs):
    """Extract text from each PDF page by page and split it into chunks.

    Args:
        pdfs: Iterable of PDF sources accepted by ``PdfReader``
            (the notebook passes file paths).

    Returns:
        list[str]: All chunks from all PDFs, in document and page order.

    Side effects:
        Prints the first five chunks as a quick sanity check.
    """
    text_chunks = []

    for pdf in pdfs:
        reader = PdfReader(pdf)
        for page in reader.pages:
            raw = page.extract_text()
            # Skip pages with no extractable text (e.g. image-only pages)
            # instead of passing an empty or None value to the splitter.
            if not raw:
                continue
            text_chunks.extend(split_paragraphs(raw))

    print("HERE WE HAVE AN EXAMPLE OF WHAT WE HAVE IN CHUNKS")
    print(text_chunks[:5])  # Display first 5 chunks for verification
    return text_chunks

In [None]:
# PDFs to index; the path is relative to the notebook's working directory.
list_of_pdfs = ["Chapter 1.pdf"]
# Extract the text of every page and split it into chunks for embedding.
text_chunks = load_pdfs(list_of_pdfs)

"""
Reads Chapter 1.pdf.
Converts it into small text_chunks.
"""

HERE WE HAVE AN EXAMPLE OF WHAT WE HAVE IN CHUNKS
['Notes By – Prof. S.A. Langote  \nUnit No.1: Introduction to Computer Networks  \n \n➢ Uses of Computer Networks  \no Business Applications  \nMost companies have a substantial number of computers. For', 'example, a company may have a computer for each worker and use them \nto design products, write brochures, and do the payroll. Initially, some of', 'these computers may have worked in isolation from the others, but at some \npoint, management may have decided to connect them to be able to \ndistribute information throughout the company.', 'Put in slightly more general form, the issue here is resource sharing. \nThe goal is to make all programs, equipment, and especially data available', 'to anyone on the network without regard to the physical location of the \nresource or the user. An obvious and wides pread example is having a group']


In [None]:
# Embed every text chunk with `embeddings` and build a FAISS index from the vectors.
store = FAISS.from_texts(text_chunks, embeddings)
# Persist the index to ./myvectorstore so it can be reloaded without re-embedding.
store.save_local("./myvectorstore")

In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI   # Wrapper for Gemini chat models (like gemini-1.5-flash).
from langchain.chains import RetrievalQA  #   LangChain chain that connects retriever + LLM → full RAG pipeline.

In [8]:
# Reload the FAISS index from disk, using the same embedding model it was built with.
# NOTE(review): allow_dangerous_deserialization=True permits pickle-based loading;
# only load index directories that you created yourself.
store = FAISS.load_local("myvectorstore", embeddings, allow_dangerous_deserialization=True)

llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash", temperature=0, google_api_key=gemini_key)

# Retrieval-augmented QA: the retriever fetches chunks from the index, the LLM answers from them.
chain = RetrievalQA.from_chain_type(llm=llm, retriever=store.as_retriever())
# Calling the chain object directly (`chain({...})`) is deprecated in LangChain;
# `invoke` is the supported entry point and returns the same {'query', 'result'} dict.
result = chain.invoke({"query": "Explain Computer Network Reference model."})
print(result)

{'query': 'Explain Computer Network Reference model.', 'result': 'Based on the provided text, there is mention of the ISO OSI (Open Systems Interconnection) Reference Model.  The text states that it deals with connecting open systems, and that it was revised in 1995. However, no further details about the model itself are given.'}
