# Trainer Bot with Langchain Google Gemini API and Pinecone

Step - 1, Creation of the Document Chunks and Instances with Lanchain and Pinecone

In [79]:
from langchain_community.document_loaders import OnlinePDFLoader, PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [80]:
class FileInstanceCreator:
    def __init__(self, url : str) -> None:
        self.url = url

    def file_loader(self):
        loader = DirectoryLoader(self.url,
            glob="*.pdf",
            loader_cls=PyPDFLoader
        )

        document = loader.load()
        return document

In [81]:
file_path : str = "data/"
documenter = FileInstanceCreator(file_path)

In [82]:
extracted_text = documenter.file_loader()

In [83]:
# chunking of the data
class Chunker:
    def __init__(self, text: str, chunk_size: int = 500) -> None:
        self.text = text
        self.chunk_size = chunk_size

    def chunk(self):
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size = self.chunk_size,
            chunk_overlap = 20
        )

        text_chunks = text_splitter.split_documents(self.text)
        return text_chunks

In [84]:
chunk_instance = Chunker(extracted_text)

In [85]:
chunks = chunk_instance.chunk()

In [86]:
len(chunks)

13

In [87]:
print(chunks[0])

page_content='1 
 
UNIT III 
FACTORS INFLUENCING THE PROPERTIES OF POLYMERS 
Effect of Structural Modification on Properties: 
 The physical properties of polymers can be affected in many ways.  
 In particular, the chemica l composition and arrangement of chains will have a great 
effect on the final properties.  
 By such means the polymers can be tailored to meet the end use.  
Effect of Molecular Weight and Composition:' metadata={'producer': 'Microsoft® Word 2010', 'creator': 'Microsoft® Word 2010', 'creationdate': '2024-04-20T12:19:22+05:30', 'author': 'Acer', 'moddate': '2024-04-20T12:19:22+05:30', 'source': 'data\\text_book.pdf', 'total_pages': 4, 'page': 0, 'page_label': '1'}


In [88]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

In [89]:
import os

apiKey = ""

if "GOOGLE_API_KEY" not in os.environ:
    os.environ["GOOGLE_API_KEY"] = apiKey

print("API Key set successfully!")

API Key set successfully!


In [90]:
class GetEmbedder:
    def __init__(self):
        pass

    def embedding_instance_provider(self):
        embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
        return embeddings

In [91]:
embedder = GetEmbedder().embedding_instance_provider()

In [92]:
outputs = embedder.embed_query('hello world')
print(outputs)

[0.04909781739115715, -0.044328317046165466, -0.025365281850099564, -0.030721040442585945, 0.019068587571382523, -0.010923865251243114, 0.03320307657122612, -0.009435197338461876, 0.014225783757865429, 0.011143019422888756, 0.038166288286447525, 0.05900086462497711, -0.01907157711684704, -0.07919705659151077, 0.008873151615262032, -0.01893804408609867, 0.013136426918208599, -0.010632799006998539, 0.01072288304567337, -0.009800860658288002, -0.005997073836624622, 0.0035213204100728035, -0.0425376296043396, -0.017927661538124084, 0.00820374395698309, 0.021045953035354614, -0.0070729125291109085, -0.06959639489650726, 0.003866896266117692, 0.06563343107700348, -0.03162197768688202, 0.016947802156209946, -0.0678708553314209, 0.0121560487896204, 0.03733306750655174, -0.043934062123298645, 0.024457911029458046, 0.025291720405220985, -0.007016133517026901, -0.0070653194561600685, 0.020251881331205368, -0.09896384924650192, -0.025554955005645752, -0.04248636215925217, 0.04312317818403244, 0.00

In [93]:
from pinecone import Pinecone, ServerlessSpec
pc = Pinecone(api_key="")

In [94]:
index_name = "teacher-bot-test"

pc.create_index(
    name=index_name,
    dimension=768, # Replace with your model dimensions
    metric="cosine", # Replace with your model metric
    spec=ServerlessSpec(
        cloud="aws",
        region="us-east-1"
    ) 
)

In [95]:
from langchain_pinecone import PineconeVectorStore

import os

pineconeKey = ""

if "PINECONE_API_KEY" not in os.environ:
    os.environ["PINECONE_API_KEY"] = pineconeKey

print("PineCone API Key set successfully!")

docSearch = PineconeVectorStore.from_documents(
    documents = chunks,
    index_name = index_name,
    embedding=embedder
)

PineCone API Key set successfully!


In [96]:
# existing index

docSearchFrom = PineconeVectorStore.from_existing_index(
    index_name=index_name,
    embedding=embedder
)
print("Documents are being loaded")

Documents are being loaded


In [97]:
retriver = docSearchFrom.as_retriever(
    search_type = "similarity",
    search_kwargs = {'k' : 3}
)

In [99]:
retrieved_docs = retriver.invoke("biomaterials")

In [100]:
print(retrieved_docs)

[Document(id='fd93c50d-b58c-4d36-bf84-207914bdae27', metadata={'author': 'Acer', 'creationdate': '2024-04-20T12:19:22+05:30', 'creator': 'Microsoft® Word 2010', 'moddate': '2024-04-20T12:19:22+05:30', 'page': 2.0, 'page_label': '3', 'producer': 'Microsoft® Word 2010', 'source': 'data\\text_book.pdf', 'total_pages': 4.0}, page_content='\uf0b7 It might be a suitable biomaterial for use in tissue engineered repair systems in which \ncells are implanted within PLGA films or scaffolds and in drug delivery systems  in \nwhich drugs are loaded within PLGA microspheres.  \n\uf0b7 PGA (Tm: 225 –230°C, Tg: 35 –40°C) can be melt spun into fibers which can be \nconverted into bioresorbable sutures, meshes, and surgical products.'), Document(id='99f2d976-976f-42fd-9b0a-f58a11cd01e5', metadata={'author': 'Acer', 'creationdate': '2024-04-20T12:19:22+05:30', 'creator': 'Microsoft® Word 2010', 'moddate': '2024-04-20T12:19:22+05:30', 'page': 3.0, 'page_label': '4', 'producer': 'Microsoft® Word 2010', 's

In [101]:
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.chains.combine_documents import create_stuff_documents_chain

In [102]:
# Create an instance of the LLM, using the 'gemini-pro' model with a specified creativity level
llm = ChatGoogleGenerativeAI(model='gemini-2.0-flash', temperature=0.9)

In [103]:
system_context = ("""You are an AI-powered teacher designed to provide clear, educational, and engaging answers to students' questions. 
Your goal is to explain concepts concisely while ensuring understanding. 
Encourage curiosity, provide examples when necessary, and simplify complex ideas without losing accuracy.

Keep your responses informative yet concise. 
Avoid unnecessary details but provide depth where required. 
Use simple language for younger students and adjust explanations based on the question’s complexity.

{context}
""")

In [104]:
prompt = ChatPromptTemplate.from_messages(
    [
        ('system', system_context),
        ('human', '{input}')
    ]
)

In [105]:
question_answer_chain = create_stuff_documents_chain(llm, prompt)

In [106]:
rag_chain = create_retrieval_chain(retriver, question_answer_chain)

In [107]:
question = "what is biodegradable polymers"

response = rag_chain.invoke({
    'input' : question
})

In [108]:
print(response['answer'])

Biodegradable polymers are special types of plastics that can be broken down by natural processes, like the action of bacteria, fungi, and other living organisms.

**Here's a simple breakdown:**

*   **What they are:** Plastics that can decompose naturally.
*   **How they work:** Microorganisms eat the polymer, breaking it down into simpler, harmless substances like water, carbon dioxide, and biomass.
*   **Why they're important:** They help reduce plastic waste and pollution because they don't stick around in the environment for hundreds of years like regular plastics.

**Examples of biodegradable polymers include:**

*   Polylactide (PLA)
*   Polyglycolide (PGA)
*   Poly(glycolide-co-lactide) (PLGA)
*   Poly(dioxanone)
*   Poly(trimethylene carbonate)
*   Poly(carbonate)

These polymers are commonly used in medical applications due to their biocompatibility and controllable biodegradability.


In [None]:
# loading online pdfs
def loader_instance_online(url):
    online_pdf_loader = OnlinePDFLoader(url=url)
    document = online_pdf_loader.load()
    return document