<a href="https://colab.research.google.com/github/Aniket21628/Pokemon_Chatbot_RAG/blob/main/Pokemon_Chatbot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
%pip install -q langchain langchain-nvidia-ai-endpoints

In [3]:
from functools import partial
from rich.console import Console
from rich.style import Style
from rich.theme import Theme

# Shared rich console. `pprint` is console.print pre-bound to a bold
# #76B900 style, so every call through it is rendered the same way.
console = Console()
base_style = Style(color = "#76B900", bold=True)
pprint = partial(console.print, style=base_style)

In [43]:
from langchain_nvidia_ai_endpoints import ChatNVIDIA
from google.colab import userdata

# Read the NVIDIA API key from Colab's `userdata` store rather than
# hardcoding it in the notebook.
api_key = userdata.get('NVIDIA_API_KEY')

In [5]:
from langchain_core.runnables import RunnableLambda
from functools import partial

def RPrint(preface="State: "):
    """Build a pass-through runnable that prints whatever flows through it.

    Args:
        preface: Text printed immediately before the value.

    Returns:
        A ``RunnableLambda`` that prints ``preface`` followed by its input
        using the built-in ``print`` and then returns the input unchanged.
    """
    def _echo_and_forward(value, preface=""):
        print(f"{preface}{value}")
        return value

    tap = partial(_echo_and_forward, preface=preface)
    return RunnableLambda(tap)

def PPrint(preface="State: "):
    """Build a pass-through runnable that prints its input via ``pprint``.

    Args:
        preface: Text passed to ``pprint`` ahead of the value.

    Returns:
        A ``RunnableLambda`` that calls the notebook's ``pprint`` helper with
        ``preface`` and its input, then returns the input unchanged.
    """
    def _show_and_forward(value, preface=""):
        pprint(preface, value)
        return value

    tap = partial(_show_and_forward, preface=preface)
    return RunnableLambda(tap)

In [None]:
pip install langchain-community wikipedia

In [None]:
from langchain_community.document_loaders import WikipediaLoader
from datetime import datetime, timedelta
import os
import json

# Load the Wikipedia articles matching the query (capped at 1000 documents).
# Each loaded document is later read through `.page_content` and `.metadata`.
docs = WikipediaLoader(query="Pokemon and Everything about Pokemon", load_max_docs=1000).load()


In [11]:
def create_chunks_with_headers(doc, doc_index, chunk_size=800, chunk_overlap=100):
    """Split one document into overlapping text chunks tagged with its metadata.

    Chunk boundaries advance by ``chunk_size`` characters. Every chunk except
    the first is extended backwards by up to ``chunk_overlap`` characters so
    that neighbouring chunks share some context.

    Args:
        doc: Object exposing ``page_content`` (the text to split) and
            ``metadata`` (a mapping with ``"title"``, ``"summary"`` and
            ``"source"`` keys).
        doc_index: Position of the document in the corpus. Accepted so existing
            callers keep working; it does not influence the result.
        chunk_size: Number of characters between consecutive chunk starts.
        chunk_overlap: Maximum number of characters each non-initial chunk
            borrows from the end of the previous one.

    Returns:
        A list of dicts, one per chunk, each with ``"meta_data"`` (title,
        summary, source_url), a 1-based ``"chunk_index"`` and ``"content"``.
        An empty document yields an empty list.

    Raises:
        ValueError: If ``chunk_size`` is not positive or ``chunk_overlap`` is
            negative.
        KeyError: If a non-empty document lacks one of the metadata keys.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")
    if chunk_overlap < 0:
        raise ValueError("chunk_overlap must not be negative")

    text = doc.page_content
    chunks = []

    for chunk_number, start in enumerate(range(0, len(text), chunk_size), start=1):
        # For the first chunk max(0 - overlap, 0) is 0, so one formula covers
        # both the initial and the overlapping case; slicing clamps the end.
        overlap_start = max(start - chunk_overlap, 0)
        chunks.append({
            # Metadata is looked up per chunk so every chunk owns its own dict.
            "meta_data": {
                "title": doc.metadata["title"],
                "summary": doc.metadata['summary'],
                "source_url": doc.metadata['source'],
            },
            "chunk_index": chunk_number,
            "content": text[overlap_start:start + chunk_size],
        })

    return chunks

In [45]:
# Chunk every loaded document and collect the results in one flat list.
all_chunks = []

for doc_number, doc in enumerate(docs, start=1):
    doc_chunks = create_chunks_with_headers(doc, doc_number)
    all_chunks += doc_chunks
    print(f"Created {len(doc_chunks)} chunks for document {doc_number}")

print("All data has been processed")

Created 5 chunks for document 1
Created 5 chunks for document 2
Created 5 chunks for document 3
Created 5 chunks for document 4
Created 5 chunks for document 5
Created 5 chunks for document 6
Created 4 chunks for document 7
Created 5 chunks for document 8
Created 5 chunks for document 9
Created 5 chunks for document 10
Created 5 chunks for document 11
Created 5 chunks for document 12
Created 5 chunks for document 13
Created 5 chunks for document 14
Created 5 chunks for document 15
Created 5 chunks for document 16
Created 5 chunks for document 17
Created 5 chunks for document 18
Created 5 chunks for document 19
Created 5 chunks for document 20
Created 5 chunks for document 21
Created 5 chunks for document 22
Created 5 chunks for document 23
Created 5 chunks for document 24
Created 5 chunks for document 25
Created 3 chunks for document 26
Created 5 chunks for document 27
Created 5 chunks for document 28
Created 3 chunks for document 29
Created 5 chunks for document 30
Created 5 chunks fo

In [31]:
pip install sentence-transformers

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch>=1.11.0->sentence-transformers)
 

In [36]:
from langchain_core.documents import Document
from langchain_chroma import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains.query_constructor.base import AttributeInfo
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain_nvidia_ai_endpoints import ChatNVIDIA
from uuid import uuid4
import time

# === Setup HuggingFace Embeddings ===
# One shared embedding model instance; it is passed to Chroma as the
# `embedding` argument when documents are embedded.
embeddings_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)

# === Function to convert your chunks to LangChain Document objects ===
def convert_chunks_to_documents(chunks):
    """Wrap chunk dicts in LangChain ``Document`` objects.

    Args:
        chunks: Iterable of dicts carrying a ``"content"`` string and a
            ``"meta_data"`` mapping.

    Returns:
        A list with one ``Document`` per chunk, in input order, whose
        ``page_content`` is the chunk content and whose ``metadata`` is the
        chunk's ``"meta_data"``.
    """
    documents = []
    for chunk in chunks:
        text = chunk["content"]
        header = chunk["meta_data"]
        documents.append(Document(page_content=text, metadata=header))
    return documents

# === Batch embed function ===
def embed_documents_in_batches(documents, batch_size=25):
    """Embed documents batch by batch, one new Chroma collection per batch.

    Args:
        documents: Sequence of documents to embed.
        batch_size: Number of documents per batch.

    Returns:
        A list with one vector store per successfully embedded batch. A batch
        that raises is reported on stdout and skipped, so the list can be
        shorter than the number of batches.
    """
    offsets = range(0, len(documents), batch_size)
    total_batches = len(offsets)
    stores = []

    for batch_number, offset in enumerate(offsets, start=1):
        batch = documents[offset:offset + batch_size]
        print(f"Embedding batch {batch_number}/{total_batches}")
        try:
            store = Chroma.from_documents(
                documents=batch,
                embedding=embeddings_model,
                collection_name=f"pokemon_{uuid4()}",  # Unique name for each batch
            )
        except Exception as e:
            # Best effort: report the failure and carry on with the next batch.
            print(f"Failed batch {batch_number}: {e}")
        else:
            stores.append(store)

    return stores

documents = convert_chunks_to_documents(all_chunks)

# === Embed all documents into ONE collection, batch by batch ===
# Previously every batch went into its own collection and only the first one
# was handed to the retriever, so all later chunks were never searchable.
# Adding each batch to a single store keeps batching while making every
# chunk retrievable.
EMBED_BATCH_SIZE = 25
combined_vectorstore = Chroma(
    collection_name=f"pokemon_{uuid4()}",
    embedding_function=embeddings_model,
)

total_batches = (len(documents) + EMBED_BATCH_SIZE - 1) // EMBED_BATCH_SIZE
for batch_number, batch_start in enumerate(
    range(0, len(documents), EMBED_BATCH_SIZE), start=1
):
    print(f"Embedding batch {batch_number}/{total_batches}")
    try:
        combined_vectorstore.add_documents(
            documents[batch_start:batch_start + EMBED_BATCH_SIZE]
        )
    except Exception as e:
        # Best effort, as before: report the failed batch and keep going.
        print(f"Failed batch {batch_number}: {e}")

# Kept so code that still refers to the old list-of-stores name keeps working.
embedded_vectorstores = [combined_vectorstore]

# === Setup NVIDIA LLM (make sure `api_key` is defined or fetched) ===
# This model is the one handed to the self-query retriever below.
# NOTE: a later cell rebinds the name `llm` to a different chain; the
# retriever keeps using the object created here.
llm = ChatNVIDIA(
    model="mistralai/mistral-7b-instruct-v0.2",
    api_key=api_key
)

# === Metadata schema definition ===
# Describes the metadata fields attached to every chunk (title, summary,
# source_url) for the self-query retriever.
metadata_field_info = [
    AttributeInfo(name="title", description="The name of the article", type="string"),
    AttributeInfo(name="summary", description="The short summary of the article contents", type="string"),
    AttributeInfo(name="source_url", description="The web URI link to the article webpage", type="string"),
]

# === Self-query retriever ===
# Short description of what the stored documents contain.
document_content_description = "Data about Pokemon"

# Positional arguments: LLM, vector store, content description, metadata schema.
retriever = SelfQueryRetriever.from_llm(
    llm,
    combined_vectorstore,
    document_content_description,
    metadata_field_info,
)


Embedding batch 1/90
Embedding batch 2/90
Embedding batch 3/90
Embedding batch 4/90
Embedding batch 5/90
Embedding batch 6/90
Embedding batch 7/90
Embedding batch 8/90
Embedding batch 9/90
Embedding batch 10/90
Embedding batch 11/90
Embedding batch 12/90
Embedding batch 13/90
Embedding batch 14/90
Embedding batch 15/90
Embedding batch 16/90
Embedding batch 17/90
Embedding batch 18/90
Embedding batch 19/90
Embedding batch 20/90
Embedding batch 21/90
Embedding batch 22/90
Embedding batch 23/90
Embedding batch 24/90
Embedding batch 25/90
Embedding batch 26/90
Embedding batch 27/90
Embedding batch 28/90
Embedding batch 29/90
Embedding batch 30/90
Embedding batch 31/90
Embedding batch 32/90
Embedding batch 33/90
Embedding batch 34/90
Embedding batch 35/90
Embedding batch 36/90
Embedding batch 37/90
Embedding batch 38/90
Embedding batch 39/90
Embedding batch 40/90
Embedding batch 41/90
Embedding batch 42/90
Embedding batch 43/90
Embedding batch 44/90
Embedding batch 45/90
Embedding batch 46/

In [37]:
from collections import deque

# Rolling conversation memory: a deque capped at 5 entries, so appending a
# sixth turn automatically discards the oldest one.
memory = deque(maxlen=5)

def update_memory(user_question, response):
    """Record one question/response turn in ``memory`` and display the memory.

    Args:
        user_question: The question to store under the ``"question"`` key.
        response: The answer to store under the ``"response"`` key.

    Returns:
        None. The module-level ``memory`` deque is mutated in place and then
        printed with ``pprint``.
    """
    turn = {"question": user_question, "response": response}
    memory.append(turn)
    pprint(memory)

In [46]:
from langchain_core.runnables import RunnableLambda, RunnableAssign, RunnablePassthrough
from langchain_nvidia_ai_endpoints import ChatNVIDIA
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate

# System instructions that are interpolated at the top of the answer prompt.
sys_msg = """
You are an intelligent assistant that answers all questions about Pokemon using contextual information from Wikipedia. Your responses should be conversational and informative, providing clear and concise explanations. When relevant, include the source URL of the articles to give users additional reading material.

Always aim to:
1. Answer the question directly and clearly.
2. Provide context and background information when useful but do not give irrelevant information and answer to the point.
3. Suggest related topics or additional points of interest.
4. Be polite and engaging in your responses.
5. Remove the unnecessary context from the context provided if irrelevant to the question

Now, let's get started!
"""

# Initialize the chat model
instruct_chat = ChatNVIDIA(model="meta/llama3-70b-instruct", api_key = api_key)
# NOTE: this rebinds `llm` (an earlier cell assigned a bare ChatNVIDIA model to
# the same name). From here on `llm` is the chat model piped into
# StrOutputParser.
llm = instruct_chat | StrOutputParser()

In [47]:
def generate_embeddings(input_data):
    """Retrieve context documents for the current chain state.

    Inside the chain this function receives the whole state dict, which by
    then contains the LLM-generated ``"embedding_query"``. That query, not
    the state dict itself, is what must be sent to the retriever. A plain
    value (for example a query string) is still accepted and forwarded as is.

    Args:
        input_data: Either the chain state (a dict with a non-empty
            ``"embedding_query"`` entry) or a query to pass straight to the
            retriever.

    Returns:
        Whatever ``retriever.invoke`` returns when it is non-empty, otherwise
        the string ``"No data available"``.
    """
    if isinstance(input_data, dict) and input_data.get("embedding_query"):
        query = input_data["embedding_query"]
    else:
        # Direct calls with a ready-made query keep working.
        query = input_data

    retrieved = retriever.invoke(query)
    return retrieved if retrieved else "No data available"

def generate_embeddings_query(input_data):
    """Ask the LLM to turn the user's question into a retrieval query.

    Args:
        input_data: Mapping that supplies the ``input`` and ``memory``
            variables of the prompt template.

    Returns:
        The output of ``prompt | llm`` when it is truthy, otherwise the string
        ``"Process failed"``.
    """
    # Prompt asking the model for a single query sentence
    query_prompt = ChatPromptTemplate.from_template(
        f"""
User's Question: {{input}}
Previous conversation memory {{memory}}
Generate only a query sentence and nothing else from the user's question to fetch from the data from embeddings. If the user's question does not have enough context then create a query based on the Knowledge Base.
"""
    )
    generated_query = (query_prompt | llm).invoke(input_data)
    return generated_query or "Process failed"

# Wrap the two helpers as runnables so they can be used as chain steps.
generate_embeddings_runnable = RunnableLambda(generate_embeddings)
generate_embeddings_query_runnable = RunnableLambda(generate_embeddings_query)

In [48]:
def get_response(prompt):
    """Send ``prompt`` to the module-level ``llm`` and return its reply."""
    reply = llm.invoke(prompt)
    return reply

# Create the Runnable chain with memory integration.
#
# The chain is invoked with {"input": <question>, "memory": <deque>}.
# The first step extracts each field by key. Using RunnablePassthrough() for
# both keys copied the ENTIRE input dict into "input" and "memory", so the
# prompts were given a stringified dict instead of the question and the
# conversation history.
#
# State keys added along the way:
#   embedding_query -> retrieval query generated from the question
#   context         -> documents returned for that query
#   prompt          -> the answer prompt built from the template below
#   response        -> the model's answer
#   memory          -> overwritten with update_memory's return value
Runnable = (
    {
        "input": lambda x: x["input"],
        "memory": lambda x: x["memory"],
    }
    | RunnableAssign({"embedding_query": generate_embeddings_query_runnable})
    | RunnableAssign({"context": generate_embeddings_runnable})
    # The lambda returns a prompt template. NOTE(review): this relies on
    # RunnableLambda invoking a returned runnable with the current state,
    # which is what fills in input, context and memory - confirm on upgrade.
    | RunnableAssign({"prompt": lambda x: ChatPromptTemplate.from_template(
        f"""
{sys_msg}

User's Question: {{input}}

Context Information: {{context}}

Previous Conversation memory: {{memory}}

Your Response:
"""
    )})
    | RunnableAssign({"response": lambda x: get_response(x["prompt"])})
    # "input" is now the question itself, so it is stored in memory directly.
    | RunnableAssign({"memory": lambda x: update_memory(x["input"], x["response"])})
)

# Get user input and invoke the chain
user_input = "What is a Pikachu?"
# The chain returns its final state dict; the answer is under "response".
response = Runnable.invoke({"input": user_input, "memory": memory})

# Show only the generated answer.
pprint(response["response"])