# **AI-RAG Chatbot - Code**

In [None]:
# Install Ollama and LangChain's Ollama integration
# The first command downloads Ollama's install script and pipes it straight
# into `sh`; the second installs `langchain_community`, which provides the
# `Ollama` LLM wrapper imported later in this notebook.
!curl -fsSL https://ollama.com/install.sh | sh
!pip install langchain_community

In [21]:
# Start the Ollama service in the background and wait until it accepts
# connections. `Popen` returns immediately, so without the wait a "Run All"
# can reach the following `ollama pull` cell before the server is listening.
import socket
import subprocess
import time

process = subprocess.Popen(["ollama", "serve"])

# 11434 is the same port the Colab proxy cell below exposes.
_ollama_deadline = time.monotonic() + 30
while True:
    try:
        with socket.create_connection(("127.0.0.1", 11434), timeout=1):
            break  # something is listening: the server is ready
    except OSError:
        # Not reachable yet: distinguish "still starting" from "died on start-up".
        if process.poll() is not None:
            raise RuntimeError(
                f"`ollama serve` exited with code {process.returncode} before it started listening"
            )
        if time.monotonic() > _ollama_deadline:
            raise TimeoutError("`ollama serve` did not start listening on port 11434 within 30 s")
        time.sleep(0.5)
#!ollama list

In [None]:
# Pull the llama3 model (as required)
# The model name here must match the one passed to `Ollama(model=...)` when
# the LLM client is created further down.
!ollama pull llama3

In [4]:
# Get the Colab notebook URL for Ollama
# Asks the Colab front end (via JavaScript) for a proxy URL that forwards to
# port 11434 on this runtime - presumably the port `ollama serve` listens on
# (TODO confirm). The result is stored in `notebook_url`.
from google.colab.output import eval_js
notebook_url = eval_js("google.colab.kernel.proxyPort(11434)")

In [5]:
# NOTE(review): as written this line has no lasting effect. `!` runs the
# command in a throwaway subshell, so the exported variable disappears when
# that shell exits, and `notebook_url` is passed as literal text rather than
# the Python variable's value (IPython interpolation needs `{notebook_url}`
# or `$notebook_url`). If the variable is really needed by the kernel,
# `%env` or `os.environ` would be required - confirm whether anything in the
# notebook actually depends on OLLAMA_HOST before changing it.
!export OLLAMA_HOST=notebook_url

In [22]:
# Initialize the LLM
# `llm` is the module-level client that `chatbot_conversation` calls through
# `llm.invoke(...)`. No endpoint is passed, so the wrapper's default is used -
# presumably the local `ollama serve` instance (TODO confirm).
from langchain_community.llms import Ollama
llm = Ollama(model="llama3")

In [None]:
# Install Gradio, which provides the web UI built in the last cell
!pip install gradio

In [None]:
# Install required libraries
# PyPDF2 reads the PDF, sentence-transformers produces the embeddings and
# faiss provides the vector index.
# NOTE(review): the notebook only builds a plain `IndexFlatL2` and never moves
# it to a GPU, so the CPU build of faiss would presumably be enough - confirm
# that a `faiss-gpu` wheel exists for the runtime's Python version.
!pip install PyPDF2
!pip install sentence-transformers
!pip install faiss-gpu

# Download the PDF
!wget -O DataStructures_Cheatsheet.pdf "https://www.dropbox.com/scl/fi/pelyq7ygf7uqa0ktjkwoj/DataStructures_Cheatsheet_Zero_To_Mastery_V1.01.pdf?rlkey=fztti2ursdlrm5maqo9wwjnl5&dl=0"

import PyPDF2
import numpy as np
from sentence_transformers import SentenceTransformer
import faiss

In [None]:
# Function to extract text from the PDF
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of the PDF at ``pdf_path`` as one string.

    Pages are concatenated in document order with no separator. Pages for
    which extraction yields nothing (``None`` or an empty string) contribute
    nothing to the result.
    """
    with open(pdf_path, 'rb') as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        # Extraction must happen while the file handle is still open.
        page_texts = [pg.extract_text() for pg in pdf_reader.pages]
    return "".join(piece for piece in page_texts if piece)

# Function to split text into chunks
def split_text_into_chunks(text, chunk_size=500, overlap=0):
    """Split ``text`` into consecutive character windows.

    Args:
        text: The string to split.
        chunk_size: Maximum number of characters per chunk; must be positive.
        overlap: Number of characters each chunk shares with the previous one.
            Defaults to 0, which gives back-to-back, non-overlapping chunks.

    Returns:
        A list of substrings covering ``text`` in order. The last chunk may be
        shorter than ``chunk_size``. An empty ``text`` yields an empty list.

    Raises:
        ValueError: If ``chunk_size`` is not positive, or ``overlap`` is
            negative or not smaller than ``chunk_size``.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if not 0 <= overlap < chunk_size:
        raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")

    step = chunk_size - overlap
    pieces = []
    for start in range(0, len(text), step):
        pieces.append(text[start:start + chunk_size])
        # Once a window reaches the end of the text, any further window would
        # only repeat characters already covered (relevant when overlap > 0).
        if start + chunk_size >= len(text):
            break
    return pieces

# Load pre-trained model for embeddings
# `model` is shared module-level state: `create_embeddings` encodes the
# document chunks with it and `retrieve_relevant_text` encodes the query with
# it, so both sides of the similarity search use the same encoder.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Function to create embeddings
def create_embeddings(text_chunks):
    """Encode ``text_chunks`` with the module-level ``model``.

    Returns whatever ``model.encode`` produces for the given chunks,
    unchanged.
    """
    vectors = model.encode(text_chunks)
    return vectors

# Function to create FAISS index
def create_faiss_index(embeddings):
    """Build an exact L2-distance FAISS index over ``embeddings``.

    Args:
        embeddings: Array-like of shape ``(n_vectors, dimension)`` with at
            least one row.

    Returns:
        A ``faiss.IndexFlatL2`` holding every row of ``embeddings`` in order,
        so result id ``i`` corresponds to row ``i``.

    Raises:
        ValueError: If ``embeddings`` is empty or not two-dimensional, for
            example because no text chunks were produced.
    """
    # FAISS only accepts C-contiguous float32 matrices; normalise here so that
    # float64 or non-contiguous input does not fail inside the native call.
    vectors = np.ascontiguousarray(embeddings, dtype=np.float32)
    if vectors.ndim != 2 or vectors.shape[0] == 0:
        raise ValueError(
            f"embeddings must be a non-empty 2-D array, got shape {vectors.shape}"
        )

    dimension = vectors.shape[1]
    index = faiss.IndexFlatL2(dimension)  # L2 distance
    index.add(vectors)  # Add embeddings to the index
    return index

# Extract text from the PDF
pdf_path = 'DataStructures_Cheatsheet.pdf'
text = extract_text_from_pdf(pdf_path)

# Fail early with a clear message: without any text there are no chunks to
# embed, and the failure would otherwise surface later as an obscure error
# while building the index.
if not text.strip():
    raise ValueError(
        f"No extractable text found in {pdf_path!r}; check that the download produced a valid, text-based PDF."
    )

# Create chunks and embeddings
chunks = split_text_into_chunks(text)
embeddings = create_embeddings(chunks)

# Create FAISS index (row i of `embeddings` corresponds to `chunks[i]`)
index = create_faiss_index(embeddings)

# Function to retrieve relevant text
def retrieve_relevant_text(query, index, chunks, k=5):
    """Return the text chunks most similar to ``query`` as one string.

    Args:
        query: The user's question.
        index: FAISS index whose row ids line up with positions in ``chunks``.
        chunks: The list of text chunks the index was built from.
        k: Maximum number of chunks to retrieve (default 5).

    Returns:
        The retrieved chunks, nearest first, joined with single spaces. An
        empty string is returned when nothing can be retrieved.
    """
    # Never ask for more neighbours than exist: FAISS pads missing results
    # with id -1, and chunks[-1] would silently repeat the last chunk.
    top_k = min(k, index.ntotal, len(chunks))
    if top_k <= 0:
        return ""

    # Process the query to create its embedding (same encoder as the chunks)
    query_embedding = model.encode([query])

    # Search for the closest embeddings in the FAISS index
    _distances, indices = index.search(query_embedding, top_k)

    # Keep only ids that refer to a real chunk (guards against -1 padding).
    relevant_chunks = [chunks[i] for i in indices[0] if 0 <= i < len(chunks)]

    # Return the relevant text chunks as a single string
    return " ".join(relevant_chunks)


In [24]:
# Function to handle conversation
# NOTE: this list is module-level state, so every caller of
# chatbot_conversation (including every visitor of the web UI) shares it.
conversation_history = []

# Base response prompt with CO-STAR framework structure. Built once at module
# level because it never changes between calls.
_BASE_PROMPT = (
    "1. Context: You are a helpful chatbot for students learning Data Structures and Algorithms.\n"
    "2. Outcome: Answer my question in 50 words or less, and ask me a follow up question on the same topic.\n"
    "3. Scale: The chatbot will handle queries from students with varying levels of knowledge in DSA, offering tailored explanations and probing questions based on their responses.\n"
    "4. Tone: Maintain a positive and motivational tone throughout, fostering a sense of empowerment and encouragement. It should feel like a friendly guide offering valuable insights.\n"
    "5. Actor: The primary participants are the students asking questions and you, the chatbot providing answers and guidance.\n"
    "6. Resources: The chatbot leverages the llama model and the provided textbook information to generate accurate and insightful responses, along with a structured approach to maintaining context and engaging the student.\n"
    # The trailing space keeps this sentence from running into the next one
    # when the adjacent literals are concatenated.
    "When responding, make sure to provide an answer and then ask a relevant follow-up question. I should be allowed to change the topic without first answering your follow-up question as long it is still related to Data Structures and Algorithm. "
    "If I mention anything irrelevant to Data Structures and Algorithms, decline to answer and tell me to ask a question related to DSA."
)

# Number of history lines sent as context: 3 user/chatbot pairs.
_HISTORY_WINDOW = 6


def chatbot_conversation(user_input):
    """Answer one user message with the LLM, using history and PDF context.

    The prompt is assembled from the fixed base prompt, the last three
    user/chatbot exchanges, the chunks retrieved for ``user_input`` and the
    message itself. The exchange is recorded in ``conversation_history`` only
    after the LLM has answered, so a failed call leaves the history untouched.

    Args:
        user_input: The text typed by the user.

    Returns:
        The LLM's response, or a short reminder when ``user_input`` is blank
        (no retrieval or LLM call is made in that case).
    """
    if not user_input or not user_input.strip():
        return "Please enter a question related to Data Structures and Algorithms."

    # Build the context from earlier turns only: the current message is added
    # to the prompt separately below and must not appear twice.
    context = " ".join(conversation_history[-_HISTORY_WINDOW:])

    # Retrieve relevant text from the PDF
    relevant_text = retrieve_relevant_text(user_input, index, chunks)

    # Combine base prompt with context and user input
    full_prompt = f"{_BASE_PROMPT} Context from previous conversation: {context} Relevant information from textbook (For RAG, use only if necessary): {relevant_text} User Input/Response: {user_input}"

    # Use the LLM to generate a response
    response = llm.invoke(full_prompt)

    # Record the complete exchange only once it has succeeded, so the history
    # always consists of whole user/chatbot pairs.
    conversation_history.append(f"User: {user_input}")
    conversation_history.append(f"Chatbot: {response}")

    return response  # Return the generated response

In [None]:
import gradio as gr

# Build the web UI: one text box for the question, one for the answer, with
# every submission routed through chatbot_conversation.
interface = gr.Interface(
    title="DSA Chatbot",
    fn=chatbot_conversation,
    inputs=gr.Textbox(
        label="User Input",
        placeholder="Type your DSA-related question here...",
    ),
    outputs=gr.Textbox(
        label="Chatbot Response",
    ),
)

# Start serving the interface with debugging enabled
interface.launch(debug=True)