# CTSE Lecture Notes Chatbot – SE4010 Assignment 2
**Name:** Ishara Madusanka

**ID:** IT21189944  
**Year 4 Semester 2, 2025**  


## 1. Setup and Install Packages

In [None]:
%pip install -qU langchain langchain-community langchain-google-genai langchain-chroma pypdf

## 2. Set Up the Gemini API Key

In [None]:
import os
import getpass

# Ask for the Gemini API key only when the environment does not already hold a
# non-empty one; getpass reads the key without echoing what is typed.
if "GOOGLE_API_KEY" not in os.environ or not os.environ["GOOGLE_API_KEY"]:
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter API key for Google Gemini: ")

## 3. Mount Google Drive and Load PDF

In [None]:
# Colab-only step: mount the user's Google Drive at /content/drive so that
# files stored there can be opened through normal file paths.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## 4. Extract Text From PDF

In [None]:
from langchain_community.document_loaders import PyPDFLoader

# Load the lecture notes PDF from the mounted Google Drive.
# NOTE: the path is hard-coded and only resolves once Drive is mounted at /content/drive.
pdf_path = "/content/drive/MyDrive/CTSE/CTSE_Lecture_Notes.pdf"
loader = PyPDFLoader(pdf_path)
# load() extracts the PDF into LangChain documents (presumably one per page — confirm).
documents = loader.load()
# Printed unconditionally once load() returns; it does not verify that any text was extracted.
print("Found PDF file and text extracted")

Found PDF file and text extracted


## 5. Split Text Into Chunks

In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Cut the loaded documents into overlapping pieces: each chunk holds at most
# 1000 units and shares 200 with its neighbour (units are whatever the
# splitter's length function counts — presumably characters; confirm).
splitter = RecursiveCharacterTextSplitter(chunk_overlap=200, chunk_size=1000)
chunks = splitter.split_documents(documents)

## 6. Generate Embeddings and Store in Vector DB

In [None]:
%pip install --upgrade langchain-chroma



In [None]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_chroma import Chroma

# Use Gemini's embedding model
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

# Create (or reopen) the vector store; persist_directory keeps the collection
# on disk, so it survives kernel restarts within the same runtime.
vector_store = Chroma(
    collection_name="ctse_notes",
    embedding_function=embeddings,
    persist_directory="./chroma_db"
)

# Give every chunk a deterministic ID built from its source file, page number
# and position in the chunk list. Without explicit IDs each run of this cell
# stores the chunks under freshly generated IDs, so re-running the cell appends
# another full copy of the notes to the persisted collection and the retriever
# starts returning duplicate passages. With stable IDs a re-run overwrites the
# same entries instead. The position is part of the ID so that two chunks with
# the same source/page never collide within one batch.
chunk_ids = [
    f"{chunk.metadata.get('source', 'unknown')}#page={chunk.metadata.get('page', 'na')}#chunk={index}"
    for index, chunk in enumerate(chunks)
]

# Add the split lecture notes to the vector store
vector_store.add_documents(chunks, ids=chunk_ids)

print(f"✅ Total chunks created: {len(chunks)}")
print("✅ Chunks and embeddings stored in ChromaDB.")

✅ Total chunks created: 383
✅ Chunks and embeddings stored in ChromaDB.


## 7. Initialize Gemini LLM

In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Gemini chat model ("gemini-2.0-flash") used to answer questions; the low
# temperature (0.1) is intended to keep answers focused rather than creative.
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0.1)


## 8. Create Retrieval QA Chain

In [None]:
from langchain.chains import RetrievalQA

# Retriever over the Chroma collection, created with the library's default search settings.
retriever = vector_store.as_retriever()

# Question-answering chain: the retriever supplies context passages and the LLM
# writes the answer. return_source_documents=True asks the chain to include the
# retrieved documents in its output; the chat UI only reads the 'result' key.
# NOTE(review): RetrievalQA appears to be deprecated in recent LangChain
# releases — confirm against the installed version before migrating.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    return_source_documents=True
)


## 9. Chat Bot

In [None]:
%pip install -q ipywidgets
import ipywidgets as widgets
from IPython.display import display, clear_output, HTML


In [None]:
# Free-text field where the user types a question; it stretches to fill the row.
input_box = widgets.Text(
    placeholder="Ask something from CTSE lecture notes...",
    description="💬",
    value="",
    layout=widgets.Layout(width="auto", flex="1 1 auto"),
)

# Fixed-width submit button shown next to the text field.
button = widgets.Button(
    description="Ask Gemini",
    icon="comments",
    tooltip="Submit your question",
    button_style="success",
    layout=widgets.Layout(width="150px"),
)

# Scrollable, bordered area that holds the rendered conversation.
# NOTE(review): ipywidgets' Layout may not define a background_color trait —
# confirm that this key has any visible effect.
chat_output = widgets.Output(
    layout=dict(
        border="2px solid #e0e0e0",
        height="450px",
        overflow_y="scroll",
        padding="10px",
        background_color="#f9f9f9",
    )
)

# (question, answer) pairs in the order they were asked.
chat_history = []

# Update chat display
def update_chat_display():
    """Re-render the whole conversation inside ``chat_output``.

    Reads the module-level ``chat_history`` list of ``(question, answer)``
    pairs and replaces the current contents of the output widget with one
    styled bubble per message. Message text is HTML-escaped before it is
    inserted, so neither the user's question nor the model's answer can
    inject markup into the page.
    """
    # Imported locally so this cell does not rely on an import made elsewhere.
    from html import escape

    def as_html_text(text):
        # Escape first so the text is shown literally, then turn newlines into
        # <br> so multi-line answers keep their line breaks.
        return escape(str(text)).replace("\n", "<br>")

    with chat_output:
        # wait=True postpones clearing until the new HTML is ready, which
        # avoids the chat area flickering empty between redraws.
        clear_output(wait=True)
        markup = """
        <style>
            .chat-box { display: flex; flex-direction: column; font-family: Arial, sans-serif; }
            .user-msg, .gemini-msg {
                margin: 10px 0;
                padding: 10px 15px;
                border-radius: 12px;
                max-width: 95%;
                word-wrap: break-word;
            }
            .user-msg {
                background-color: #e8f0fe;
                color: #1a73e8;
                font-weight: 500;
                align-self: flex-start;
            }
            .gemini-msg {
                background-color: #e6f4ea;
                color: #188038;
                align-self: flex-end;
            }
        </style>
        <div class='chat-box'>
        """
        for q, a in chat_history:
            markup += f"<div class='user-msg'>🧔‍♂️<br>{as_html_text(q)}</div>"
            markup += f"<div class='gemini-msg'>🤖<br>{as_html_text(a)}</div>"
        markup += "</div>"
        display(HTML(markup))

# Common function to process a query
def process_query(query):
    """Answer one question and append the exchange to the chat.

    Args:
        query: Question text (both callers strip it first). Empty values are
            ignored.

    The input box is cleared, the question is sent through ``qa_chain`` and
    the ``(query, answer)`` pair is appended to ``chat_history`` before the
    chat area is redrawn. If the chain raises (network failure, quota, bad
    key, ...), the error is shown as the answer: this function runs inside a
    widget callback, where an uncaught exception would otherwise leave the
    user with a cleared input box and no feedback.
    """
    if not query:
        return

    input_box.value = ""
    try:
        result = qa_chain.invoke(query)
        answer = result['result']
    except Exception as exc:  # broad on purpose: any failure must reach the user
        answer = f"⚠️ Sorry, the question could not be answered ({type(exc).__name__}: {exc})"

    chat_history.append((query, answer))
    update_chat_display()

# Button click handler
def on_button_click(b):
    """Handle a click on the "Ask Gemini" button.

    The button instance ``b`` passed by ipywidgets is not used; the question
    is read from ``input_box`` with surrounding whitespace removed.
    """
    question = input_box.value.strip()
    process_query(question)

# Enter key handler
def on_enter_key_entered(text_widget):
    """Handle Enter being pressed in the question field.

    ``text_widget`` is the widget that fired the event; its current value is
    stripped of surrounding whitespace and handed to ``process_query``.
    """
    question = text_widget.value.strip()
    process_query(question)

# Bind events: both the button click and pressing Enter in the text field
# end up in process_query via their respective handlers.
button.on_click(on_button_click)
# NOTE(review): Text.on_submit appears to be deprecated in ipywidgets 8 in
# favour of observing 'value' with continuous_update=False — confirm against
# the installed version; switching would also change when the handler fires.
input_box.on_submit(on_enter_key_entered)

# Layout: Input and Button side by side
input_with_button = widgets.HBox([input_box, button])
# Empty HTML widget whose only job is to add 10px of vertical space above the chat area.
spacer = widgets.HTML("<div style='margin-top: 10px;'></div>")

# Final display: input row, spacer, then the chat area stacked vertically.
display(widgets.VBox([input_with_button, spacer, chat_output]))

VBox(children=(HBox(children=(Text(value='', description='💬', layout=Layout(flex='1 1 auto', width='auto'), pl…