In [5]:
pip install openai PyPDF2 numpy scikit-learn ipywidgets

Defaulting to user installation because normal site-packages is not writeable
Collecting PyPDF2
  Using cached pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Using cached pypdf2-3.0.1-py3-none-any.whl (232 kB)
Installing collected packages: PyPDF2
Successfully installed PyPDF2-3.0.1
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.3.1 -> 25.1
[notice] To update, run: C:\Program Files\Python313\python.exe -m pip install --upgrade pip


In [16]:
# %% [markdown]
# # CTSE Lecture Notes Chatbot
# 
# This chatbot answers questions based on CTSE lecture notes using OpenRouter's LLM API.

# %%
# Import required libraries
import os
import openai
from PyPDF2 import PdfReader
from typing import List
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import ipywidgets as widgets
from IPython.display import display, Markdown, clear_output


In [20]:
# Configuration
class Config:
    """Central settings for the OpenRouter-backed lecture chatbot.

    The API key is taken from the ``OPENROUTER_API_KEY`` environment variable
    so that no secret is stored in the notebook. Set it before starting the
    kernel (or with ``%env OPENROUTER_API_KEY=...`` in a cell that is never
    committed).
    """
    # SECURITY: never paste a literal key here. A key that was ever committed
    # in this notebook must be treated as compromised and rotated.
    # Falls back to an empty string when the variable is unset.
    OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "")
    OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"
    MODEL_NAME = "mistralai/mistral-7b-instruct"  # Cost-effective model
    TEMPERATURE = 0.3  # Controls randomness of responses
    MAX_TOKENS = 1000  # Limit response length
    CONTEXT_TOKENS = 3000  # Max context to send to LLM
    CHUNK_SIZE = 500  # Size of text chunks for processing, in words

In [21]:
# Shared OpenAI-compatible client, pointed at OpenRouter instead of api.openai.com
client = openai.OpenAI(api_key=Config.OPENROUTER_API_KEY, base_url=Config.OPENROUTER_BASE_URL)

In [22]:
# Document Processing Functions
def extract_text_from_pdf(pdf_path: str) -> str:
    """Extract the text of every page of a PDF file.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The text of all pages that yielded text, joined with a newline so the
        last word of one page is not fused with the first word of the next.
        Returns whatever was collected so far (possibly an empty string) when
        the file cannot be opened or parsed; the error is printed, not raised.
    """
    page_texts = []
    try:
        with open(pdf_path, "rb") as file:
            reader = PdfReader(file)
            for page in reader.pages:
                page_text = page.extract_text()
                # extract_text() may give None or "" for pages without text
                if page_text:
                    page_texts.append(page_text)
    except Exception as e:
        # Deliberately best-effort: callers treat an empty result as "nothing loaded"
        print(f"Error reading PDF {pdf_path}: {str(e)}")
    return "\n".join(page_texts)

def chunk_text(text: str, chunk_size: int = Config.CHUNK_SIZE) -> List[str]:
    """Break text into consecutive pieces of at most ``chunk_size`` words.

    The text is split on whitespace; each piece is re-joined with single
    spaces. Empty or whitespace-only text gives an empty list.
    """
    tokens = text.split()
    pieces = []
    for start in range(0, len(tokens), chunk_size):
        window = tokens[start:start + chunk_size]
        pieces.append(' '.join(window))
    return pieces

def create_vector_index(text_chunks: List[str]):
    """Fit a TF-IDF model on the chunks and return it with the chunk matrix.

    Matching is lexical (term weights with English stop words removed).

    Returns:
        A ``(vectorizer, tfidf_matrix)`` pair: the fitted vectorizer and the
        result of ``fit_transform`` on ``text_chunks``.
    """
    tfidf = TfidfVectorizer(stop_words='english')
    chunk_matrix = tfidf.fit_transform(text_chunks)
    return tfidf, chunk_matrix


In [23]:
# Knowledge Base Class
class LectureNotesKB:
    """In-memory knowledge base with one TF-IDF index per lecture.

    Four dictionaries, all keyed by lecture name, are kept in step:
    ``notes`` (full text), ``chunks`` (list of text chunks), ``vectorizers``
    (fitted vectorizer) and ``tfidf_matrices`` (matrix of the chunks).
    """

    def __init__(self):
        self.notes = {}
        self.vectorizers = {}
        self.tfidf_matrices = {}
        self.chunks = {}

    def add_lecture(self, lecture_name: str, pdf_path: str):
        """Extract, chunk and index a PDF, registering it under ``lecture_name``.

        Nothing is registered (and a warning is printed) when the PDF yields
        no usable text or when the text cannot be indexed.
        """
        text = extract_text_from_pdf(pdf_path)
        # Whitespace-only text would produce zero chunks and break indexing
        if not text.strip():
            print(f"Warning: No text extracted from {pdf_path}")
            return

        chunks = chunk_text(text)
        try:
            vectorizer, tfidf_matrix = create_vector_index(chunks)
        except ValueError as e:
            # Raised by the vectorizer e.g. when the vocabulary ends up empty
            print(f"Warning: Could not index {pdf_path}: {e}")
            return

        self.notes[lecture_name] = text
        self.chunks[lecture_name] = chunks
        self.vectorizers[lecture_name] = vectorizer
        self.tfidf_matrices[lecture_name] = tfidf_matrix
        print(f"Loaded lecture: {lecture_name} ({len(chunks)} chunks)")

    def get_relevant_chunks(self, lecture_name: str, query: str, top_k: int = 3) -> List[str]:
        """Return up to ``top_k`` chunks of a lecture, most similar to ``query`` first.

        Returns an empty list when the lecture is unknown (a message is
        printed) or when ``top_k`` is not positive.
        """
        if lecture_name not in self.notes:
            print(f"Lecture {lecture_name} not found in knowledge base")
            return []

        # Guard explicitly: slicing with [-0:] would select every chunk
        if top_k <= 0:
            return []

        vectorizer = self.vectorizers[lecture_name]
        tfidf_matrix = self.tfidf_matrices[lecture_name]

        query_vec = vectorizer.transform([query])
        similarities = cosine_similarity(query_vec, tfidf_matrix).flatten()
        # argsort is ascending, so reverse it and keep the first top_k indices
        top_indices = similarities.argsort()[::-1][:top_k]

        lecture_chunks = self.chunks[lecture_name]
        return [lecture_chunks[i] for i in top_indices]

In [24]:
# Chatbot Class
class LectureChatbot:
    """Question-answering chatbot over lecture notes held in a LectureNotesKB.

    ``conversation_history`` holds the alternating user/assistant messages of
    successfully answered turns; it is sent with every request.
    """

    # Rough average for English text; used to turn the token budget into a
    # character budget without depending on a tokenizer.
    _CHARS_PER_TOKEN = 4

    def __init__(self):
        self.knowledge_base = LectureNotesKB()
        self.conversation_history = []

    def load_lecture(self, lecture_name: str, pdf_path: str):
        """Load lecture notes into the knowledge base"""
        self.knowledge_base.add_lecture(lecture_name, pdf_path)

    def generate_response(self, lecture_name: str, question: str) -> str:
        """Answer ``question`` using the most relevant chunks of a lecture.

        Args:
            lecture_name: Name the lecture was loaded under.
            question: The user's question.

        Returns:
            The model's answer, or a string starting with
            ``"Error generating response: "`` when the API call fails. Failed
            turns are not added to ``conversation_history``.
        """
        # Get relevant context from lecture notes
        relevant_chunks = self.knowledge_base.get_relevant_chunks(lecture_name, question)
        # CONTEXT_TOKENS is a token budget; slicing works on characters, so
        # convert with an approximate characters-per-token ratio.
        max_context_chars = Config.CONTEXT_TOKENS * self._CHARS_PER_TOKEN
        context = "\n\n".join(relevant_chunks)[:max_context_chars]

        # Prepare the prompt
        system_prompt = f"""You are an expert teaching assistant for the CTSE course. 
        Answer the student's question based strictly on the provided lecture notes context.
        If the answer isn't in the notes, say "I don't have that information in my lecture notes."
        
        Lecture Context:
        {context}
        """

        user_message = {"role": "user", "content": question}

        try:
            # Call OpenRouter API
            response = client.chat.completions.create(
                model=Config.MODEL_NAME,
                messages=[
                    {"role": "system", "content": system_prompt},
                    *self.conversation_history,
                    user_message
                ],
                temperature=Config.TEMPERATURE,
                max_tokens=Config.MAX_TOKENS
            )

            # content may be None; keep the history entries valid strings
            answer = response.choices[0].message.content or ""
        except Exception as e:
            return f"Error generating response: {str(e)}"

        # Record the turn only once it succeeded, so a failed request does not
        # leave an unanswered user message in later prompts.
        self.conversation_history.append(user_message)
        self.conversation_history.append({"role": "assistant", "content": answer})
        return answer


In [25]:
# UI Components
def create_chat_interface(chatbot: LectureChatbot, lecture_name: str):
    """Render a minimal chat UI bound to one lecture.

    Shows an output area above a row holding a text field and an "Ask"
    button. Each click with a non-blank question appends the question and the
    chatbot's answer to the output area and empties the text field.
    """
    transcript = widgets.Output()
    question_box = widgets.Text(
        layout=widgets.Layout(width='80%'),
        placeholder='Ask a question about the lecture...'
    )

    def handle_ask(_):
        with transcript:
            asked = question_box.value
            # Blank input: nothing to send, leave the field untouched
            if not asked.strip():
                return
            display(Markdown(f"**You:** {asked}"))
            reply = chatbot.generate_response(lecture_name, asked)
            display(Markdown(f"**Assistant:** {reply}"))
            question_box.value = ''

    ask_button = widgets.Button(description="Ask", layout=widgets.Layout(width='20%'))
    ask_button.on_click(handle_ask)

    controls = widgets.HBox([question_box, ask_button])
    display(widgets.VBox([transcript, controls]))


In [28]:
# Main Execution
def main():
    """Load the configured lecture PDFs and show the lecture picker and chat UI.

    Only lectures that were actually indexed by the chatbot are offered in the
    dropdown. When none could be loaded an error is printed and no UI is shown.
    """
    # Initialize chatbot
    chatbot = LectureChatbot()

    # Load lecture notes (replace with actual paths)
    LECTURE_NOTES = {
        "CTSE Lectures": "ctse_lecture_notes.pdf",
        # "AI in SE": "lectures/ai_in_se.pdf",
        # "DevOps": "lectures/devops.pdf",
    }

    # Create directory if it doesn't exist
    os.makedirs("lectures", exist_ok=True)

    # Load available lectures
    for name, path in LECTURE_NOTES.items():
        if os.path.exists(path):
            chatbot.load_lecture(name, path)
        else:
            print(f"Lecture file not found: {path}")

    # A file can exist and still fail to load (e.g. no extractable text), so
    # offer only the lectures the knowledge base really holds.
    loaded = chatbot.knowledge_base.notes
    available_lectures = [name for name in LECTURE_NOTES if name in loaded]
    if not available_lectures:
        print("Error: No lecture notes could be loaded; check the paths in LECTURE_NOTES")
        return

    lecture_dropdown = widgets.Dropdown(
        options=available_lectures,
        description='Select Lecture:',
        disabled=False
    )

    # Display UI
    display(lecture_dropdown)

    def on_lecture_change(change):
        # Rebuild the chat area for the newly selected lecture
        clear_output(wait=True)
        display(lecture_dropdown)
        create_chat_interface(chatbot, change['new'])

    # names='value' limits notifications to changes of the selected option
    lecture_dropdown.observe(on_lecture_change, names='value')
    create_chat_interface(chatbot, lecture_dropdown.value)

In [29]:
# Run the chatbot: build the UI by calling main() when this code is executed
# as the top-level module (__name__ is "__main__"), not when it is imported.
if __name__ == "__main__":
    main()

Loaded lecture: Software Architecture (5 chunks)


Dropdown(description='Select Lecture:', options=('Software Architecture',), value='Software Architecture')

VBox(children=(Output(), HBox(children=(Text(value='', layout=Layout(width='80%'), placeholder='Ask a question…