# CTSE Lecture Notes Chatbot
### Student Name: Samarasinghe.V.R
### Student ID: IT21321368

### Install required packages

In [1]:
!pip install langchain langchain-google-genai langchain-community pypdf chromadb sentence-transformers




[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


### Import Libraries

In [2]:
import os
import re
from getpass import getpass

from IPython.display import display, Markdown
from langchain_google_genai import GoogleGenerativeAI
from langchain.prompts import ChatPromptTemplate
from langchain.prompts import PromptTemplate
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_google_genai.embeddings import GoogleGenerativeAIEmbeddings
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain


### Set up API Key

In [3]:
# Never commit credentials to the notebook. Take the key from the environment
# when it is already set; otherwise ask for it with a hidden prompt so it does
# not end up in the source or in the saved cell output.
# NOTE(review): the key that was previously hardcoded here should be revoked.
if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass("Enter your Google API key: ")

### List available models

In [4]:
import google.generativeai as genai

# Configure the SDK from the GOOGLE_API_KEY environment variable set in the
# API-key cell, rather than repeating the secret as a literal in source.
genai.configure(api_key=os.environ["GOOGLE_API_KEY"])

# Print the identifier of every model returned for this key
for model in genai.list_models():
    print(model.name)

models/chat-bison-001
models/text-bison-001
models/embedding-gecko-001
models/gemini-1.0-pro-vision-latest
models/gemini-pro-vision
models/gemini-1.5-pro-latest
models/gemini-1.5-pro-001
models/gemini-1.5-pro-002
models/gemini-1.5-pro
models/gemini-1.5-flash-latest
models/gemini-1.5-flash-001
models/gemini-1.5-flash-001-tuning
models/gemini-1.5-flash
models/gemini-1.5-flash-002
models/gemini-1.5-flash-8b
models/gemini-1.5-flash-8b-001
models/gemini-1.5-flash-8b-latest
models/gemini-1.5-flash-8b-exp-0827
models/gemini-1.5-flash-8b-exp-0924
models/gemini-2.5-pro-exp-03-25
models/gemini-2.5-pro-preview-03-25
models/gemini-2.5-flash-preview-04-17
models/gemini-2.5-flash-preview-04-17-thinking
models/gemini-2.5-pro-preview-05-06
models/gemini-2.0-flash-exp
models/gemini-2.0-flash
models/gemini-2.0-flash-001
models/gemini-2.0-flash-exp-image-generation
models/gemini-2.0-flash-lite-001
models/gemini-2.0-flash-lite
models/gemini-2.0-flash-preview-image-generation
models/gemini-2.0-flash-lite-p

### Load CTSE Lecture PDFs

In [5]:
def load_ctse_pdf(directory_path=None):
    """Load the pages of the first PDF found in the lecture-notes directory.

    Args:
        directory_path: Folder to search for PDF files. When omitted, the
            CTSE_NOTES_DIR environment variable is used if set, otherwise the
            original assignment directory.

    Returns:
        list: The documents produced by PyPDFLoader for the selected PDF, each
        with ``source`` set to the PDF file name and a ``page`` entry
        guaranteed to exist. An empty list is returned when the directory is
        missing, contains no PDF, or the PDF fails to load (a message is
        printed in each case).
    """
    if directory_path is None:
        # Allow the location to be configured without editing the notebook;
        # the original path remains the fallback for backward compatibility.
        directory_path = os.environ.get(
            "CTSE_NOTES_DIR",
            "D:\\4th year\\2nd sem\\CTSE\\Assignment 2\\lecture_notes",
        )

    documents = []

    # isdir (not exists) so that a regular file at this path is rejected too
    if not os.path.isdir(directory_path):
        print(f"Directory not found: {directory_path}")
        return documents

    # os.listdir returns entries in arbitrary order, so sort to make the
    # "first PDF" choice deterministic. Match the extension case-insensitively
    # and skip directories that merely have a .pdf-looking name.
    pdf_files = sorted(
        f for f in os.listdir(directory_path)
        if f.lower().endswith('.pdf')
        and os.path.isfile(os.path.join(directory_path, f))
    )

    if not pdf_files:
        print(f"No PDF files found in {directory_path}")
        return documents

    if len(pdf_files) > 1:
        print(f"Found {len(pdf_files)} PDF files. Using the first one: {pdf_files[0]}")
    else:
        print(f"Found PDF: {pdf_files[0]}")

    # Only the first PDF (in sorted order) is loaded
    pdf_file = pdf_files[0]
    pdf_path = os.path.join(directory_path, pdf_file)

    try:
        loader = PyPDFLoader(pdf_path)
        documents = loader.load()

        for doc in documents:
            # Store the bare file name rather than the full local path
            doc.metadata["source"] = pdf_file
            # Downstream code relies on a "page" entry being present
            if "page" not in doc.metadata:
                doc.metadata["page"] = "unknown"

        print(f"Successfully loaded {len(documents)} pages from {pdf_file}")
    except Exception as e:
        # Best effort: report the failure and let the caller decide what to do
        # with whatever is in `documents` (empty if the load itself failed).
        print(f"Error loading {pdf_file}: {str(e)}")

    return documents


### CTSE Lecture Notes Q&A Chatbot Implementation

In [None]:
class CTSEChatbot:
    """Retrieval-based Q&A assistant over the CTSE lecture-notes PDF.

    On construction the PDF is loaded, split into chunks, embedded into a
    Chroma vector store and wired to a Gemini model through a retrieval chain.
    ``ask`` answers a single question; ``run_interactive`` runs a prompt loop.
    """

    # Inline citation tag the prompt asks the model to emit, e.g. "[Page: 12]"
    _CITATION_PATTERN = r'\[Page:\s*(.*?)\]'

    def __init__(self):
        """Build the document index and the retrieval chain.

        Raises:
            ValueError: If ``load_ctse_pdf`` returns no documents.
        """
        print("\n" + "="*70)
        print("\033[1m\033[94m📚 INITIALIZING CTSE LECTURE NOTES CHATBOT 📚\033[0m".center(70))
        print("="*70)

        # Load documents
        print("\033[93m📂 Loading CTSE PDF...\033[0m")
        self.documents = load_ctse_pdf()

        if not self.documents:
            raise ValueError("❌ No documents were loaded. Please check your PDF file.")

        # Split documents into chunks
        print("\033[93m🔄 Processing documents...\033[0m")
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,  # Smaller chunks for more precise retrieval
            chunk_overlap=100
        )
        self.splits = text_splitter.split_documents(self.documents)

        # Give every chunk a human-readable page number for citations.
        # PyPDFLoader's "page" metadata is 0-based per the LangChain docs
        # (confirm for the installed version), hence the +1. Non-integer
        # values such as "unknown" are passed through unchanged.
        for chunk in self.splits:
            page = chunk.metadata.get("page", "unknown")
            chunk.metadata["page_number"] = page + 1 if isinstance(page, int) else page
        print(f"\033[92m✅ Split into {len(self.splits)} chunks\033[0m")

        # Create vector store
        print("\033[93m🔍 Creating vector store...\033[0m")
        embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

        # Use a specific directory for the database
        db_directory = "ctse_db"
        os.makedirs(db_directory, exist_ok=True)

        # from_documents appends to whatever is already persisted, so without
        # this every re-run would add a second copy of each chunk and the
        # top-k results would fill up with duplicates. Drop the old collection
        # so the index always reflects exactly the current PDF.
        Chroma(
            persist_directory=db_directory,
            embedding_function=embeddings
        ).delete_collection()

        self.vector_store = Chroma.from_documents(
            documents=self.splits,
            embedding=embeddings,
            persist_directory=db_directory
        )

        # Create retriever
        self.retriever = self.vector_store.as_retriever(
            search_type="similarity",
            search_kwargs={"k": 4}  # Retrieve top 4 chunks
        )

        # Create LLM
        print("\033[93m🧠 Setting up AI model...\033[0m")
        self.llm = GoogleGenerativeAI(model="gemini-2.0-flash-lite", temperature=0.2)

        # Create prompt template
        self.prompt = ChatPromptTemplate.from_template("""
        You are a helpful teaching assistant for a Current Trends in Software Engineering (CTSE) course.
        Answer the following question based only on the provided context from the CTSE lecture notes.
        If you don't know the answer or can't find it in the context, please say "I don't have enough information to answer this question." 
        Don't make up information.
        
        Each context excerpt starts with a tag giving its page, for example [Page: 12].
        For each piece of information you use, cite the source using the format [Page: page_number],
        using only page numbers that appear in those tags.
        
        Context:
        {context}
        
        Question: {input}
        
        Answer (with page citations):
        """)

        # By default the stuff chain passes only page_content to the model, so
        # it never sees page numbers and can only invent them. Prefix every
        # retrieved chunk with its page tag so citations are grounded.
        document_prompt = PromptTemplate.from_template(
            "[Page: {page_number}]\n{page_content}"
        )

        # Create the document chain and retrieval chain
        self.document_chain = create_stuff_documents_chain(
            self.llm, self.prompt, document_prompt=document_prompt
        )
        self.chain = create_retrieval_chain(self.retriever, self.document_chain)

        print("\033[1m\033[92m✅ Chatbot initialization complete and ready to answer questions!\033[0m")

    @classmethod
    def _format_answer(cls, answer):
        """Move inline [Page: N] citations into a single sorted sources footer."""
        pages = []
        for group in re.findall(cls._CITATION_PATTERN, answer):
            # A single tag may list several pages, e.g. "[Page: 3, 4]"
            for label in group.split(","):
                label = label.strip()
                if label and label not in pages:
                    pages.append(label)

        if not pages:
            return answer

        # Numeric pages first in ascending order, then any non-numeric labels
        pages.sort(key=lambda p: (0, int(p), "") if p.isdecimal() else (1, 0, p))

        # Remove each tag together with the blanks before it so no stray
        # space is left in front of punctuation ("container ." -> "container.")
        clean_answer = re.sub(r'[ \t]*' + cls._CITATION_PATTERN, '', answer).strip()
        return clean_answer + "\n\n**Sources:** Pages " + ", ".join(pages)

    def ask(self, question):
        """Answer one question from the lecture notes.

        Args:
            question: The user's question text.

        Returns:
            str: Markdown answer, followed by a "**Sources:**" footer when the
            model cited pages. A fixed prompt message is returned for an
            empty/blank/None question, and an error message string is
            returned if the chain raises.
        """
        if not question or not question.strip():
            return "Please enter a question."

        try:
            response = self.chain.invoke({"input": question})
            return self._format_answer(response["answer"])
        except Exception as e:
            # Keep the chat session alive; surface the failure as the answer
            return f"Error processing your question: {str(e)}"

    def run_interactive(self):
        """Run an interactive chat loop until the user types exit/quit/bye."""
        print("\n" + "="*50)
        print("\033[1m\033[94m🤖 CTSE Lecture Notes Chatbot 🤖\033[0m".center(50))
        print("="*50)
        print("\033[92m✓ Ask any questions about your CTSE lecture notes")
        print("✓ Get answers with specific page references")
        print("✓ Type 'exit', 'quit', or 'bye' to end the session\033[0m")

        while True:
            print("\n\033[1m\033[96m" + "-"*50 + "\033[0m")
            print("\n")
            try:
                # Strip so that " exit " or "quit\t" still ends the session
                question = input("\033[1m❓ Your question: \033[0m").strip()
            except (EOFError, KeyboardInterrupt):
                # Closed stdin or an interrupted kernel ends the session cleanly
                question = "exit"

            if question.lower() in ['exit', 'quit', 'bye']:
                print("\n\033[93m👋 Thank you for using the CTSE Chatbot! Goodbye!\033[0m")
                break

            print("\n\033[95m🔍 Searching lecture notes...\033[0m")
            answer = self.ask(question)
            print("\n\033[1m\033[92m💡 Answer:\033[0m")
            display(Markdown(answer))

# Example usage
def main():
    """Demo driver: build the chatbot, answer canned questions, then chat.

    Any exception raised along the way is caught and printed as "Error: ...".
    """
    banner_rule = "=" * 70
    divider = "\n" + "\033[90m" + "•" * 70 + "\033[0m"
    demo_questions = (
        "What are the main topics covered in CTSE?",
        "Explain the importance of software engineering practices.",
        "What are the current trends in software development?",
    )

    try:
        bot = CTSEChatbot()

        # Banner for the scripted demonstration
        print("\n" + banner_rule)
        print("\033[1m\033[96m🔍 SAMPLE QUESTIONS DEMONSTRATION 🔍\033[0m".center(70))
        print(banner_rule)

        number = 0
        for prompt_text in demo_questions:
            number += 1
            print(f"\n\033[1m\033[95m❓ Sample Question {number}:\033[0m")
            print(f"\033[95m   {prompt_text}\033[0m")
            print("\n\033[93m🔍 Searching lecture notes...\033[0m")

            reply = bot.ask(prompt_text)
            print("\n\033[1m\033[92m💡 Answer:\033[0m")
            display(Markdown(reply))
            print(divider)

        # Hand control over to the user
        bot.run_interactive()

    except Exception as err:
        print(f"Error: {str(err)}")

# Launch the demo only when this code is executed as the top-level program
# (__name__ is "__main__"), not when it is imported as a module.
if __name__ == "__main__":
    main()


       [1m[94m📚 INITIALIZING CTSE LECTURE NOTES CHATBOT 📚[0m       
[93m📂 Loading CTSE PDF...[0m
Found PDF: CTSE_Lecture_Notes.pdf
Successfully loaded 408 pages from CTSE_Lecture_Notes.pdf
[93m🔄 Processing documents...[0m
[92m✅ Split into 382 chunks[0m
[93m🔍 Creating vector store...[0m
[93m🧠 Setting up AI model...[0m
[1m[92m✅ Chatbot initialization complete and ready to answer questions![0m

           [1m[96m🔍 SAMPLE QUESTIONS DEMONSTRATION 🔍[0m            

[1m[95m❓ Sample Question 1:[0m
[95m   What are the main topics covered in CTSE?[0m

[93m🔍 Searching lecture notes...[0m

[1m[92m💡 Answer:[0m


I don't have enough information to answer this question.



[90m••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••[0m

[1m[95m❓ Sample Question 2:[0m
[95m   Explain the importance of software engineering practices.[0m

[93m🔍 Searching lecture notes...[0m

[1m[92m💡 Answer:[0m


I don't have enough information to answer this question.



[90m••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••[0m

[1m[95m❓ Sample Question 3:[0m
[95m   What are the current trends in software development?[0m

[93m🔍 Searching lecture notes...[0m

[1m[92m💡 Answer:[0m


The current trends in software development are:
*   Continuous Integration (CI) 
*   Continuous Delivery (CD) 
*   Continuous Deployment (CD) 
*   Microservices

**Sources:** Pages 1, 2, 3, 4


[90m••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••[0m

   [1m[94m🤖 CTSE Lecture Notes Chatbot 🤖[0m    
[92m✓ Ask any questions about your CTSE lecture notes
✓ Get answers with specific page references
✓ Type 'exit', 'quit', or 'bye' to end the session[0m

[1m[96m--------------------------------------------------[0m




[1m❓ Your question: [0m what is docker?



[95m🔍 Searching lecture notes...[0m

[1m[92m💡 Answer:[0m


Docker provides the ability to package and run applications within a loosely isolated environment which is a container . It is a container engine (runtime + tool for managing containers and images) . Docker provides tooling and a platform to manage the lifecycle of your containers . You can develop your apps and supporting components using containers . You can distribute and test your apps as a container . You have the ability to deploy your app as a container or an orchestrated service, in whatever environment which supports Docker installation . It shares the same OS kernel . It works on all major Linux Distributions and containers native to Windows Server (specific versions) .

**Sources:** Pages 1


[1m[96m--------------------------------------------------[0m




[1m❓ Your question: [0m what is kubernetes?



[95m🔍 Searching lecture notes...[0m

[1m[92m💡 Answer:[0m


Kubernetes (k8s) is an open-source platform for automating deployment, scaling, and management of containers at scale . It was created by Google as an open-source container orchestration platform .

**Sources:** Pages 1, 2, 3, 4


[1m[96m--------------------------------------------------[0m


