In [1]:
# CTSE Lecture Notes Chatbot - Enhanced Implementation
#
# This notebook implements a chatbot that answers questions about CTSE lecture notes
# using a Retrieval-Augmented Generation (RAG) approach with free LLMs.

# Step 1: Install Required Libraries
# Run this cell to install all the necessary packages
# Note: This step only needs to be run once

!pip install -U langchain langchain-huggingface sentence-transformers chromadb pdfminer.six faiss-cpu transformers torch accelerate tqdm tiktoken

# Step 2: Import Required Libraries

import os
import sys
import logging
from pathlib import Path
import time

# LangChain imports
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader, TextLoader
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

# Transformers for running local models
import torch
from transformers import (
    AutoTokenizer, 
    AutoModelForCausalLM,
    pipeline,
    BitsAndBytesConfig
)

# Set up logging with timestamps
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
    datefmt='%Y-%m-%d %H:%M:%S'
)
logger = logging.getLogger(__name__)

print(f"Python version: {sys.version}")
print("All libraries imported successfully!")

# Step 3: Check for GPU Availability and System Resources

# Check for GPU
if torch.cuda.is_available():
    device = torch.device("cuda")
    gpu_name = torch.cuda.get_device_name(0)
    gpu_memory = torch.cuda.get_device_properties(0).total_memory / 1e9  # Convert to GB
    print(f"GPU is available: {gpu_name} with {gpu_memory:.2f} GB memory")
else:
    device = torch.device("cpu")
    print("No GPU available, using CPU. This may be slower but will still work.")

# Step 4: Set Up Data Directory and Load PDF Files

# Define data directory
DATA_DIR = Path("./data")

# Create the directory if it doesn't exist
DATA_DIR.mkdir(exist_ok=True)

# Check for PDF files
pdf_files = list(DATA_DIR.glob("**/*.pdf"))
print(f"Found {len(pdf_files)} PDF files in the data directory:")
for file in pdf_files:
    print(f" - {file}")

if len(pdf_files) == 0:
    print("\nNo PDF files found! Please add your lecture notes to the data directory.")
    print("You can add them now and rerun this cell.")
    # Instead of stopping, we'll create a sample text file for testing if no PDFs exist
    sample_file = DATA_DIR / "sample_ctse_content.txt"
    if not sample_file.exists():
        with open(sample_file, "w") as f:
            f.write("""
            # Current Trends in Software Engineering (CTSE) - Sample Content
            
            ## Key Topics
            
            1. Agile Development Methodologies
               - Scrum
               - Kanban
               - Extreme Programming (XP)
               
            2. DevOps Practices
               - Continuous Integration (CI)
               - Continuous Deployment (CD)
               - Infrastructure as Code (IaC)
               
            3. Cloud Computing
               - Software as a Service (SaaS)
               - Platform as a Service (PaaS)
               - Infrastructure as a Service (IaaS)
               
            4. Artificial Intelligence in Software Engineering
               - Machine Learning Integration
               - Automated Testing with AI
               - RAG (Retrieval-Augmented Generation)
            """)
        print(f"Created a sample text file at {sample_file} for testing purposes.")

# Step 5: Load Documents (PDF and Text files)

# Set up document loaders
loaders = [
    DirectoryLoader(DATA_DIR, glob="**/*.pdf", loader_cls=PyPDFLoader),
    DirectoryLoader(DATA_DIR, glob="**/*.txt", loader_cls=TextLoader)
]

# Load all documents
documents = []
for loader in loaders:
    try:
        start_time = time.time()
        docs = loader.load()
        documents.extend(docs)
        elapsed = time.time() - start_time
        print(f"Successfully loaded {len(docs)} documents in {elapsed:.2f} seconds")
    except Exception as e:
        logger.error(f"Error loading documents with {loader.__class__.__name__}: {e}")

print(f"Total documents loaded: {len(documents)}")

# Print a sample of document content to verify loading
if documents:
    print("\nSample document content:")
    sample_doc = documents[0]
    print(f"Document from {sample_doc.metadata.get('source')} - Page {sample_doc.metadata.get('page', 'N/A')}:")
    print(sample_doc.page_content[:300] + "..." if len(sample_doc.page_content) > 300 else sample_doc.page_content)

# Step 6: Process Documents for RAG - Split into Chunks

# Split documents into smaller chunks with better parameters
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    length_function=len,
    separators=["\n\n", "\n", " ", ""]  # Try to split at paragraph boundaries first
)

# Split the documents with progress tracking
print("Splitting documents into chunks...")
start_time = time.time()
chunks = text_splitter.split_documents(documents)
elapsed = time.time() - start_time
print(f"Split {len(documents)} documents into {len(chunks)} chunks in {elapsed:.2f} seconds")

# Print a sample chunk to verify splitting
if chunks:
    print("\nSample chunk:")
    sample_chunk = chunks[0]
    print(f"Chunk from {sample_chunk.metadata.get('source')} - Page {sample_chunk.metadata.get('page', 'N/A')}:")
    print(sample_chunk.page_content[:300] + "..." if len(sample_chunk.page_content) > 300 else sample_chunk.page_content)

# Step 7: Create Embeddings and Vector Store

# Initialize embedding model - this is free and runs locally
print("Loading embedding model...")
start_time = time.time()
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={'device': str(device)}
)
elapsed = time.time() - start_time
print(f"Embedding model loaded successfully in {elapsed:.2f} seconds!")

# Create vector store
print("Creating vector store...")
start_time = time.time()
vectorstore = FAISS.from_documents(chunks, embeddings)
elapsed = time.time() - start_time
print(f"Vector store created successfully with {len(chunks)} chunks in {elapsed:.2f} seconds!")

# Save the vector store to disk for future use
VECTORSTORE_PATH = "./vectorstore"
vectorstore.save_local(VECTORSTORE_PATH)
print(f"Vector store saved to {VECTORSTORE_PATH}")

# Step 8: Set Up Retriever with Enhanced Parameters

# Create retriever from vector store with better parameters
retriever = vectorstore.as_retriever(
    search_type="mmr",  # Maximum Marginal Relevance - better diversity in results
    search_kwargs={
        "k": 5,  # Retrieve top 5 chunks
        "fetch_k": 10,  # Consider top 10 for diversity
        "lambda_mult": 0.7  # Balance between relevance and diversity
    }
)

# Test the retriever with a sample query
if chunks:
    test_query = "What are the main topics of CTSE?"
    print(f"Testing retriever with query: '{test_query}'")
    start_time = time.time()
    retrieved_docs = retriever.invoke(test_query)
    elapsed = time.time() - start_time
    print(f"Retrieved {len(retrieved_docs)} relevant chunks in {elapsed:.2f} seconds")
    
    if retrieved_docs:
        print("\nSample retrieved chunk:")
        sample_retrieved = retrieved_docs[0]
        print(f"From {sample_retrieved.metadata.get('source')} - Page {sample_retrieved.metadata.get('page', 'N/A')}:")
        print(sample_retrieved.page_content[:300] + "..." if len(sample_retrieved.page_content) > 300 else sample_retrieved.page_content)

# Step 9: Load Language Model (LLM) with Error Handling

def load_model(model_name, device, use_4bit=False):
    """Load a language model with error handling and fallbacks"""
    try:
        print(f"Loading model: {model_name}...")
        start_time = time.time()
        
        # Set quantization config if needed
        bnb_config = None
        if use_4bit and device.type == "cuda":
            bnb_config = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_quant_type="nf4",
                bnb_4bit_use_double_quant=True,
                bnb_4bit_compute_dtype=torch.bfloat16
            )
        
        # Set model loading parameters
        model_kwargs = {
            "torch_dtype": torch.float16 if device.type == "cuda" else torch.float32,
            "low_cpu_mem_usage": True,
            "device_map": "auto",
            "quantization_config": bnb_config
        }
            
        # Load tokenizer and model
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForCausalLM.from_pretrained(model_name, **model_kwargs)
            
        elapsed = time.time() - start_time
        print(f"Model loaded successfully in {elapsed:.2f} seconds!")
        return model, tokenizer
    except Exception as e:
        logger.error(f"Error loading model {model_name}: {e}")
        # Try fallback models if the primary one fails
        fallback_models = [
            "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
            "distilgpt2"
        ]
        
        for fallback in fallback_models:
            if fallback != model_name:
                print(f"Trying fallback model: {fallback}")
                try:
                    return load_model(fallback, device, use_4bit=False)
                except Exception as e:
                    logger.error(f"Failed to load fallback model {fallback}: {e}")
                    continue
        
        raise RuntimeError(f"Failed to load any model: {e}")

# Try to load the model with a try/except block and fallback options
try:
    # Select appropriate model based on available hardware
    if torch.cuda.is_available() and torch.cuda.get_device_properties(0).total_memory > 8e9:  # >8GB VRAM
        model_name = "mistralai/Mistral-7B-v0.1"
        use_4bit = True
    elif torch.cuda.is_available() and torch.cuda.get_device_properties(0).total_memory > 4e9:  # >4GB VRAM
        model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
        use_4bit = False
    else:
        model_name = "distilgpt2"
        use_4bit = False
    
    model, tokenizer = load_model(model_name, device, use_4bit)
    
    # Create text generation pipeline with appropriate parameters
    text_generation_pipeline = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=512,
        temperature=0.1,
        top_p=0.95,
        repetition_penalty=1.15,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
        eos_token_id=tokenizer.eos_token_id,
        device_map="auto"
    )
    
    # Create LangChain wrapper
    llm = HuggingFacePipeline(pipeline=text_generation_pipeline)
    print(f"Successfully configured LLM with model: {model_name}")
    
except Exception as e:
    logger.error(f"Failed to load any language model: {e}")
    print("\n⚠️ ERROR: Could not load any language model. The chatbot will not be able to answer questions.")
    print("Please try installing additional libraries or using a smaller model.")
    # Create a dummy LLM for testing purposes
    from langchain.llms.fake import FakeListLLM
    llm = FakeListLLM(responses=["I'm sorry, I couldn't load a language model to answer your question."])
    print("Created a dummy LLM for testing purposes.")

# Step 10: Create Custom Prompt Template for QA with Better Instructions

# Define an improved prompt template that includes better context instructions
template = """
You are an intelligent, helpful teaching assistant specializing in Current Trends in Software Engineering (CTSE).
Your job is to provide accurate, informative answers to questions about CTSE topics based on the lecture notes.

CONTEXT INFORMATION:
{context}

QUESTION: {question}

INSTRUCTIONS:
1. Answer the question using ONLY the information provided in the context.
2. If the context doesn't contain enough information to fully answer the question, say so clearly.
3. Keep your answer focused, clear, and educational.
4. If appropriate, use bullet points or numbered lists to organize information.
5. If the question involves code, provide examples when possible.
6. Do not make up information or cite sources not present in the context.

YOUR ANSWER:
"""

PROMPT = PromptTemplate(
    template=template,
    input_variables=["context", "question"]
)

# Step 11: Create the QA Chain with Better Error Handling

# Create the QA chain
try:
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",  # 'stuff' simply stuffs all retrieved documents into the prompt
        retriever=retriever,
        return_source_documents=True,  # Return source documents for reference
        chain_type_kwargs={"prompt": PROMPT}
    )
    print("QA Chain created successfully!")
except Exception as e:
    logger.error(f"Error creating QA chain: {e}")
    print("Failed to create QA chain. The chatbot will not be able to answer questions properly.")

# Step 12: Create Enhanced Chat Interface Function with Error Handling

def ask_question(question, retriever=None, qa_chain=None):
    """
    Ask a question to the CTSE chatbot with improved error handling and performance tracking.
    
    Args:
        question (str): The question to ask.
        retriever: Optional retriever for direct document retrieval if qa_chain fails.
        qa_chain: The QA chain to use.
        
    Returns:
        dict: The answer and source documents.
    """
    if not question.strip():
        return {"answer": "Please ask a question about CTSE."}
    
    try:
        # Track timing for performance analysis
        start_time = time.time()
        
        # Check if the question is 'answer' or similar edge cases
        if question.lower() == 'answer':
            raise ValueError("The query 'answer' is not valid.")
        
        # Try to use the QA chain if available
        if qa_chain is not None:
            result = qa_chain.invoke({"query": question})
            
            # Format source information
            sources = []
            for doc in result.get("source_documents", []):
                source = f"From '{os.path.basename(doc.metadata.get('source', 'unknown'))}'"
                if 'page' in doc.metadata:
                    source += f", Page {doc.metadata['page']}"
                if source not in sources:
                    sources.append(source)
            
            # Add source information and timing to the answer
            elapsed = time.time() - start_time
            answer = result["result"]
            
            if sources:
                answer += "\n\nSources: " + ", ".join(sources)
            
            answer += f"\n\n(Response generated in {elapsed:.2f} seconds)"
            
            return {"answer": answer, "sources": sources, "elapsed": elapsed}
        
        # Fallback to retriever-only if QA chain isn't available
        elif retriever is not None:
            docs = retriever.invoke(question)
            answer = "I found these relevant excerpts from the lecture notes:\n\n"
            
            for i, doc in enumerate(docs[:3], 1):  # Show top 3 results
                source = f"From '{os.path.basename(doc.metadata.get('source', 'unknown'))}'"
                if 'page' in doc.metadata:
                    source += f", Page {doc.metadata['page']}"
                    
                answer += f"--- Excerpt {i} ({source}) ---\n"
                answer += doc.page_content.strip()[:300] + "...\n\n"
            
            elapsed = time.time() - start_time
            answer += f"\n(Documents retrieved in {elapsed:.2f} seconds)"
            
            return {"answer": answer, "elapsed": elapsed}
            
        # No components available
        else:
            return {"answer": "Sorry, neither the QA system nor document retriever is available."}
            
    except Exception as e:
        logger.error(f"Error processing question: {e}")
        return {"answer": f"Sorry, I encountered an error while processing your question: {str(e)}"}


# Step 13: Test the Chatbot

# Test with some sample questions
test_questions = [
    "What are the key aspects of agile development?",
    "What is RAG in the context of LLMs?",
    "How can I implement CI/CD in a software project?"
]

for question in test_questions:
    print(f"\n\nQuestion: {question}")
    result = ask_question(question, retriever=retriever, qa_chain=qa_chain)
    print(f"\nAnswer: {result['answer']}")

# Step 14: Interactive Chat Interface with History

from IPython.display import display, HTML, clear_output
import ipywidgets as widgets

# Create widgets for the UI with improved styling
question_input = widgets.Text(
    value='',
    placeholder='Type your question about CTSE here...',
    description='Question:',
    layout=widgets.Layout(width='80%')
)

submit_button = widgets.Button(
    description='Ask',
    button_style='primary',
    tooltip='Ask your question',
    icon='question'
)

clear_button = widgets.Button(
    description='Clear History',
    button_style='warning',
    tooltip='Clear chat history',
    icon='trash'
)

output_area = widgets.Output(
    layout=widgets.Layout(
        border='1px solid #ddd',
        max_height='500px',
        overflow='auto',
        padding='10px'
    )
)

# Add chat history management
chat_history = []

# Define the callback function for the button
def on_submit_button_clicked(b):
    question = question_input.value
    if not question.strip():
        return
    
    # Add question to history and clear input
    chat_history.append(("user", question))
    question_input.value = ''
    
    with output_area:
        clear_output()
        
        # Display chat history
        for role, text in chat_history:
            if role == "user":
                print(f"🧑 You: {text}")
                print()
            else:
                print(f"🤖 CTSE Bot: {text}")
                print("\n" + "-"*50 + "\n")
        
        # Get and display answer
        print("🤖 CTSE Bot: Thinking...")
        
    # Process question outside of output widget to avoid blocking UI
    result = ask_question(question, retriever=retriever, qa_chain=qa_chain)
    answer = result["answer"]
    
    # Add answer to history
    chat_history.append(("bot", answer))
    
    with output_area:
        clear_output()
        
        # Display updated chat history
        for role, text in chat_history:
            if role == "user":
                print(f"🧑 You: {text}")
                print()
            else:
                print(f"🤖 CTSE Bot: {text}")
                print("\n" + "-"*50 + "\n")

# Clear history function
def on_clear_button_clicked(b):
    chat_history.clear()
    with output_area:
        clear_output()
        print("Chat history cleared.")

# Connect the buttons to the callback functions
submit_button.on_click(on_submit_button_clicked)
clear_button.on_click(on_clear_button_clicked)

# Display the UI with improved layout
print("\n\n📚 CTSE Lecture Notes Chatbot")
print("Ask any question about Current Trends in Software Engineering")
display(widgets.HBox([question_input, submit_button, clear_button]))
display(output_area)

with output_area:
    print("Welcome to the CTSE Chatbot! Ask me any question about Current Trends in Software Engineering.")
    print("\n" + "-"*50 + "\n")

# Step 15: Add ability to load new documents without restarting

def load_new_document(file_path):
    """
    Load a new document into the system without restarting
    
    Args:
        file_path (str): Path to the new document
    
    Returns:
        bool: Success status
    """
    try:
        path = Path(file_path)
        if not path.exists():
            print(f"File not found: {file_path}")
            return False
            
        print(f"Loading new document: {path}")
        
        # Choose appropriate loader based on file extension
        if path.suffix.lower() == '.pdf':
            loader = PyPDFLoader(str(path))
        elif path.suffix.lower() == '.txt':
            loader = TextLoader(str(path))
        else:
            print(f"Unsupported file type: {path.suffix}")
            return False
            
        # Load document
        new_docs = loader.load()
        print(f"Loaded {len(new_docs)} pages/documents")
        
        # Split into chunks
        new_chunks = text_splitter.split_documents(new_docs)
        print(f"Split into {len(new_chunks)} chunks")
        
        # Add to vector store
        vectorstore.add_documents(new_chunks)
        print(f"Added to vector store")
        
        # Save updated vector store
        vectorstore.save_local(VECTORSTORE_PATH)
        print(f"Saved updated vector store")
        
        return True
        
    except Exception as e:
        logger.error(f"Error loading new document: {e}")
        print(f"Failed to load document: {str(e)}")
        return False

# Create a function to show how to add new documents
print("\nTo add new documents to the chatbot, use the following code:")
print("load_new_document('path/to/your/new/document.pdf')")

print("\n\n🎉 CTSE Chatbot is ready to use! 🎉")

Collecting tqdm
  Using cached tqdm-4.67.1-py3-none-any.whl (78 kB)
Installing collected packages: tqdm
  Attempting uninstall: tqdm
    Found existing installation: tqdm 4.65.0
    Uninstalling tqdm-4.65.0:
      Successfully uninstalled tqdm-4.65.0
  Rolling back uninstall of tqdm
  Moving to c:\python311\lib\site-packages\tqdm-4.65.0.dist-info\
   from C:\Python311\Lib\site-packages\~qdm-4.65.0.dist-info
  Moving to c:\python311\lib\site-packages\tqdm\
   from C:\Python311\Lib\site-packages\~qdm


ERROR: Could not install packages due to an OSError: [WinError 2] The system cannot find the file specified: 'C:\\Python311\\Scripts\\tqdm.exe' -> 'C:\\Python311\\Scripts\\tqdm.exe.deleteme'


[notice] A new release of pip is available: 23.1.2 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


Python version: 3.11.4 (tags/v3.11.4:d2340ef, Jun  7 2023, 05:45:37) [MSC v.1934 64 bit (AMD64)]
All libraries imported successfully!
No GPU available, using CPU. This may be slower but will still work.
Found 16 PDF files in the data directory:
 - data\AWS User Groups Colombo - Introduction to AWS Cloud Platform.pdf
 - data\CAP Theorem.pdf
 - data\Cloud Computing 101.pdf
 - data\Cloud Design Patterns - 1.pdf
 - data\Cloud Design Patterns - 2.pdf
 - data\cloud-computing-concepts-technology-amp-architecture-by-thomas-erl.pdf
 - data\Containers 101 (1).pdf
 - data\Intro to DevOps and Beyond (2).pdf
 - data\Introduction to Microservices.pdf
 - data\Key Essentials for Building Application in Cloud.pdf
 - data\Lecture 01-Introduction to AI ML - Updated(2025).pdf
 - data\Lecture 2 - Part 1.pdf
 - data\Lecture 2 - Part 2.pdf
 - data\Microservice Design Patterns.pdf
 - data\ML Lec 2 - Part 1.pdf
 - data\ML Lec 2 - Part 2 LLM.pdf


2025-05-12 13:48:15 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Successfully loaded 966 documents in 17.99 seconds
Successfully loaded 0 documents in 0.00 seconds
Total documents loaded: 966

Sample document content:
Document from data\AWS User Groups Colombo - Introduction to AWS Cloud Platform.pdf - Page 0:
Introduction to the AWS Cloud 
Platform
Ravindu Nirmal Fernando
2x AWS Community Builder | STL @ Sysco LABS
Splitting documents into chunks...
Split 966 documents into 1314 chunks in 0.03 seconds

Sample chunk:
Chunk from data\AWS User Groups Colombo - Introduction to AWS Cloud Platform.pdf - Page 0:
Introduction to the AWS Cloud 
Platform
Ravindu Nirmal Fernando
2x AWS Community Builder | STL @ Sysco LABS
Loading embedding model...
Embedding model loaded successfully in 4.90 seconds!
Creating vector store...


2025-05-12 13:48:42 - INFO - Loading faiss with AVX512 support.
2025-05-12 13:48:42 - INFO - Could not load library with AVX512 support due to:
ModuleNotFoundError("No module named 'faiss.swigfaiss_avx512'")
2025-05-12 13:48:42 - INFO - Loading faiss with AVX2 support.
2025-05-12 13:48:42 - INFO - Successfully loaded faiss with AVX2 support.
2025-05-12 13:48:42 - INFO - Failed to load GPU Faiss: name 'GpuIndexIVFFlat' is not defined. Will not load constructor refs for GPU indexes. This is only an error if you're trying to use GPU Faiss.


Vector store created successfully with 1314 chunks in 21.38 seconds!
Vector store saved to ./vectorstore
Testing retriever with query: 'What are the main topics of CTSE?'
Retrieved 5 relevant chunks in 0.04 seconds

Sample retrieved chunk:
From data\cloud-computing-concepts-technology-amp-architecture-by-thomas-erl.pdf - Page 151:
resources.
Loading model: distilgpt2...


Device set to use cpu


Model loaded successfully in 1.82 seconds!
Successfully configured LLM with model: distilgpt2
QA Chain created successfully!


Question: What are the key aspects of agile development?

Answer: 
You are an intelligent, helpful teaching assistant specializing in Current Trends in Software Engineering (CTSE).
Your job is to provide accurate, informative answers to questions about CTSE topics based on the lecture notes.

CONTEXT INFORMATION:
Developers
Focused on Agility
Operators
Focused on Stability

This	section	provides	information	that	specifies	the	details	of	the	business	case,
such	as	the	following:
•	
Business	Case	Name
•	
Description
	–	A	brief	summary	of	the	business	case’s	purpose	and	goals.
•	
Sponsor
	–	Identification	of	business	case	stakeholders.
•	
List	of	Revisions	(optional)
	–	Revisions	by	date,	author,	and	approval	if
control	or	historical	logging	is	required.
G.2.	Business	Needs
The	expected	benefits	and	requirements	that	are	to	be	addressed	and	fulfilled	by
cloud	adop

HBox(children=(Text(value='', description='Question:', layout=Layout(width='80%'), placeholder='Type your ques…

Output(layout=Layout(border_bottom='1px solid #ddd', border_left='1px solid #ddd', border_right='1px solid #dd…


To add new documents to the chatbot, use the following code:
load_new_document('path/to/your/new/document.pdf')


🎉 CTSE Chatbot is ready to use! 🎉
