In [None]:
import fitz  
import os

def extract_all_pdf_text(pdf_folder):
    combined_text = ""
    for filename in sorted(os.listdir(pdf_folder)):
        if filename.endswith(".pdf"):
            path = os.path.join(pdf_folder, filename)
            doc = fitz.open(path)
            for page in doc:
                combined_text += page.get_text()
            combined_text += "\n\n"
    return combined_text

raw_text = extract_all_pdf_text("../data/")
print(raw_text[:500])  



Introduction to the AWS Cloud 
Platform
Ravindu Nirmal Fernando
2x AWS Community Builder | STL @ Sysco LABS
Agenda
• Introduction to AWS cloud platform and its benefits
• AWS Global Infrastructure
• Accessing AWS Services
• Interacting with AWS Services
• Best Practices for managing AWS Accounts
• Common AWS services
• Demo
What is AWS Cloud?
• AWS Cloud is a cloud computing 
platform that provides a wide range of 
services, including compute, storage, 
databases, security, networking, 
analytic


In [8]:
from langchain.text_splitter import CharacterTextSplitter

splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
documents = splitter.create_documents([raw_text])
print(f"Chunks created: {len(documents)}")


Created a chunk of size 4891, which is longer than the specified 500
Created a chunk of size 5186, which is longer than the specified 500
Created a chunk of size 5328, which is longer than the specified 500
Created a chunk of size 3674, which is longer than the specified 500
Created a chunk of size 4093, which is longer than the specified 500
Created a chunk of size 16000, which is longer than the specified 500
Created a chunk of size 5647, which is longer than the specified 500
Created a chunk of size 8128, which is longer than the specified 500
Created a chunk of size 12000, which is longer than the specified 500
Created a chunk of size 12330, which is longer than the specified 500
Created a chunk of size 9734, which is longer than the specified 500
Created a chunk of size 2422, which is longer than the specified 500
Created a chunk of size 11701, which is longer than the specified 500
Created a chunk of size 8563, which is longer than the specified 500
Created a chunk of size 9864, 

Chunks created: 15


In [10]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma

embedding_function = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

vectordb = Chroma.from_documents(
    documents=documents,
    embedding=embedding_function,
    persist_directory="../chroma_db"
)

vectordb.persist() 



  vectordb.persist()


In [11]:
from langchain_community.llms import Ollama

llm = Ollama(model="mistral")  


In [13]:
def ctse_chatbot(query, k=3):
    retriever = vectordb.as_retriever(search_kwargs={"k": k})
    docs = retriever.get_relevant_documents(query)
    context = "\n".join([doc.page_content for doc in docs])

    prompt = f"""
Use the following CTSE lecture notes to answer the question.

Context:
{context}

Question: {query}

Answer:
"""
    return llm.invoke(prompt)


In [1]:
import tkinter as tk
from tkinter import scrolledtext

def submit_query():
    query = input_box.get()
    if query.strip().lower() in ["exit", "quit"]:
        window.destroy()
        return

    response = ctse_chatbot(query)
    output_box.insert(tk.END, f"You: {query}\n", "user")
    output_box.insert(tk.END, f"Bot: {response.strip()}\n\n", "bot")
    input_box.delete(0, tk.END)

window = tk.Tk()
window.title("🧠 CTSE Chatbot")

input_box = tk.Entry(window, width=80)
input_box.pack(padx=10, pady=10)

ask_button = tk.Button(window, text="Ask", command=submit_query)
ask_button.pack()

output_box = scrolledtext.ScrolledText(window, width=90, height=20, wrap=tk.WORD)
output_box.pack(padx=10, pady=10)

output_box.tag_config("user", foreground="blue")
output_box.tag_config("bot", foreground="green")

window.mainloop()



: 