In [None]:
# Install dependencies
!pip install transformers langchain langchain-community faiss-cpu sentence-transformers pypdf PyPDF2 langchain-huggingface

Collecting langchain-community
  Downloading langchain_community-0.3.24-py3-none-any.whl.metadata (2.5 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.11.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.8 kB)
Collecting pypdf
  Downloading pypdf-5.5.0-py3-none-any.whl.metadata (7.2 kB)
Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Collecting langchain-huggingface
  Downloading langchain_huggingface-0.2.0-py3-none-any.whl.metadata (941 bytes)
Collecting langchain-core<1.0.0,>=0.3.55 (from langchain)
  Downloading langchain_core-0.3.59-py3-none-any.whl.metadata (5.9 kB)
Collecting langchain
  Downloading langchain-0.3.25-py3-none-any.whl.metadata (7.8 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.9.1-py3-none-any.whl.metadata (3.8 kB)
Collecting httpx-

In [None]:
# Import modules
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import torch
import os
from google.colab import files

In [None]:
# Upload the CTSE lecture notes PDF through the Colab file picker.
# files.upload() returns a mapping of {filename: file contents} for the uploaded files.
uploaded = files.upload()

# Colab renames an upload when the file name is already taken (the recorded run saved it
# as "CTSE_Lecture_Notes (3).pdf"), so loading a hard-coded name would silently read a
# stale copy from an earlier upload. Use the bytes that were just uploaded instead.
pdf_uploads = {
    name: content for name, content in uploaded.items() if name.lower().endswith(".pdf")
}
if not pdf_uploads:
    raise ValueError("No PDF was uploaded; please upload the CTSE lecture notes PDF.")

# Persist the first uploaded PDF under a fixed path so re-running this cell always
# indexes the most recent upload.
pdf_path = "CTSE_Lecture_Notes.pdf"
pdf_bytes = next(iter(pdf_uploads.values()))
with open(pdf_path, "wb") as pdf_file:
    pdf_file.write(pdf_bytes)

# Load the CTSE lecture notes PDF
loader = PyPDFLoader(pdf_path)
documents = loader.load()

Saving CTSE_Lecture_Notes.pdf to CTSE_Lecture_Notes (3).pdf


In [None]:
# Split text into chunks for embedding: windows of up to 1000 characters, with 200
# characters of overlap so content cut at a chunk boundary also appears in the next chunk.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
docs = text_splitter.split_documents(documents)

# Fail here with a clear message instead of later, when the vector index is built from
# an empty list (e.g. a scanned PDF without an extractable text layer).
if not docs:
    raise ValueError(
        "No text chunks were produced from the PDF; check that it contains extractable text."
    )

In [None]:
# Embed each chunk with the all-MiniLM-L6-v2 sentence-transformers model and index the
# resulting vectors in an in-memory FAISS store.
# NOTE(review): the saved output of this cell shows "CUDA error: device-side assert
# triggered". Presumably an earlier GPU call in the same session had already failed
# (confirm); restart the runtime and run the notebook top to bottom before trusting it.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)
vectorstore = FAISS.from_documents(documents=docs, embedding=embeddings)

RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [None]:
# Load the GPT-2 tokenizer and causal-LM weights, then place the model on the GPU when
# CUDA is available (CPU otherwise).
model_name = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)

_target_device = "cuda" if torch.cuda.is_available() else "cpu"
model = AutoModelForCausalLM.from_pretrained(model_name)
model = model.to(_target_device)

In [None]:
# Reuse the end-of-sequence token as the padding token, on both the tokenizer and the
# model config, so padding-aware code paths have a pad id to work with.
tokenizer.pad_token = tokenizer.eos_token
model.config.pad_token_id = tokenizer.eos_token_id

# Default sampling settings for generation; callers may override them per call.
_sampling_defaults = {
    "max_new_tokens": 128,
    "do_sample": True,
    "top_p": 0.9,
    "temperature": 0.7,
}

# Pipeline device index: first GPU when CUDA is available, -1 for CPU.
_pipeline_device = 0 if torch.cuda.is_available() else -1

generation_pipeline = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    device=_pipeline_device,
    truncation=True,
    **_sampling_defaults,
)

#llm = HuggingFacePipeline(pipeline=generation_pipeline)

Device set to use cuda:0


In [None]:
# Generate Response
# Prompt scaffold; {context} and {question} are filled in per request.
_PROMPT_TEMPLATE = """
Below is some context extracted from 'Current Trends in Software Engineering' module's lecture notes. Based on this, answer the question clearly and informatively.

Context:
{context}

Student Question:
{question}

Provide a well-structured, relevant, and educational answer:
Answer:
"""


def _fit_context_to_window(context, scaffold, max_new_tokens, safety_margin=8):
    """Trim `context` so that scaffold + context + generated tokens fit the model window.

    Args:
        context: Retrieved text to place in the prompt.
        scaffold: The prompt with an empty context (instructions + question).
        max_new_tokens: Number of tokens reserved for the generated answer.
        safety_margin: Spare tokens to absorb tokenisation differences when the
            trimmed context is re-tokenised as part of the full prompt.

    Returns:
        `context` unchanged when it fits, otherwise its leading tokens decoded to text.

    Raises:
        ValueError: If the scaffold plus `max_new_tokens` already exceeds the window.
    """
    tokenizer = generation_pipeline.tokenizer

    # Use the tighter of the tokenizer limit and the model's position limit; some
    # tokenizers report a very large placeholder for model_max_length.
    window = tokenizer.model_max_length
    position_limit = getattr(generation_pipeline.model.config, "max_position_embeddings", None)
    if position_limit:
        window = min(window, position_limit)

    scaffold_len = len(tokenizer(scaffold, add_special_tokens=False)["input_ids"])
    budget = window - max_new_tokens - scaffold_len - safety_margin
    if budget < 0:
        raise ValueError(
            "The question and max_new_tokens do not fit in the model's context window; "
            "shorten the question or lower max_new_tokens."
        )

    context_ids = tokenizer(context, add_special_tokens=False)["input_ids"]
    if len(context_ids) <= budget:
        return context
    # Keep the leading tokens: chunks are joined in retrieval order, so the first
    # retrieved chunks are the ones preserved.
    return tokenizer.decode(context_ids[:budget])


def CTSE_Chatbot(question, top_k=5, max_new_tokens=128):
    """Answer a question about the lecture notes using retrieval-augmented generation.

    Retrieves the `top_k` most similar chunks from `vectorstore`, builds a prompt
    around them and samples a continuation from `generation_pipeline`.

    Args:
        question: The student's question.
        top_k: Number of chunks to retrieve from the vector store.
        max_new_tokens: Maximum number of tokens to generate for the answer.

    Returns:
        The generated answer text, stripped of surrounding whitespace.

    Raises:
        ValueError: If the question and `max_new_tokens` alone exceed the model's
            context window.
    """
    results = vectorstore.similarity_search(question, k=top_k)
    context = "\n\n".join(doc.page_content for doc in results)

    # Five 1000-character chunks can exceed the model's context window. An over-long
    # prompt either loses the question and the "Answer:" cue to truncation or overruns
    # the position limit during generation, so the context is trimmed to fit up front.
    scaffold = _PROMPT_TEMPLATE.format(context="", question=question)
    context = _fit_context_to_window(context, scaffold, max_new_tokens)
    prompt = _PROMPT_TEMPLATE.format(context=context, question=question)

    # return_full_text=False yields only the continuation, so the answer no longer has
    # to be located by searching for "Answer:" (which may also occur in the context or
    # in the generated text itself).
    outputs = generation_pipeline(
        prompt,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        top_p=0.9,
        temperature=0.7,
        return_full_text=False,
    )
    return outputs[0]["generated_text"].strip()

In [None]:
# Demo query: benefits of containerization in DevOps.
question = "What are the benefits of containerization in DevOps?"
response = CTSE_Chatbot(question)

# Show the question, a blank line, then the generated answer.
print(f"Question: {question}\n")
print(f"Answer: {response}")

Question: What are the benefits of containerization in DevOps?

Answer: * Containerization can help you to deploy the same application across different 

environments without needing to redesign it.

* Containerization can help you to deploy the same application across different environments without needing to redesign it.

* Containerization can help you to deploy the same application across different environments without needing to redesign it.

* Containerization can help you to deploy the same application across different environments without needing to redesign it.

* Containerization can help you to deploy the same application across different environments without needing to redesign it.

* Containerization can help you to deploy the same application across different


In [None]:
# Demo query: container orchestration.
question = "What is Container orchestration"
response = CTSE_Chatbot(question)

# Show the question, a blank line, then the generated answer.
print(f"Question: {question}\n")
print(f"Answer: {response}")

Question: What is Container orchestration

Answer: Containers are a simple and fast way to deploy applications.

Containers are also a powerful way to manage and manage your IT infrastructure.

Containers are also a powerful way to manage and manage your IT infrastructure.

Containers are also a powerful way to manage and manage your IT infrastructure.

Containers are also a powerful way to manage and manage your IT infrastructure.

Containers are also a powerful way to manage and manage your IT infrastructure.

Containers are also a powerful way to manage and manage your IT infrastructure.

Containers are also a powerful way to manage and manage


In [None]:
# Demo query: definition of a container.
question = "What is a container?"
response = CTSE_Chatbot(question)

# Show the question, a blank line, then the generated answer.
print(f"Question: {question}\n")
print(f"Answer: {response}")

Question: What is a container?

Answer: A container is a data structure that can be used to store, access, and organize information.

A container can contain a lot of different things, but most of them are useful for one purpose:

• To store data and data management.

• To manage information and data for a variety of tasks.

• To store, manage, and retrieve information.

• To store, manage, and retrieve data.

A container can be used to store data that is used to process data.

A container can be used to store data that is stored in a database.

A container


In [None]:
# Demo query: advantages of the microservice architecture.
question = "what are Advantages of Microservice Architecture?"
response = CTSE_Chatbot(question)

# Show the question, a blank line, then the generated answer.
print(f"Question: {question}\n")
print(f"Answer: {response}")

Question: what are Advantages of Microservice Architecture?

Answer: Microservices: A Natural Progression

• Emergence from Real-World Use: Microservices weren’t pre-planned 
but evolved as a response to practical needs in software 

development.

• Responding to Change: Offering the agility and flexibility to adapt to 
new technologies and market demands.

Understanding Microservices 

• Core Characteristics

• Small and Focused: Aimed at doing one thing well, avoiding sprawling 
codebases.

• Cohesion and Single Responsibility: Adhering to the principle of 
grouping related code
