In [None]:
!pip install transformers faiss-cpu torch fitz


In [None]:
!pip uninstall pymupdf
! pip install pymupdf


In [None]:
# Install required libraries (make sure you run these in Colab first)
!pip install transformers torch fitz


In [None]:
import torch
from transformers import pipeline, GPT2LMHeadModel, GPT2Tokenizer
import fitz  # PyMuPDF

import warnings
warnings.filterwarnings("ignore")

import time
import textwrap

# Extractive question-answering pipeline used to answer questions against PDF chunks.
# No model name is passed, so transformers selects its default checkpoint for the task.
qa_pipeline = pipeline("question-answering")

# Load GPT-2 model and tokenizer
# NOTE(review): no reachable code in the visible part of this file uses `model`
# or `tokenizer`; confirm whether this GPT-2 load is still needed.
model_name = "gpt2"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token  # Set pad_token to eos_token

# Extract text from PDF
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF as one string.

    Args:
        pdf_path: Path of the PDF file to open with PyMuPDF (``fitz``).

    Returns:
        The ``get_text()`` output of each page, in page order, joined with
        single spaces.

    Raises:
        Whatever ``fitz.open`` raises when the file is missing or unreadable.
    """
    # The context manager closes the document (and its file handle) as soon as
    # the text has been collected, even if reading a page raises.
    with fitz.open(pdf_path) as doc:
        return " ".join(page.get_text() for page in doc)

# Split the full text into fixed-size pieces so each model call gets a bounded input
def chunk_text(text, chunk_size=500):
    """Split *text* into consecutive slices of at most *chunk_size* characters.

    The size is measured in characters (not tokens), and slices are cut at
    fixed offsets, so a word may be divided between two neighbouring chunks.

    Args:
        text: The string to split.
        chunk_size: Maximum length of each slice; defaults to 500.

    Returns:
        A list of slices in their original order; empty when *text* is empty.
        The last slice may be shorter than *chunk_size*.
    """
    pieces = []
    for start in range(0, len(text), chunk_size):
        stop = start + chunk_size
        pieces.append(text[start:stop])
    return pieces

# Extract an answer from a chunk using the question-answering pipeline
def answer_question_with_qa_model(question, context_chunk):
    """Ask the module-level ``qa_pipeline`` to answer *question* from one chunk.

    Args:
        question: The user's question.
        context_chunk: The piece of document text to search for an answer.

    Returns:
        The ``'answer'`` entry of the pipeline result, or ``""`` when the
        pipeline call fails for this chunk.
    """
    try:
        result = qa_pipeline(question=question, context=context_chunk)
        return result["answer"]
    except Exception:
        # Best effort: a chunk the pipeline cannot handle yields no answer
        # instead of aborting the whole search. `Exception` (rather than a bare
        # `except`) lets KeyboardInterrupt/SystemExit still stop the program.
        return ""

# Chatbot class that uses all the above
class PDFChatbot:
    """Answer questions about a single PDF with an extractive QA pipeline.

    The PDF is read and split into chunks once, at construction time; every
    call to :meth:`chat` then searches all chunks for the best answer.
    """

    def __init__(self, pdf_path):
        """Load the PDF at *pdf_path* and store its text as ``self.text_chunks``."""
        print("🔍 Loading and processing PDF...")
        self.text_chunks = chunk_text(extract_text_from_pdf(pdf_path))
        print(f"✅ Loaded {len(self.text_chunks)} chunks.")

    def chat(self, question):
        """Return the most confident answer to *question* across all chunks.

        Each chunk is passed to the module-level ``qa_pipeline``. Candidates are
        ranked by the pipeline's own ``'score'`` rather than by answer length,
        because the longest extracted span is not necessarily the most
        relevant one. The pipeline is queried directly here because the
        confidence score is needed in addition to the answer text.

        Args:
            question: The user's question.

        Returns:
            The answer text with the highest score, or an apology message when
            no chunk produced a non-blank answer.
        """
        best_response = ""
        best_score = float("-inf")

        for chunk in self.text_chunks:
            try:
                result = qa_pipeline(question=question, context=chunk)
                answer = result["answer"]
                score = result["score"]
            except Exception:
                # Best effort: skip chunks the pipeline cannot process.
                continue

            # Blank answers never win, whatever their score.
            if answer.strip() and score > best_score:
                best_score = score
                best_response = answer

        return best_response or "Sorry, I couldn't find an answer in the PDF."


# === Run the chatbot ===
# Hard-coded location of the PDF to query; change it to point at your own file.
# NOTE(review): the /content prefix looks like a Colab upload path - confirm.
pdf_path = "/content/Seizure Detection and Probability Prediction using Random Forests.pdf"
# Constructing the chatbot reads the whole PDF and splits it into chunks up front.
chatbot = PDFChatbot(pdf_path)

def pretty_print_response(response):
    """Print *response* after a chatbot prefix, one character at a time.

    Each character is flushed immediately and followed by a 10 ms pause to
    give a typing effect. The output ends with a blank line for spacing.

    Args:
        response: The text to display.
    """
    typing_delay = 0.01  # seconds to wait after each character

    print("\nChatbot 🤖: ", end="")
    for symbol in response:
        # Flush per character so the pause is visible on a buffered stdout.
        print(symbol, end="", flush=True)
        time.sleep(typing_delay)
    # "\n" plus print's own newline: finish the line and leave a blank one.
    print("\n")

print("\n✨ Chatbot is ready to answer your questions! Type 'exit', 'quit', or 'stop' to end.\n")

# Commands (matched case-insensitively, ignoring surrounding whitespace) that end the session.
EXIT_COMMANDS = {"exit", "quit", "stop"}

# Interactive loop: read a question, answer it from the PDF, repeat until told to stop.
while True:
    try:
        user_input = input("🧠 You: ").strip()
    except (EOFError, KeyboardInterrupt):
        # stdin was closed or the cell was interrupted: end the session
        # cleanly instead of surfacing a traceback.
        print("\nChatbot 🤖: Goodbye! 👋")
        break

    if not user_input:
        # Nothing was typed; prompt again rather than querying every chunk
        # with an empty question.
        continue

    if user_input.lower() in EXIT_COMMANDS:
        print("Chatbot 🤖: Goodbye! 👋")
        break

    response = chatbot.chat(user_input)

    # Wrap long lines (optional)
    wrapped_response = textwrap.fill(response, width=80)
    pretty_print_response(wrapped_response)