In [None]:
# PDF Q&A Bot for Google Colab
# This bot allows users to upload PDFs and ask questions about their content

# Install required packages
!pip install gradio langchain openai faiss-cpu PyPDF2 sentence-transformers python-dotenv tiktoken

!pip install -U langchain langchain-community


import gradio as gr
import os
import tempfile
import PyPDF2
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import OpenAI
from langchain.chains.question_answering import load_qa_chain
from langchain.docstore.document import Document
import openai
from sentence_transformers import SentenceTransformer
import numpy as np
from typing import List, Tuple
import warnings
warnings.filterwarnings('ignore')

# ================================
# OPENAI API KEY
# ================================
# Do not paste a real key into the notebook: anyone who receives the notebook
# (or its saved output) can read it. Export OPENAI_API_KEY in the environment
# before running this cell instead. A key that was previously committed here
# should be treated as leaked and revoked.
# The fallback "your-api-key-here" is the placeholder PDFQABot checks for to
# report that no key has been configured.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "your-api-key-here")

class PDFQABot:
    """Answer questions about one uploaded PDF with OpenAI embeddings, FAISS and an OpenAI LLM.

    Instance state:
      * ``documents`` / ``vector_store`` -- chunks and FAISS index of the most
        recently processed PDF; they are only replaced together, after a
        fully successful ``process_pdf`` call.
      * ``embeddings`` / ``llm`` / ``qa_chain`` -- OpenAI components; all stay
        ``None`` until ``setup_openai`` succeeds.
      * ``api_key`` -- copied from the module-level ``OPENAI_API_KEY``.

    The public methods return user-facing status strings (prefixed with an
    emoji) instead of raising, because their results are shown directly in
    the UI.
    """

    # Value that means "the user has not filled in a key yet".
    _PLACEHOLDER_KEY = "your-api-key-here"

    def __init__(self):
        """Create an empty bot and initialise OpenAI when a key is configured."""
        self.vector_store = None
        self.documents = []
        self.embeddings = None
        self.llm = None
        self.qa_chain = None
        self.api_key = OPENAI_API_KEY

        # Initialize OpenAI components automatically
        if self._has_api_key():
            self.setup_openai()

    def _has_api_key(self) -> bool:
        """Return True when ``api_key`` is non-empty and not the placeholder."""
        return bool(self.api_key) and self.api_key != self._PLACEHOLDER_KEY

    def setup_openai(self):
        """Create the embeddings, LLM and QA chain for ``self.api_key``.

        The three components are committed to the instance only when all of
        them were built, so a partial failure cannot leave the bot with
        embeddings but no QA chain. Failures are printed, not raised.
        """
        try:
            os.environ["OPENAI_API_KEY"] = self.api_key
            openai.api_key = self.api_key

            embeddings = OpenAIEmbeddings(openai_api_key=self.api_key)
            llm = OpenAI(temperature=0, openai_api_key=self.api_key)
            qa_chain = load_qa_chain(llm, chain_type="stuff")
        except Exception as e:
            print(f"❌ Error initializing OpenAI: {str(e)}")
            return

        self.embeddings = embeddings
        self.llm = llm
        self.qa_chain = qa_chain
        print("✅ OpenAI API initialized successfully!")

    def extract_text_from_pdf(self, pdf_file) -> str:
        """Return the text of every page of ``pdf_file``, one page per line group.

        Args:
            pdf_file: Anything ``PyPDF2.PdfReader`` accepts (the UI passes a
                file path).

        Raises:
            ValueError: If the PDF cannot be read. Raising (rather than
                returning the error message) keeps the message from being
                mistaken for document text and indexed.
        """
        try:
            pdf_reader = PyPDF2.PdfReader(pdf_file)
            # `or ""` keeps a page that yields no text from breaking the join.
            return "".join(
                (page.extract_text() or "") + "\n" for page in pdf_reader.pages
            )
        except Exception as e:
            raise ValueError(f"Error extracting text: {str(e)}") from e

    def process_pdf(self, pdf_file) -> str:
        """Extract, chunk and index ``pdf_file``; return a status message.

        On any failure the previously loaded document (if any) is left
        untouched, so ``documents`` and ``vector_store`` never disagree.
        """
        if not self._has_api_key():
            return "❌ Please set your OpenAI API key in the code first"

        if pdf_file is None:
            return "❌ Please upload a PDF file"

        if self.embeddings is None:
            return "❌ OpenAI is not initialized. Check your API key and run setup_openai() again"

        try:
            # Extract text from PDF
            text = self.extract_text_from_pdf(pdf_file)

            if not text.strip():
                return "❌ No text found in the PDF"

            # Split text into overlapping chunks
            text_splitter = RecursiveCharacterTextSplitter(
                chunk_size=1000,
                chunk_overlap=200,
                length_function=len
            )
            chunks = text_splitter.split_text(text)

            # Build everything locally first; commit only after the index exists.
            documents = [Document(page_content=chunk) for chunk in chunks]
            vector_store = FAISS.from_documents(
                documents=documents,
                embedding=self.embeddings
            )
        except Exception as e:
            return f"❌ Error processing PDF: {str(e)}"

        self.documents = documents
        self.vector_store = vector_store
        return f"✅ PDF processed successfully! Created {len(chunks)} text chunks. Ready to answer questions!"

    def ask_question(self, question: str) -> str:
        """Answer ``question`` from the three most similar chunks of the loaded PDF.

        Returns the answer prefixed with ``**Answer:**``, or a status message
        when the key, the document, the question or the QA chain is missing,
        or when the lookup/LLM call fails.
        """
        if not self._has_api_key():
            return "❌ Please set your OpenAI API key in the code first"

        if self.vector_store is None:
            return "❌ Please upload and process a PDF first"

        # `not question` also covers None, which has no .strip().
        if not question or not question.strip():
            return "❌ Please enter a question"

        if self.qa_chain is None:
            return "❌ OpenAI is not initialized. Check your API key and run setup_openai() again"

        try:
            # Search for relevant documents
            docs = self.vector_store.similarity_search(question, k=3)

            if not docs:
                return "❌ No relevant information found in the document"

            # Get answer using the QA chain
            answer = self.qa_chain.run(input_documents=docs, question=question)

            return f"**Answer:** {answer}"

        except Exception as e:
            return f"❌ Error answering question: {str(e)}"

    def get_document_info(self) -> str:
        """Return a one-line summary (chunk count, total characters) of the loaded document."""
        if not self.documents:
            return "No document loaded"

        total_chars = sum(len(doc.page_content) for doc in self.documents)
        return f"Document loaded with {len(self.documents)} chunks, {total_chars} total characters"

# Initialize the bot: a single module-level instance that the Gradio event
# handlers in create_interface() call into, so it holds whichever PDF was
# processed most recently.
bot = PDFQABot()

# Build the Gradio UI
def create_interface():
    """Assemble and return the Gradio Blocks app for the PDF Q&A bot.

    Layout: a left column for uploading/processing the PDF (with status and
    document-info boxes) and a right column for asking a question and reading
    the answer. All handlers delegate to the module-level ``bot``.
    """
    with gr.Blocks(title="PDF Q&A Bot", theme=gr.themes.Soft()) as app:
        gr.Markdown("""
        # 📄 PDF Q&A Bot - Ready to Use!

        Upload a PDF file and ask questions about its content using AI!

        ## How to use:
        1. Upload a PDF file
        2. Wait for processing to complete
        3. Ask questions about the content
        """)

        with gr.Row():
            # Left column: upload + processing feedback
            with gr.Column():
                pdf_upload = gr.File(
                    type="filepath",
                    file_types=[".pdf"],
                    label="Upload PDF"
                )

                process_btn = gr.Button("Process PDF", size="lg", variant="primary")
                process_status = gr.Textbox(interactive=False, label="Processing Status")

                doc_info = gr.Textbox(interactive=False, label="Document Info")

            # Right column: question in, answer out
            with gr.Column():
                question_input = gr.Textbox(
                    lines=3,
                    placeholder="What would you like to know about the document?",
                    label="Ask a Question"
                )

                ask_btn = gr.Button("Ask Question", size="lg", variant="primary")

                answer_output = gr.Textbox(
                    interactive=False,
                    lines=10,
                    label="Answer"
                )

                gr.Markdown("""
                ### Example Questions:
                - What is the main topic of this document?
                - Can you summarize the key points?
                - What are the conclusions mentioned?
                - Explain [specific concept] from the document
                """)

        def _process_then_describe(uploaded_pdf):
            """Process the upload, then report the bot's document summary."""
            outcome = bot.process_pdf(uploaded_pdf)
            return outcome, bot.get_document_info()

        process_btn.click(
            fn=_process_then_describe,
            inputs=[pdf_upload],
            outputs=[process_status, doc_info]
        )

        # The button click and pressing Enter in the question box both ask.
        for register in (ask_btn.click, question_input.submit):
            register(
                fn=bot.ask_question,
                inputs=[question_input],
                outputs=[answer_output]
            )

    return app

# Entry point: build the UI and serve it
if __name__ == "__main__":
    demo = create_interface()

    # share=True requests a public link (used to reach the app from Colab);
    # debug=True keeps the cell running so errors and logs stay visible.
    demo.launch(debug=True, server_port=7860, server_name="0.0.0.0", share=True)

✅ OpenAI API initialized successfully!
Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://0f2b4f1574b6283e5d.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Keyboard interruption in main thread... closing server.
Killing tunnel 0.0.0.0:7860 <> https://0f2b4f1574b6283e5d.gradio.live


In [None]:
# PDF Q&A Bot for Google Colab using Together.ai
!pip install gradio langchain faiss-cpu PyPDF2 sentence-transformers python-dotenv tiktoken

import gradio as gr
import os
import PyPDF2
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain.docstore.document import Document
from langchain.llms.base import LLM
from typing import List, Optional
import requests
import warnings
warnings.filterwarnings("ignore")

# ================================
# Together.ai API KEY
# ================================
# Do not paste a real key into the notebook: anyone who receives the notebook
# (or its saved output) can read it. Export TOGETHER_API_KEY in the
# environment before running this cell instead. A key that was previously
# committed here should be treated as leaked and revoked.
# The fallback "your-together-api-key" is the placeholder PDFQABot.process_pdf
# checks for to report that no key has been configured.
TOGETHER_API_KEY = os.environ.get("TOGETHER_API_KEY", "your-together-api-key")
TOGETHER_API_URL = "https://api.together.xyz/v1/chat/completions"

# Custom LLM Wrapper for Together.ai
class TogetherLLM(LLM):
    """LangChain ``LLM`` that sends each prompt to Together.ai's chat-completions endpoint.

    The prompt is sent as a single ``user`` message to ``TOGETHER_API_URL``,
    authenticated with the module-level ``TOGETHER_API_KEY``.
    """

    model: str = "mistralai/Mixtral-8x7B-Instruct-v0.1"  # You can change model here
    # Sampling temperature sent with every request.
    temperature: float = 0.7
    # Seconds to wait for Together.ai before giving up; without a timeout a
    # stalled connection would block the UI handler indefinitely.
    request_timeout: float = 60.0

    def _call(self, prompt: str, stop: Optional[List[str]] = None, run_manager=None, **kwargs) -> str:
        """Return the model's reply to ``prompt``.

        Args:
            prompt: Text sent as the single user message.
            stop: Optional stop sequences, forwarded to the API when given.
            run_manager: Accepted for compatibility with LangChain's
                ``_call`` signature; not used here.
            **kwargs: Extra call-time arguments LangChain may pass; ignored.

        Raises:
            RuntimeError: On a non-200 response or a response body that does
                not contain ``choices[0].message.content``.
            requests.RequestException: On network failure or timeout.
        """
        headers = {
            "Authorization": f"Bearer {TOGETHER_API_KEY}",
            "Content-Type": "application/json",
        }

        payload = {
            "model": self.model,
            "messages": [{"role": "user", "content": prompt}],
            "temperature": self.temperature,
        }
        if stop:
            payload["stop"] = stop

        response = requests.post(
            TOGETHER_API_URL,
            headers=headers,
            json=payload,
            timeout=self.request_timeout,
        )
        if response.status_code != 200:
            raise RuntimeError(f"Together.ai error: {response.text}")

        try:
            return response.json()['choices'][0]['message']['content'].strip()
        except (KeyError, IndexError, TypeError, ValueError, AttributeError) as exc:
            # A 200 with an unexpected body: show the body rather than a bare KeyError.
            raise RuntimeError(
                f"Together.ai returned an unexpected response: {response.text}"
            ) from exc

    @property
    def _llm_type(self) -> str:
        """Identifier LangChain uses for this LLM implementation."""
        return "together_custom"

# PDF Q&A Bot class
class PDFQABot:
    """Answer questions about one uploaded PDF with local embeddings, FAISS and Together.ai.

    ``documents`` and ``vector_store`` describe the most recently processed
    PDF and are only replaced together, after a fully successful
    ``process_pdf`` call. ``process_pdf`` and ``ask_question`` return
    user-facing status strings instead of raising, because their results are
    shown directly in the UI.
    """

    def __init__(self):
        """Create an empty bot with its embedding model, LLM and QA chain."""
        self.vector_store = None
        self.documents = []
        self.embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
        self.llm = TogetherLLM()
        self.qa_chain = load_qa_chain(self.llm, chain_type="stuff")

    def extract_text_from_pdf(self, pdf_file) -> str:
        """Return the text of every page of ``pdf_file``, each followed by a newline.

        Errors from ``PyPDF2`` propagate to the caller.
        """
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        # `or ""` keeps a page that yields no text from breaking the join.
        return "".join(
            (page.extract_text() or "") + "\n" for page in pdf_reader.pages
        )

    def process_pdf(self, pdf_file) -> str:
        """Extract, chunk and index ``pdf_file``; return a status message.

        On any failure the previously loaded document (if any) is left
        untouched.
        """
        if not TOGETHER_API_KEY or TOGETHER_API_KEY == "your-together-api-key":
            return "❌ Please set your Together.ai API key first."

        # The button can be clicked before anything was uploaded.
        if pdf_file is None:
            return "❌ Please upload a PDF file."

        try:
            text = self.extract_text_from_pdf(pdf_file)
            if not text.strip():
                return "❌ No text found in the PDF."

            text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
            chunks = text_splitter.split_text(text)

            # Build locally first; commit only after the index exists.
            documents = [Document(page_content=chunk) for chunk in chunks]
            vector_store = FAISS.from_documents(documents, self.embeddings)
        except Exception as e:
            return f"❌ Error processing PDF: {str(e)}"

        self.documents = documents
        self.vector_store = vector_store
        return f"✅ PDF processed successfully! Created {len(chunks)} chunks."

    def ask_question(self, question: str) -> str:
        """Answer ``question`` from the three most similar chunks of the loaded PDF.

        Returns the answer prefixed with ``**Answer:**``, or a status message
        when no document is loaded, the question is empty, or the lookup/LLM
        call fails.
        """
        if self.vector_store is None:
            return "❌ Please upload and process a PDF first."
        # `not question` also covers None, which has no .strip().
        if not question or not question.strip():
            return "❌ Please enter a question."

        try:
            docs = self.vector_store.similarity_search(question, k=3)
            if not docs:
                return "❌ No relevant information found in the document."
            answer = self.qa_chain.run(input_documents=docs, question=question)
        except Exception as e:
            return f"❌ Error answering question: {str(e)}"
        return f"**Answer:** {answer}"

    def get_document_info(self) -> str:
        """Return a one-line summary (chunk count, total characters) of the loaded document."""
        if not self.documents:
            return "No document loaded."
        total_chars = sum(len(doc.page_content) for doc in self.documents)
        return f"Document has {len(self.documents)} chunks and {total_chars} total characters."

# Single module-level bot instance; the Gradio event handlers in
# create_interface() call its methods, so it holds whichever PDF was
# processed most recently.
bot = PDFQABot()

def create_interface():
    """Assemble and return the Gradio Blocks app for the Together.ai PDF Q&A bot.

    Left column: PDF upload, process button, status and document info.
    Right column: question box, ask button and answer box. All handlers
    delegate to the module-level ``bot``.
    """
    with gr.Blocks(title="PDF Q&A Bot - Together.ai") as app:
        gr.Markdown("""
        # 🤖 PDF Q&A Bot (Together.ai)
        Upload a PDF and ask AI questions based on its content.
        """)

        with gr.Row():
            with gr.Column():
                pdf_upload = gr.File(
                    type="filepath",
                    file_types=[".pdf"],
                    label="Upload PDF",
                )
                process_btn = gr.Button("Process PDF")
                process_status = gr.Textbox(interactive=False, label="Status")
                doc_info = gr.Textbox(interactive=False, label="Document Info")

            with gr.Column():
                question_input = gr.Textbox(label="Ask a Question")
                ask_btn = gr.Button("Ask")
                answer_output = gr.Textbox(lines=10, interactive=False, label="Answer")

        def _process_then_describe(uploaded_pdf):
            """Process the upload, then report the bot's document summary."""
            outcome = bot.process_pdf(uploaded_pdf)
            return (outcome, bot.get_document_info())

        process_btn.click(
            fn=_process_then_describe,
            inputs=pdf_upload,
            outputs=[process_status, doc_info],
        )

        # The button click and pressing Enter in the question box both ask.
        for register in (ask_btn.click, question_input.submit):
            register(fn=bot.ask_question, inputs=question_input, outputs=answer_output)

    return app

if __name__ == "__main__":
    # Build the UI, then serve it; share=True requests a public link.
    demo = create_interface()
    demo.launch(
        server_port=7860,
        server_name="0.0.0.0",
        share=True,
    )




modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://3849108bd0f6b37843.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
