<a href="https://colab.research.google.com/github/Aang-CHO2/gradio_distilGPT2/blob/main/qdistilGPT2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
# Install necessary libraries in Google Colab
!pip install transformers gradio pdfplumber torch --quiet

import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import pdfplumber  # Optimized PDF processing
import os

# Use a smaller model like DistilGPT-2 or GPT-2
model_name = "distilgpt2"  # You can also use "gpt2" or "flan-t5-small"

# Use GPU if available, else fall back to CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the model and tokenizer
model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Cache for tokenized inputs (to avoid re-tokenizing the same content)
token_cache = {}

# Function to extract text from the PDF using pdfplumber for faster processing
def extract_text_from_pdf(pdf_file):
    text = ""
    with pdfplumber.open(pdf_file) as pdf:
        for page in pdf.pages:
            text += page.extract_text() if page.extract_text() else ''
    return text.strip() if text else "Error: Could not extract text from PDF."

# Function to extract text from a .txt file
def extract_text_from_txt(txt_file):
    try:
        text = txt_file.read().decode('utf-8')
    except Exception as e:
        return f"Error: Could not read the text file. Details: {str(e)}"

    return text.strip() if text else "Error: Text file is empty."

# Load the file and extract its content based on the file content
def extract_text_from_file(file_obj):
    # Check if file_obj is None (no file uploaded)
    if file_obj is None:
        return "Error: No file uploaded. Please upload a PDF or TXT file."

    # Check if the file is a PDF or TXT file based on content type
    file_name = file_obj.name if hasattr(file_obj, 'name') else "unknown"

    if file_name.endswith(".pdf"):
        return extract_text_from_pdf(file_obj)
    elif file_name.endswith(".txt"):
        return extract_text_from_txt(file_obj)
    else:
        return "Error: Unsupported file format. Please provide a .pdf or .txt file."

# Generate prompt suggestions based on content
def generate_prompt_suggestions(file_text):
    # Analyze the content to suggest some starter prompts (this is a basic implementation)
    if "chapter" in file_text.lower() or "introduction" in file_text.lower():
        return ["Can you summarize this chapter?", "What is the key takeaway from the introduction?", "Explain the main argument."]
    elif "conclusion" in file_text.lower() or "results" in file_text.lower():
        return ["What are the results mentioned in the document?", "Can you summarize the conclusion?", "What are the key findings?"]
    else:
        return ["What is this document about?", "Summarize the content of this file.", "Can you explain the key points?"]

# Define the chatbot function
def chatbot_fn(prompt, file_obj, chatbot_history=None, max_new_tokens=150):
    try:
        # Initialize chatbot_history if it's None
        if chatbot_history is None:
            chatbot_history = []

        # Extract text from the file
        file_text = extract_text_from_file(file_obj)

        if file_text.startswith("Error"):
            return file_text, chatbot_history  # Return the error message

        # Cache tokenized inputs to avoid redundant work (faster tokenization)
        if file_text not in token_cache:
            token_cache[file_text] = tokenizer(file_text, return_tensors="pt", truncation=True, max_length=1024).to(device)

        inputs = token_cache[file_text]  # Retrieve tokenized input from cache

        # Generate response with `max_new_tokens`
        with torch.no_grad():
            outputs = model.generate(**inputs, max_new_tokens=max_new_tokens, pad_token_id=tokenizer.eos_token_id)

        # Decode the response
        response_text = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()

        # Check if the "Assistant:" label exists in the response
        if "Assistant:" in response_text:
            assistant_response = response_text.split("Assistant:")[-1].strip()
        else:
            assistant_response = response_text if response_text else "I couldn't generate a proper response."

        # Update chatbot history with the user and assistant responses
        chatbot_history.append({"role": "user", "content": prompt})
        chatbot_history.append({"role": "assistant", "content": assistant_response})

        return assistant_response, chatbot_history
    except Exception as e:
        return f"Error: {str(e)}", chatbot_history

# Simplified Gradio Interface (light mode, no custom CSS)
with gr.Blocks() as iface:
    gr.Markdown("<h1>Document-based Chatbot (Light Mode)</h1>")

    # Chat window
    with gr.Row():
        with gr.Column():
            chatbox = gr.HTML(elem_id="chatbox", value="")

    # Inputs
    with gr.Row():
        with gr.Column():
            user_prompt = gr.Textbox(label="Your message")
        with gr.Column():
            file_input = gr.File(label="Upload PDF or TXT file")

    # Slider for Max New Tokens
    max_new_tokens = gr.Slider(50, 300, step=10, label="Max New Tokens")

    # Suggested prompts area
    with gr.Row():
        suggested_prompts = gr.HTML("")

    # Output area (chat history)
    output_text = gr.Textbox(visible=False)

    # Chat history state
    chatbot_history = gr.State([])

    # Button to submit
    submit_button = gr.Button("Send")

    # Function to generate and display suggested prompts
    def update_suggested_prompts(file_obj):
        file_text = extract_text_from_file(file_obj)
        suggestions = generate_prompt_suggestions(file_text)
        prompt_html = "<div><b>Suggested Prompts:</b><br>" + "<br>".join(f"- {suggestion}" for suggestion in suggestions) + "</div>"
        return prompt_html

    # Update chat window with user and assistant messages
    def update_chat_window(messages):
        chat_html = ""
        for message in messages:
            role = message["role"]
            content = message["content"]
            if role == "user":
                chat_html += f"<div class='message user-message'>{content}</div>"
            else:
                chat_html += f"<div class='message assistant-message'>{content}</div>"
        return chat_html

    # Function for chatbot interaction
    def interact(prompt, file_obj, history, max_new_tokens):
        response, new_history = chatbot_fn(prompt, file_obj, history, max_new_tokens)
        chat_html = update_chat_window(new_history)
        return chat_html, new_history

    # Bind the file upload to generate prompt suggestions
    file_input.change(update_suggested_prompts, inputs=[file_input], outputs=[suggested_prompts])

    # Bind the button to the chatbot function
    submit_button.click(interact, [user_prompt, file_input, chatbot_history, max_new_tokens], [chatbox, chatbot_history])

# Launch the Gradio interface with share=True to create a public link (use this for testing externally)
iface.launch(share=True, debug=True)




Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://0f09b5d303d272c0f8.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7861 <> https://0f09b5d303d272c0f8.gradio.live




In [11]:
# Install necessary libraries in Google Colab
!pip install transformers gradio pdfplumber torch pyngrok --quiet

import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import pdfplumber  # Optimized PDF processing
from pyngrok import ngrok
import os
import random

# Use a smaller model like DistilGPT-2 or GPT-2
model_name = "distilgpt2"  # You can also use "gpt2" or "flan-t5-small"

# Use GPU if available, else fall back to CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the model and tokenizer
model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Cache for tokenized inputs (to avoid re-tokenizing the same content)
token_cache = {}

# Function to extract text from the PDF using pdfplumber for faster processing
def extract_text_from_pdf(pdf_file):
    text = ""
    with pdfplumber.open(pdf_file) as pdf:
        for page in pdf.pages:
            text += page.extract_text() if page.extract_text() else ''
    return text.strip() if text else "Error: Could not extract text from PDF."

# Function to extract text from a .txt file
def extract_text_from_txt(txt_file):
    try:
        text = txt_file.read().decode('utf-8')
    except Exception as e:
        return f"Error: Could not read the text file. Details: {str(e)}"

    return text.strip() if text else "Error: Text file is empty."

# Load the file and extract its content based on the file content
def extract_text_from_file(file_obj):
    # Check if file_obj is None (no file uploaded)
    if file_obj is None:
        return "Error: No file uploaded. Please upload a PDF or TXT file."

    # Check if the file is a PDF or TXT file based on content type
    file_name = file_obj.name if hasattr(file_obj, 'name') else "unknown"

    if file_name.endswith(".pdf"):
        return extract_text_from_pdf(file_obj)
    elif file_name.endswith(".txt"):
        return extract_text_from_txt(file_obj)
    else:
        return "Error: Unsupported file format. Please provide a .pdf or .txt file."

# Generate prompt suggestions based on content
def generate_prompt_suggestions(file_text):
    # Analyze the content to suggest some starter prompts (this is a basic implementation)
    if "chapter" in file_text.lower() or "introduction" in file_text.lower():
        return ["Can you summarize this chapter?", "What is the key takeaway from the introduction?", "Explain the main argument."]
    elif "conclusion" in file_text.lower() or "results" in file_text.lower():
        return ["What are the results mentioned in the document?", "Can you summarize the conclusion?", "What are the key findings?"]
    else:
        return ["What is this document about?", "Summarize the content of this file.", "Can you explain the key points?"]

# Define the chatbot function
def chatbot_fn(prompt, file_obj, chatbot_history=None, max_new_tokens=150):
    try:
        # Initialize chatbot_history if it's None
        if chatbot_history is None:
            chatbot_history = []

        # Extract text from the file
        file_text = extract_text_from_file(file_obj)

        if file_text.startswith("Error"):
            return file_text, chatbot_history  # Return the error message

        # Cache tokenized inputs to avoid redundant work (faster tokenization)
        if file_text not in token_cache:
            token_cache[file_text] = tokenizer(file_text, return_tensors="pt", truncation=True, max_length=1024).to(device)

        inputs = token_cache[file_text]  # Retrieve tokenized input from cache

        # Generate response with `max_new_tokens`
        with torch.no_grad():
            outputs = model.generate(**inputs, max_new_tokens=max_new_tokens, pad_token_id=tokenizer.eos_token_id)

        # Decode the response
        response_text = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()

        # Check if the "Assistant:" label exists in the response
        if "Assistant:" in response_text:
            assistant_response = response_text.split("Assistant:")[-1].strip()
        else:
            assistant_response = response_text if response_text else "I couldn't generate a proper response."

        # Update chatbot history with the user and assistant responses
        chatbot_history.append({"role": "user", "content": prompt})
        chatbot_history.append({"role": "assistant", "content": assistant_response})

        return assistant_response, chatbot_history
    except Exception as e:
        return f"Error: {str(e)}", chatbot_history

# Simplified Gradio Interface (light mode, no custom CSS)
with gr.Blocks() as iface:
    gr.Markdown("<h1>Document-based Chatbot (Light Mode)</h1>")

    # Chat window
    with gr.Row():
        with gr.Column():
            chatbox = gr.HTML(elem_id="chatbox", value="")

    # Inputs
    with gr.Row():
        with gr.Column():
            user_prompt = gr.Textbox(label="Your message")
        with gr.Column():
            file_input = gr.File(label="Upload PDF or TXT file")

    # Slider for Max New Tokens
    max_new_tokens = gr.Slider(50, 300, step=10, label="Max New Tokens")

    # Suggested prompts area
    with gr.Row():
        suggested_prompts = gr.HTML("")

    # Output area (chat history)
    output_text = gr.Textbox(visible=False)

    # Chat history state
    chatbot_history = gr.State([])

    # Button to submit
    submit_button = gr.Button("Send")

    # Function to generate and display suggested prompts
    def update_suggested_prompts(file_obj):
        file_text = extract_text_from_file(file_obj)
        suggestions = generate_prompt_suggestions(file_text)
        prompt_html = "<div><b>Suggested Prompts:</b><br>" + "<br>".join(f"- {suggestion}" for suggestion in suggestions) + "</div>"
        return prompt_html

    # Update chat window with user and assistant messages
    def update_chat_window(messages):
        chat_html = ""
        for message in messages:
            role = message["role"]
            content = message["content"]
            if role == "user":
                chat_html += f"<div class='message user-message'>{content}</div>"
            else:
                chat_html += f"<div class='message assistant-message'>{content}</div>"
        return chat_html

    # Function for chatbot interaction
    def interact(prompt, file_obj, history, max_new_tokens):
        response, new_history = chatbot_fn(prompt, file_obj, history, max_new_tokens)
        chat_html = update_chat_window(new_history)
        return chat_html, new_history

    # Bind the file upload to generate prompt suggestions
    file_input.change(update_suggested_prompts, inputs=[file_input], outputs=[suggested_prompts])

    # Bind the button to the chatbot function
    submit_button.click(interact, [user_prompt, file_input, chatbot_history, max_new_tokens], [chatbox, chatbot_history])

# Start Gradio on a random port to avoid port conflicts
port = random.randint(8000, 9000)  # Dynamic port selection
iface.launch(server_name="0.0.0.0", server_port=port, share=False)

# Start Ngrok tunnel to expose the Gradio app
public_url = ngrok.connect(port)
print(f"Ngrok Tunnel URL: {public_url}")




Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Note: opening Chrome Inspector may crash demo inside Colab notebooks.

To create a public link, set `share=True` in `launch()`.


<IPython.core.display.Javascript object>

Ngrok Tunnel URL: NgrokTunnel: "https://7103-35-224-185-184.ngrok-free.app" -> "http://localhost:8915"


In [9]:
!ngrok config add-authtoken 2nrVQ6aD9BlhbMim7so6JxInX3w_3zTa7muTpscFegJCi4s6Q

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml
