In [1]:
!pip install -q groq pymupdf sentence-transformers faiss-cpu faster-whisper gradio

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m138.3/138.3 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.9/24.9 MB[0m [31m35.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.8/23.8 MB[0m [31m36.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m26.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.2/41.2 MB[0m [31m9.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m39.0/39.0 MB[0m [31m28.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.1/17.1 MB[0m [31m46.0 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import gradio as gr
import requests
import fitz
import re
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer
from groq import Groq
from faster_whisper import WhisperModel
import os

# =========================
# INITIALIZE MODELS
# =========================

# Sentence-embedding model shared by section indexing and query encoding.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
# Speech-to-text model used by the voice-question path.
whisper_model = WhisperModel("base", compute_type="int8")

# Retrieve the Groq API key from the GROQ_API_KEY environment variable so the
# secret does not have to be pasted into the notebook. The original placeholder
# string is kept as the fallback, so nothing changes when the variable is unset.
groq_api_key = os.environ.get("GROQ_API_KEY", "Your Api")
client = Groq(api_key=groq_api_key)
MODEL_NAME = "llama-3.3-70b-versatile"

# Global storage
sections = {}        # section title -> section body for the loaded paper
section_texts = []   # section bodies, in the same order as the FAISS rows
index = None         # FAISS index over section_texts; None until a paper loads


# =========================
# PDF FUNCTIONS
# =========================

def download_arxiv_pdf(arxiv_id, timeout=30):
    """Download the PDF for an arXiv identifier into the working directory.

    Args:
        arxiv_id: arXiv identifier such as "1706.03762". Surrounding
            whitespace is ignored.
        timeout: Seconds to wait for the HTTP request before giving up, so a
            stalled connection cannot hang the UI callback forever.

    Returns:
        The path of the saved PDF, or None when the identifier is blank, the
        request fails (network error, timeout, non-2xx status) or the file
        cannot be written.
    """
    arxiv_id = str(arxiv_id or "").strip()
    if not arxiv_id:
        return None

    url = f"https://arxiv.org/pdf/{arxiv_id}.pdf"
    # The id comes straight from a text box. Old-style ids contain "/" and
    # arbitrary input could contain path separators, so only keep characters
    # that are safe in a plain file name.
    file_path = re.sub(r"[^A-Za-z0-9._-]", "_", arxiv_id) + ".pdf"

    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        with open(file_path, "wb") as f:
            f.write(response.content)
    except (requests.RequestException, OSError):
        # Only expected download / filesystem failures are turned into None;
        # programming errors are no longer silently swallowed.
        return None

    return file_path


def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        A single string containing the text of all pages.
    """
    # The context manager closes the document (and its file handle) even if
    # text extraction fails part-way through.
    with fitz.open(pdf_path) as doc:
        return "".join(page.get_text() for page in doc)


def extract_sections(text):
    """Split paper text into sections keyed by their detected headings.

    A heading is a line that looks like one of:
      * a Roman numeral followed by an ALL-CAPS title ("I. INTRODUCTION"),
      * a dotted number followed by a capitalised title ("1. Introduction"),
      * a bare number followed by a capitalised title ("1 Introduction"),
      * a standalone ALL-CAPS line ("ABSTRACT").

    Args:
        text: Full text of the paper.

    Returns:
        A dict mapping heading title -> section body, in document order. A
        repeated title gets a " (2)", " (3)", ... suffix instead of silently
        overwriting the earlier section. Returns {} for empty text.
    """
    if not text:
        return {}

    # "^" with re.MULTILINE also matches a heading on the very first line.
    # The ALL-CAPS patterns use "[A-Z \t]" rather than "[A-Z\s]" so a heading
    # cannot run across the line break and swallow the leading capital
    # letter(s) of the body that follows it.
    heading_patterns = [
        r"^([IVX]+\.[ \t]+[A-Z][A-Z \t]+)",     # Roman numeral ALL CAPS
        r"^(\d+\.\s+[A-Z][^\n]+)",              # 1. Introduction
        r"^(\d+\s+[A-Z][^\n]+)",                # 1 Introduction
        r"^([A-Z][A-Z \t]{3,})$",               # ALL CAPS standalone
    ]

    candidates = []
    for pattern in heading_patterns:
        candidates.extend(re.finditer(pattern, text, flags=re.MULTILINE))

    # Document order; when two patterns start at the same place prefer the
    # longer heading.
    candidates.sort(key=lambda m: (m.start(1), -m.end(1)))

    # Drop candidates that begin inside a heading that was already accepted;
    # otherwise overlapping matches produce empty or garbled sections.
    headings = []
    for candidate in candidates:
        if headings and candidate.start(1) < headings[-1].end(1):
            continue
        headings.append(candidate)

    sections = {}
    for i, heading in enumerate(headings):
        # Collapse internal whitespace: the numbered patterns may match a
        # number and its title on separate lines.
        base_title = " ".join(heading.group(1).split())
        body_start = heading.end(1)
        body_end = headings[i + 1].start(1) if i + 1 < len(headings) else len(text)

        title = base_title
        suffix = 2
        while title in sections:
            title = f"{base_title} ({suffix})"
            suffix += 1

        sections[title] = text[body_start:body_end].strip()

    return sections


# =========================
# VECTOR STORE
# =========================

def build_vector_store(sections_dict):
    """Embed the section bodies and (re)build the global FAISS index.

    Updates the module globals ``section_texts`` (the indexed bodies) and
    ``index`` (a flat L2 FAISS index whose row i corresponds to
    ``section_texts[i]``). When there is nothing to index, ``index`` is set
    to None and ``section_texts`` to an empty list.

    Args:
        sections_dict: Mapping of section title -> section body text.

    Returns:
        None.
    """
    global index, section_texts

    # Blank bodies carry no information but would still be embedded and could
    # take up top-k retrieval slots, so they are left out of the index.
    texts = [body for body in sections_dict.values() if body and body.strip()]

    if not texts:
        section_texts = []
        index = None
        return

    embeddings = np.asarray(embedding_model.encode(texts), dtype="float32")

    new_index = faiss.IndexFlatL2(embeddings.shape[1])
    new_index.add(embeddings)

    # Publish both globals together, only after the index was built, so they
    # never describe different papers if encoding fails.
    section_texts = texts
    index = new_index


# =========================
# LOAD PAPER
# =========================

def load_paper(arxiv_id):
    """Download an arXiv paper, split it into sections and index it.

    Updates the module global ``sections`` and, through
    ``build_vector_store``, the retrieval index.

    Args:
        arxiv_id: arXiv identifier entered by the user.

    Returns:
        A tuple ``(dropdown_update, status_message)``: a Gradio update with
        the section titles as dropdown choices, and a Markdown status string.
    """
    global sections

    pdf_path = download_arxiv_pdf(arxiv_id)

    if pdf_path is None:
        return gr.update(choices=[]), "❌ Invalid arXiv ID"

    try:
        text = extract_text_from_pdf(pdf_path)
    except Exception as exc:
        # UI boundary: show the parse failure to the user instead of letting
        # the callback crash with a traceback.
        return gr.update(choices=[]), f"❌ Could not read the downloaded PDF: {exc}"

    parsed_sections = extract_sections(text)
    build_vector_store(parsed_sections)
    sections = parsed_sections

    if index is None:
        # Nothing was indexed, so the chat would still answer "Please load a
        # paper first." Reporting success here would be misleading.
        return (
            gr.update(choices=list(sections.keys())),
            "⚠️ Paper downloaded, but no sections could be detected",
        )

    return gr.update(choices=list(sections.keys())), "✅ Paper Loaded Successfully"


# =========================
# SUMMARIZATION
# =========================

def summarize_section(section_title):
    """Ask the LLM for a structured summary of one section of the loaded paper.

    Args:
        section_title: Title of the section, as a key of the global
            ``sections`` mapping.

    Returns:
        The model's summary text, or "Please load paper first." when the
        title is not a key of ``sections``.
    """
    try:
        content = sections[section_title]
    except KeyError:
        return "Please load paper first."

    # Only the first 6000 characters of the section are sent to the model.
    prompt = f"""
You are an expert AI research assistant.

Generate a structured scientific summary:
- Main idea
- Key technical concepts
- Important equations explained simply
- Why this section matters

Section Title: {section_title}
Section Content:
{content[:6000]}
"""

    user_turn = {"role": "user", "content": prompt}
    completion = client.chat.completions.create(
        model=MODEL_NAME,
        messages=[user_turn],
        temperature=0.3
    )

    first_choice = completion.choices[0]
    return first_choice.message.content


# =========================
# RAG CHAT
# =========================

def rag_chat(message, history):
    """Answer a question about the loaded paper using retrieved sections.

    The question is embedded, the closest section bodies (at most three) are
    looked up in the global FAISS ``index``, and the LLM is asked to answer
    strictly from that context.

    Args:
        message: The user's question.
        history: Chat history as a list of ``(user, assistant)`` tuples; None
            is treated as an empty history.

    Returns:
        A tuple ``(history, "")``: the history with the new exchange appended,
        and an empty string used to clear the question text box. A blank
        question leaves the history unchanged.
    """
    if history is None:
        history = []

    # Nothing to ask (e.g. an empty text box or a silent recording).
    if not message or not message.strip():
        return history, ""

    if index is None or not section_texts:
        history.append((message, "Please load a paper first."))
        return history, ""

    query_embedding = np.asarray(embedding_model.encode([message]), dtype="float32")

    # Never request more neighbours than there are indexed sections: FAISS
    # pads missing results with -1, and section_texts[-1] would silently pull
    # in the last section instead.
    k = min(3, len(section_texts))
    _, neighbours = index.search(query_embedding, k=k)

    retrieved = "\n\n".join(
        section_texts[i] for i in neighbours[0] if 0 <= i < len(section_texts)
    )

    prompt = f"""
Answer strictly using the provided research paper context.
If the answer is not found, say:
"The answer is not available in the provided paper."

Context:
{retrieved}

Question:
{message}
"""

    response = client.chat.completions.create(
        model=MODEL_NAME,
        messages=[{"role": "user", "content": prompt}],
        temperature=0.2
    )

    answer = response.choices[0].message.content
    history.append((message, answer))
    return history, ""


# =========================
# VOICE CHAT
# =========================

def voice_chat(audio, history):
    """Transcribe a spoken question and forward it to the text chat.

    Args:
        audio: The recorded audio passed on to the Whisper model, or None
            when nothing was recorded.
        history: Current chat history.

    Returns:
        Whatever ``rag_chat`` returns for the transcribed question, or
        ``(history, "")`` unchanged when no audio was supplied.
    """
    if audio is not None:
        transcript_parts, _info = whisper_model.transcribe(audio)
        question = "".join(part.text for part in transcript_parts)
        return rag_chat(question, history)

    return history, ""


# =========================
# GRADIO UI
# =========================

# Build the single-page UI. Components are rendered in the order they are
# created inside the Blocks context, so statement order here is the layout.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 📚 ArXiv RAG Research Assistant")

    # Paper loading: id text box and button side by side.
    with gr.Row():
        arxiv_input = gr.Textbox(label="Enter arXiv ID (e.g., 1706.03762)")
        load_button = gr.Button("Load Paper")

    # Receives the status string returned by load_paper.
    load_status = gr.Markdown()

    # Section summary: choices are filled in by load_paper.
    section_dropdown = gr.Dropdown(label="Select Section")
    summarize_button = gr.Button("Generate Summary")
    summary_output = gr.Markdown()

    # Text chat over the loaded paper.
    gr.Markdown("## 💬 Research Chat")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Ask a question")
    send = gr.Button("Send")

    # Voice input; the recording is handed to voice_chat as a file path.
    gr.Markdown("## 🎙 Voice Question")
    audio_input = gr.Audio(type="filepath")
    voice_button = gr.Button("Ask via Voice")

    # Actions
    # load_paper returns (dropdown update, status message).
    load_button.click(load_paper, inputs=arxiv_input, outputs=[section_dropdown, load_status])
    summarize_button.click(summarize_section, inputs=section_dropdown, outputs=summary_output)
    # Both chat handlers return (history, ""), which refreshes the chatbot and
    # clears the question text box.
    send.click(rag_chat, inputs=[msg, chatbot], outputs=[chatbot, msg])
    voice_button.click(voice_chat, inputs=[audio_input, chatbot], outputs=[chatbot, msg])

# debug=True keeps the notebook cell running so errors and logs stay visible.
demo.launch(debug=True)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]



config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

Loading weights:   0%|          | 0/103 [00:00<?, ?it/s]

BertModel LOAD REPORT from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.


tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

  with gr.Blocks(theme=gr.themes.Soft()) as demo:
  chatbot = gr.Chatbot()
  chatbot = gr.Chatbot()


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://68ab6ac0d1eb79f4d1.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
