In [None]:
!pip install transformers gradio requests beautifulsoup4


In [None]:
import re
from urllib.parse import quote_plus

import gradio as gr
import requests
from bs4 import BeautifulSoup
from transformers import pipeline

In [None]:
def clean_text(text):
    """Normalize raw text before it is handed to a model.

    Bracketed and parenthesized spans (for example ``[12]`` or
    ``(Smith, 2020)``) are removed, then every run of whitespace, including
    newlines, is collapsed into a single space.

    Args:
        text: The raw text to clean.

    Returns:
        The cleaned text without leading or trailing whitespace.
    """
    # Remove the bracketed spans *first*: deleting them after the whitespace
    # pass would leave behind the doubled spaces that pass is meant to remove.
    # DOTALL lets a span that wraps across lines still be matched.
    text = re.sub(r'\[.*?\]|\(.*?\)', '', text, flags=re.DOTALL)
    text = re.sub(r'\s+', ' ', text)
    return text.strip()


In [None]:
def get_arxiv_paper(title):
    """Look up the first arXiv search result for a title.

    Sends one query to the arXiv export API and reads the first ``<entry>``
    element of the XML feed that comes back.

    Args:
        title: Free-text title (or keywords) to search for.

    Returns:
        A ``(paper_title, summary)`` tuple of strings, or ``(None, None)`` when
        the title is blank, the feed has no entry, or the entry lacks a title
        or summary element.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    query = (title or "").strip()
    if not query:
        # Nothing to search for; skip the network round trip.
        return None, None

    # Passing the query via ``params`` makes requests percent-encode it, so
    # characters such as '&', '#' or '+' in a title cannot corrupt the URL.
    response = requests.get(
        "http://export.arxiv.org/api/query",
        params={"search_query": f"all:{query}", "start": 0, "max_results": 1},
        timeout=30,  # never hang the UI callback on a stalled connection
    )
    response.raise_for_status()

    soup = BeautifulSoup(response.content, "xml")
    entry = soup.find('entry')
    if entry is None or entry.title is None or entry.summary is None:
        return None, None

    # Titles in the feed may be wrapped over several lines; fold them onto one
    # line so the title can be used safely in a single-line heading.
    paper_title = " ".join(entry.title.text.split())
    summary = entry.summary.text.strip()
    return paper_title, summary


In [None]:
# Single source of truth for the checkpoint used by both pipelines.
# NOTE(review): each pipeline() call receives the checkpoint name, so the
# weights are presumably loaded twice - confirm memory headroom.
MODEL_NAME = "google/flan-t5-large"

summarizer = pipeline(task="summarization", model=MODEL_NAME)
qa_pipeline = pipeline(task="text2text-generation", model=MODEL_NAME)

In [None]:
def generate_summary(paper_abstract):
    """Summarize a paper abstract with the module-level ``summarizer``.

    The abstract is cleaned with ``clean_text`` and the generation length
    bounds are scaled to the input, so a short abstract is not forced to yield
    a "summary" longer than itself.

    Args:
        paper_abstract: Raw abstract text.

    Returns:
        The generated summary text, or an empty string when there is nothing
        to summarize.
    """
    if not paper_abstract or not paper_abstract.strip():
        return ""
    cleaned = clean_text(paper_abstract)
    if not cleaned:
        # Cleaning can remove everything (e.g. input that was only "(...)").
        return ""

    # Size the output relative to the input. Inputs of 200+ tokens keep the
    # original bounds (max 200 / min 80); shorter inputs get tighter bounds so
    # the model is not pushed to pad the summary with invented content.
    n_tokens = len(summarizer.tokenizer.encode(cleaned, truncation=True))
    max_len = max(16, min(200, n_tokens))
    min_len = min(80, max_len // 2)

    summary = summarizer(
        cleaned,
        max_length=max_len,
        min_length=min_len,
        do_sample=False,
        truncation=True,  # guard against inputs beyond the model's limit
    )[0]['summary_text']
    return summary

In [None]:
def answer_question(abstract, question):
    """Answer a question using the abstract as context.

    Builds a ``Context / Question / Answer`` prompt and runs it through the
    module-level ``qa_pipeline``.

    Args:
        abstract: Text placed in the ``Context:`` part of the prompt.
        question: Text placed in the ``Question:`` part of the prompt.

    Returns:
        The ``generated_text`` of the first pipeline result, stripped of
        surrounding whitespace.
    """
    prompt = f"Context: {abstract}\n\nQuestion: {question}\nAnswer:"
    generations = qa_pipeline(prompt, max_length=200)
    first_result = generations[0]
    return first_result['generated_text'].strip()

In [None]:
def paper_bot(title_input, question_input):
    """Gradio callback: find a paper, summarize it, optionally answer a question.

    Args:
        title_input: Paper title (or keywords) typed by the user.
        question_input: Optional question about the paper; blank or
            whitespace-only input skips the question-answering step.

    Returns:
        A Markdown string with the paper title, a summary, the answer (when a
        question was asked) and a link to an arXiv search for the title, or a
        short message when the paper cannot be found or arXiv cannot be
        reached.
    """
    title = (title_input or "").strip()
    question = (question_input or "").strip()
    if not title:
        # A blank title can never match a paper; avoid a pointless request.
        return " Paper not found. Try a different title."

    try:
        paper_title, abstract = get_arxiv_paper(title)
    except requests.RequestException as exc:
        # Report network problems in the UI instead of a raw traceback.
        return f"Could not reach arXiv ({type(exc).__name__}). Please try again later."
    if not abstract:
        return " Paper not found. Try a different title."

    summary = generate_summary(abstract)
    answer_section = ""
    if question:
        answer = answer_question(abstract, question)
        answer_section = f"### Answer to your question:\n{answer}\n\n"

    # Percent-encode the title so characters such as '&', '#' or ')' cannot
    # break the query string or the surrounding Markdown link syntax.
    search_query = quote_plus(" ".join(title.split()))
    return (
        f"##  Paper Title: {paper_title}\n\n"
        f"---\n\n"
        f"###  Summary:\n{summary}\n\n"
        f"{answer_section}"
        f"---\n"
        f"🔗 [Read full paper on arXiv](https://arxiv.org/search/?query={search_query}&searchtype=all)"
    )


In [None]:
# Two text inputs feed paper_bot; its Markdown string is rendered as output.
title_box = gr.Textbox(label="Enter Research Paper Title")
question_box = gr.Textbox(label="Ask a Question About the Paper (optional)")

demo = gr.Interface(
    fn=paper_bot,
    inputs=[title_box, question_box],
    outputs="markdown",
    title="Research Paper Q&A Bot",
    description="Enter a paper title to get a summary and optionally ask a question based on the paper abstract.",
)
demo.launch()
