<a href="https://colab.research.google.com/github/AbdullahFaiza/Deep-Learning-Spring-2025/blob/main/ReseaAIAgent.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# ✅ Resea: Virtual Research Agent
# Uses Hugging Face + SerpAPI, generates .docx/.pdf reports with web interface

!pip uninstall -y serpapi -q
!pip install -q google-search-results transformers newspaper3k wikipedia python-docx lxml[html_clean] reportlab gradio

[0m  Preparing metadata (setup.py) ... [?25l[?25hdone
  Preparing metadata (setup.py) ... [?25l[?25hdone
  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.4/7.4 MB[0m [31m54.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Preparing metadata (setup.py) ... [?25l[?25hdone
  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.1/211.1 kB[0m [31m14.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.3/244.3 kB[0m [31m14.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m53.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.1/54.1 MB[0m [31m10.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m322.9/322.9 k

In [2]:
# === Imports ===
import os
import re
import wikipedia
import gradio as gr
from urllib.parse import urlparse
from newspaper import Article
from serpapi import GoogleSearch
from transformers import pipeline
from docx import Document
from reportlab.pdfgen import canvas


In [3]:
# === SerpAPI Key ===
# The key is read from the environment instead of being embedded in the notebook:
# anything hardcoded here is published with the notebook and its outputs.
# Set it before running this cell, e.g. os.environ["SERPAPI_KEY"] = "<your key>".
SERPAPI_KEY = os.environ.get("SERPAPI_KEY", "")
if not SERPAPI_KEY:
    print("SERPAPI_KEY is not set; SerpAPI web search will not work until it is provided.")

In [4]:
# === Summarizer ===
# Module-level summarization pipeline, built once with the BART CNN checkpoint.
summarizer = pipeline(
    task="summarization",
    model="facebook/bart-large-cnn",
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/1.58k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Device set to use cpu


In [5]:
# === Wikipedia Functions ===
def get_wikipedia_summary(topic):
    """Return the first 3000 characters of the Wikipedia page content for ``topic``.

    Section-heading markup (``== Heading ==``) is stripped before truncating.
    Any exception is converted into a ``"Wikipedia Error: ..."`` string and
    returned instead of being raised.
    """
    try:
        article = wikipedia.page(topic)
        heading_markup = re.compile(r'==.*?==+')
        without_headings = heading_markup.sub('', article.content)
        return without_headings[:3000]
    except Exception as err:
        return "Wikipedia Error: " + str(err)

def get_references(topic):
    """Return up to five references of the Wikipedia page for ``topic``, each tagged with a score.

    Each entry has the form ``"<url> [Credibility Score: <n>/5]"`` where ``n`` is
    5 for ``edu``/``gov`` hosts, 4 for ``org`` hosts and 3 otherwise.

    Returns ``["No references found."]`` when the page cannot be loaded or has
    no references.
    """
    try:
        refs = wikipedia.page(topic).references[:5]
    except Exception:
        # Best-effort: a missing or ambiguous page must not abort the report.
        return ["No references found."]

    scored = []
    for ref in refs:
        try:
            # `hostname` is lower-cased and excludes any port or credentials,
            # unlike `netloc`.
            host = (urlparse(ref).hostname or "").rstrip(".")
        except ValueError:
            # A malformed reference gets the default score instead of
            # discarding the whole list.
            host = ""
        labels = host.split(".")
        category = labels[-1]
        # Under a two-letter country code the category sits one level down
        # (e.g. gov.uk, edu.au, org.uk).
        if len(category) == 2 and len(labels) >= 2:
            category = labels[-2]
        if category in ("edu", "gov"):
            score = 5
        elif category == "org":
            score = 4
        else:
            score = 3
        scored.append(f"{ref} [Credibility Score: {score}/5]")
    return scored or ["No references found."]


In [6]:
# === Web Search & Summarize ===
def get_web_results(topic, num_results=3):
    """Return the links of the top organic SerpAPI results for ``topic``.

    Args:
        topic: Search query text.
        num_results: Maximum number of links to request and return (default 3).

    Returns:
        A list of at most ``num_results`` result URLs. The list is empty when
        the topic is blank, the search fails, or the response carries no
        organic results.
    """
    if not topic or not topic.strip():
        return []
    try:
        search = GoogleSearch({"q": topic, "api_key": SERPAPI_KEY, "num": num_results})
        # `or []` also covers a response where the key is present but null.
        results = search.get_dict().get("organic_results") or []
    except Exception as e:
        # Best-effort, like the other fetch helpers: a failed search should not
        # abort the whole report.
        print(f"[Web search failed for {topic!r}: {e}]")
        return []
    links = [res.get("link") for res in results if res.get("link")]
    # Enforce the limit locally in case the response holds more entries than requested.
    return links[:num_results]

def summarize_article(url):
    """Fetch the article at ``url`` and return a summary of its first 2000 characters.

    Returns ``"[Empty article at <url>]"`` when the parsed article has no text,
    and ``"[Error summarizing <url>: <message>]"`` when any step raises.
    """
    try:
        page = Article(url)
        page.download()
        page.parse()
        body = page.text
        if body.strip():
            excerpt = body[:2000]
            result = summarizer(excerpt, max_length=130, min_length=30, do_sample=False)
            return result[0]['summary_text']
        return f"[Empty article at {url}]"
    except Exception as err:
        return f"[Error summarizing {url}: {str(err)}]"

In [7]:
# === Report Generator ===
def _score_url_credibility(url):
    """Rate a URL 5 (edu/gov host), 4 (org host) or 3 (anything else)."""
    try:
        # Judge the hostname only, so path or query text such as
        # "www.organic.com" or "/file.education" cannot trigger a match.
        host = (urlparse(url).hostname or "").rstrip(".")
    except ValueError:
        return 3
    labels = host.split(".")
    category = labels[-1]
    # Under a two-letter country code the category sits one level down
    # (e.g. gov.uk, edu.au, org.uk).
    if len(category) == 2 and len(labels) >= 2:
        category = labels[-2]
    if category in ("edu", "gov"):
        return 5
    if category == "org":
        return 4
    return 3


def _safe_file_stem(topic):
    """Turn a free-text topic into a filename stem without path separators or dots."""
    # Spaces still become underscores; every other non-word character does too,
    # so input like "../x" cannot point outside the working directory.
    return re.sub(r"[^\w\-]", "_", topic)[:100]


def _write_pdf_summary(pdf_file, topic, summary):
    """Write the report title and the first 500 characters of ``summary`` to ``pdf_file``."""
    import textwrap  # stdlib; imported here because the notebook's import cell does not include it

    pdf = canvas.Canvas(pdf_file)
    # drawString does not wrap, so the text is split into short lines first.
    # textwrap also folds newlines into spaces.
    # NOTE(review): 80 characters is an estimate for the default font and page size — confirm.
    rows = [(line, 20) for line in textwrap.wrap(f"Research Summary: {topic}", width=80)]
    rows += [(line, 15) for line in textwrap.wrap(summary[:500], width=80)]
    y = 800
    for text, leading in rows:
        if y < 50:
            # Continue on a fresh page once the bottom margin is reached.
            pdf.showPage()
            y = 800
        pdf.drawString(50, y, text)
        y -= leading
    pdf.save()


def generate_research_report(topic):
    """Build a research report for ``topic`` and save it as DOCX and PDF.

    Gathers the Wikipedia summary and references plus summaries of the top web
    results, writes ``<topic>_Resea_Report.docx`` and
    ``<topic>_Resea_Summary.pdf`` into the working directory, and formats the
    same content as text for the UI.

    Args:
        topic: Free-text research topic (user input).

    Returns:
        A ``(formatted_text, docx_path, pdf_path)`` tuple. For a blank topic no
        files are written and the tuple is
        ``("Please enter a research topic.", None, None)``.
    """
    if not topic or not topic.strip():
        return "Please enter a research topic.", None, None

    summary = get_wikipedia_summary(topic)
    refs = get_references(topic)
    # Score each URL once so the DOCX and the on-screen report always agree.
    web_summaries = [
        (url, _score_url_credibility(url), summarize_article(url))
        for url in get_web_results(topic)
    ]
    file_stem = _safe_file_stem(topic)

    # Save as DOCX
    doc = Document()
    doc.add_heading(f"Research Report: {topic}", 0)

    doc.add_heading("Wikipedia Summary", level=1)
    doc.add_paragraph(summary)

    doc.add_heading("Wikipedia References", level=1)
    for ref in refs:
        doc.add_paragraph(ref)

    doc.add_heading("Web Summaries", level=1)
    for url, score, summ in web_summaries:
        doc.add_paragraph(f"{url} [Credibility Score: {score}/5]\nSummary: {summ}")

    docx_file = f"{file_stem}_Resea_Report.docx"
    doc.save(docx_file)

    # Save as PDF
    pdf_file = f"{file_stem}_Resea_Summary.pdf"
    _write_pdf_summary(pdf_file, topic, summary)

    # Return results to UI
    formatted = f"""
📘 Research Report: {topic}

📝 Wikipedia Summary:
{summary}

🔗 References:
{chr(10).join(refs)}

🌐 Web Articles:
"""
    for url, score, summ in web_summaries:
        # The on-screen version truncates each article summary to 400 characters.
        formatted += f"\n- {url} [Credibility Score: {score}/5]\nSummary: {summ[:400]}\n"

    return formatted, docx_file, pdf_file



In [8]:
# === Gradio Interface ===
# Optional mascot image. In Colab the user is prompted to upload one; whatever
# file name they upload is used, instead of requiring it to be named exactly
# "resea_mascot.png". Outside Colab the upload step is skipped.
MASCOT_DEFAULT = "resea_mascot.png"
try:
    from google.colab import files  # only importable inside Google Colab
    uploaded = files.upload() or {}  # mapping: uploaded file name -> content
except ImportError:
    uploaded = {}

# Prefer a freshly uploaded file, then fall back to the default name if present.
mascot_path = next((name for name in uploaded if os.path.exists(name)), None)
if mascot_path is None and os.path.exists(MASCOT_DEFAULT):
    mascot_path = MASCOT_DEFAULT

with gr.Blocks() as demo:
    # The UI still builds when no mascot image is available.
    if mascot_path:
        with gr.Row():
            gr.Image(mascot_path, width=180, show_label=False)
    gr.Markdown("""# 🤖 Resea: Virtual Research Assistant\nType a research topic below and get a complete report including citations, credibility, and summaries.\n👉 If viewing inline, [open full app in new tab](https://gradio.live) after clicking below.""")
    topic_input = gr.Textbox(label="Enter Research Topic")
    generate_btn = gr.Button("Generate Report")
    output_text = gr.Textbox(lines=20, label="Formatted Report")
    docx_file = gr.File(label="Download DOCX")
    pdf_file = gr.File(label="Download PDF")

    # The three outputs map, in order, to the tuple returned by generate_research_report.
    generate_btn.click(fn=generate_research_report, inputs=topic_input, outputs=[output_text, docx_file, pdf_file])

    gr.Markdown("After clicking Generate Report, you'll see a download link appear above. To view the full app in a new tab, [click here](https://gradio.live)")

# share=True publishes the app on a temporary public gradio.live URL.
demo.launch(share=True)

Saving Resea white.png to Resea white.png
Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://0af668d91378a2afa4.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


