In [1]:
#Step 1: Installing Core Dependencies/necessary library
!pip install duckduckgo-search transformers sentencepiece
import warnings

# Suppress only the specific warning about datetime.utcnow()
warnings.filterwarnings("ignore", message=".*datetime.datetime.utcnow().*")

Collecting duckduckgo-search
  Downloading duckduckgo_search-8.1.1-py3-none-any.whl.metadata (16 kB)
Collecting primp>=0.15.0 (from duckduckgo-search)
  Downloading primp-0.15.0-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Downloading duckduckgo_search-8.1.1-py3-none-any.whl (18 kB)
Downloading primp-0.15.0-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.3 MB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m3.3/3.3 MB[0m [31m28.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: primp, duckduckgo-search
Successfully installed duckduckgo-search-8.1.1 primp-0.15.0


In [2]:
#Step 2: Loading the AI Model
from duckduckgo_search import DDGS
from transformers import pipeline
import json, re, urllib, requests

# Build the summarization pipeline (facebook/bart-large-cnn) once at module level;
# later cells call the global `summarizer` directly. The recorded output of this
# cell shows a 1.63G model.safetensors download and the device being set to cuda:0.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

Device set to use cuda:0


In [3]:
# Step 3: Setup Memory System for the MEMORY_FILE
import json
import os

MEMORY_FILE = "memory.json"

default_memory = {
    "preferred_summary_length": 150,
    "credibility_priority": "medium"
}

# Load memory.
# Always start from a *copy* of the defaults so later in-place changes to `memory`
# never modify `default_memory`. Values stored on disk are layered on top, which
# also backfills any key that an older memory file does not contain.
memory = dict(default_memory)
stored_memory = None
if os.path.exists(MEMORY_FILE):
    try:
        with open(MEMORY_FILE, "r") as f:
            stored_memory = json.load(f)
    except (OSError, ValueError) as e:
        # json.JSONDecodeError is a ValueError: a corrupt file must not break the cell.
        print(f"Could not read {MEMORY_FILE} ({e}); falling back to defaults.")

if isinstance(stored_memory, dict):
    memory.update(stored_memory)
else:
    # No usable file (missing, unreadable, or not a JSON object): write the defaults.
    with open(MEMORY_FILE, "w") as f:
        json.dump(memory, f, indent=4)

def update_memory(key, value):
    """Store ``value`` under ``key`` in the global ``memory`` dict and persist it.

    The whole dict is rewritten to ``MEMORY_FILE`` as JSON indented by 4 spaces.
    """
    memory.update({key: value})
    with open(MEMORY_FILE, mode="w") as out_file:
        json.dump(memory, out_file, indent=4)


In [4]:
#Step 4: Safety Filter to Avoid Banned Words
def safety_check(query):
    """Return ``False`` when ``query`` contains a banned term, ``True`` otherwise.

    Matching is case-insensitive and anchored on word boundaries, so a banned
    term only triggers when it appears as its own word (optionally with a
    common inflection such as "kills", "killing", "harmful"). Plain substring
    matching rejected harmless topics such as "pharmacy" (contains "harm") or
    "skills" (contains "kill").

    Args:
        query: The user-supplied topic string.

    Returns:
        bool: ``True`` if the query passes the filter.
    """
    banned = ["how to make a bomb", "suicide", "harm", "weapons", "kill"]
    # Inflectional endings that keep the banned word's meaning.
    suffix = r"(?:s|ed|ing|er|ers|ful)?"
    lower = query.lower()
    for term in banned:
        pattern = r"\b" + re.escape(term) + suffix + r"\b"
        if re.search(pattern, lower):
            return False
    return True


In [5]:
#Step 5: Web Search Helper Function to Send Topic to DuckDuckGo
def web_search(topic, max_results=5):
    """Run a DuckDuckGo text search for ``topic``.

    Returns:
        A list of the results yielded by ``DDGS().text`` on success, or a dict
        ``{"error": <message>}`` if any exception is raised along the way.
    """
    try:
        with DDGS() as client:
            hits = client.text(topic, max_results=max_results)
            return list(hits)
    except Exception as exc:
        return {"error": str(exc)}


In [6]:
#Step 6: Summarization Function to Feed Raw Text Into the Model Based on Length Preferences Stored in the "Memory" File
def summarize_text(text):
    """Summarize ``text`` with the global ``summarizer`` pipeline.

    The generation bounds come from ``memory["preferred_summary_length"]``:
    half of it is passed as ``max_length`` and a quarter as ``min_length``.

    Args:
        text: Raw text to condense.

    Returns:
        str: The generated summary, or ``""`` when ``text`` is empty or only
        whitespace (the model is not called in that case).
    """
    # Nothing to summarize: avoid running the model on an empty prompt.
    if not text or not text.strip():
        return ""
    max_len = memory["preferred_summary_length"]
    result = summarizer(
        text,
        max_length=max_len // 2,
        min_length=max_len // 4,
        do_sample=False,
        truncation=True,  # clip inputs that exceed the model's maximum input length
    )
    return result[0]['summary_text']


In [7]:
#Step 7: Credibility Checker, a Logic Check to Rate Trustworthiness of Source
def evaluate_source(url):
    """Rate a source by the domain suffix of its URL.

    Only the hostname is inspected. Checking whether "edu", "gov" or "org"
    occurs anywhere in the URL mis-rates addresses such as
    "education-reviews.com" or ".../governance". Country-code variants like
    "www.gov.uk" or "sydney.edu.au" are recognised through the label that
    precedes a two-letter country code.

    Args:
        url: The source URL; a scheme is optional.

    Returns:
        str: "High credibility (.edu)", "High credibility (.gov)",
        "Medium credibility (.org)" or "Low credibility".
    """
    from urllib.parse import urlparse

    ratings = {
        "edu": "High credibility (.edu)",
        "gov": "High credibility (.gov)",
        "org": "Medium credibility (.org)",
    }
    fallback = "Low credibility"

    if not url or not isinstance(url, str):
        return fallback

    candidate = url.strip()
    if "://" not in candidate:
        # Without a scheme urlparse would treat the host as part of the path.
        candidate = "//" + candidate
    try:
        host = urlparse(candidate).hostname or ""
    except ValueError:
        # Malformed netloc (e.g. unbalanced IPv6 brackets).
        return fallback

    labels = host.rstrip(".").split(".")
    suffix = labels[-1]
    # e.g. "gov.uk" / "edu.au": the meaningful label sits before the country code.
    if len(labels) >= 2 and len(suffix) == 2 and labels[-2] in ratings:
        suffix = labels[-2]
    return ratings.get(suffix, fallback)


In [8]:
#Step 8: Feedback Mechanism, a Function that Updates the memory.json File
def apply_feedback(feedback):
    """Adjust the stored summary length from user feedback and persist ``memory``.

    "too long" lowers ``preferred_summary_length`` by 30 (never below 80),
    "too short" raises it by 30 (never above 400). "good" and any other value
    leave it untouched. ``MEMORY_FILE`` is rewritten in every case.
    """
    length_key = "preferred_summary_length"
    if feedback in ("too long", "too short"):
        current = memory[length_key]
        if feedback == "too long":
            adjusted = max(80, current - 30)
        else:
            adjusted = min(400, current + 30)
        memory[length_key] = adjusted
    with open(MEMORY_FILE, mode="w") as out_file:
        json.dump(memory, out_file, indent=4)


In [9]:
# Step 9: Main Agent Logic that Ties the Previous Cells Together Into a Workflow
#1. Check Safety
#2. Search Web
#3. Evaluate the Sources (High/Low Credibility)
#4. Summarize the Text
#5. Return the Report
def research_agent(topic):
    """Run the basic research workflow for ``topic``.

    Steps: safety filter -> web search -> credibility rating per source ->
    summary of the collected snippets.

    Args:
        topic: The subject to research.

    Returns:
        str: A short status message when the request is denied or the search
        fails.
        dict: Otherwise ``{"topic", "summary", "sources"}``, where each source
        is ``{"title", "url", "credibility"}``.
    """
    if not safety_check(topic):
        return "‚ùå Request denied for safety reasons."

    # Limit noisy output
    print("üîé Searching...")
    results = web_search(topic, max_results=5)

    if isinstance(results, dict) and "error" in results:
        return "Search failed."

    combined_text = ""
    report = []

    for r in results[:5]:  # Hard cap
        # `or ""` also covers keys that are present but hold None, which would
        # otherwise fail on slicing.
        title = (r.get("title") or "")[:120]  # limit
        snippet = (r.get("body") or "")[:300]  # limit
        url = r.get("href") or ""

        report.append({
            "title": title,
            "url": url,
            "credibility": evaluate_source(url),
        })

        combined_text += snippet + " "

    # Limit summary input (avoid infinite text)
    combined_text = combined_text[:2000]

    if not combined_text.strip():
        # No results, or no snippet text: skip the model instead of feeding it
        # an empty prompt.
        summary = "Not enough data to summarize."
    else:
        print("üß† Summarizing...")
        try:
            summary = summarize_text(combined_text)
        except Exception as e:
            summary = f"Summarizer error: {e}"

    return {
        "topic": topic,
        "summary": summary,
        "sources": report
    }


In [10]:
#Step 10: Install UI Library
!pip install gradio



In [11]:
#Step 11: Install Wikipedia Library as a Backup Search Method
!pip install wikipedia

Collecting wikipedia
  Downloading wikipedia-1.4.0.tar.gz (27 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: wikipedia
  Building wheel for wikipedia (setup.py) ... [?25l[?25hdone
  Created wheel for wikipedia: filename=wikipedia-1.4.0-py3-none-any.whl size=11678 sha256=2c8f683fef89d82e95576480ab7e607a0b1de197c979848a1215a63a8b6587e0
  Stored in directory: /root/.cache/pip/wheels/63/47/7c/a9688349aa74d228ce0a9023229c6c0ac52ca2a40fe87679b8
Successfully built wikipedia
Installing collected packages: wikipedia
Successfully installed wikipedia-1.4.0


In [12]:
# Step 12: Final Hybrid Agent & Interface (The Main Application)
# - Implements a fallback strategy: if DuckDuckGo returns nothing, it automatically switches to searching Wikipedia instead
# - Builds the user interface with Gradio
# - Launches the app and generates a public link for opening the tool in a web browser
import gradio as gr
import warnings
import time
import wikipedia
from duckduckgo_search import DDGS

# 1. CLEAN UP LOGS
# NOTE: these filters are not limited to a module or message, so they hide every
# DeprecationWarning and UserWarning raised after this point, whichever library emits it.
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=UserWarning)

# 2. DEFINE HYBRID AGENT (Search Engine + Wikipedia Fallback)
def research_agent_hybrid(topic):
    """Research ``topic`` with DuckDuckGo, falling back to Wikipedia.

    Workflow:
      1. Reject empty topics and topics that fail ``safety_check``.
      2. Try each DuckDuckGo backend in turn until one returns results.
      3. If none does, use the summary of the top Wikipedia search hit.
      4. Rate each source and build the summary (the model is used for search
         snippets; Wikipedia text is used directly).

    Args:
        topic: The subject to research.

    Returns:
        str: A status message when the topic is rejected, ambiguous, or every
        lookup failed.
        dict: Otherwise ``{"topic", "summary", "sources", "method"}``.
    """
    if not topic or not topic.strip():
        return "‚ö†Ô∏è Please enter a valid topic."

    # This function is what the UI calls, so the safety filter has to run here
    # too; otherwise it is bypassed entirely.
    if not safety_check(topic):
        return "‚ö†Ô∏è Request denied for safety reasons."

    print(f"üîé Researching: {topic}...")

    results = []
    source_type = "Search Engine"

    # --- STRATEGY A: DUCKDUCKGO (Search Engine) ---
    backends = ['api', 'html', 'lite']
    for backend in backends:
        try:
            with DDGS() as ddgs:
                search_gen = ddgs.text(topic, max_results=7, backend=backend)
                results = list(search_gen)
            if results:
                break # Success
            time.sleep(0.5)
        except Exception:
            # Best effort: a failing backend just means we try the next one.
            continue

    # --- STRATEGY B: WIKIPEDIA (Fallback) ---
    # If DuckDuckGo failed (results is empty), we try Wikipedia
    if not results:
        print("   ‚ö†Ô∏è Search engine blocked. Switching to Wikipedia fallback...")
        source_type = "Wikipedia"
        try:
            # Search for the page
            wiki_search = wikipedia.search(topic)
            if wiki_search:
                # Get the page content of the top result
                page = wikipedia.page(wiki_search[0], auto_suggest=False)

                # Format it exactly like a search result so the rest of the code works
                results.append({
                    "title": page.title,
                    "url": page.url,
                    "body": page.summary, # Wikipedia summary is high quality
                    "credibility": "High credibility (Wikipedia)"
                })
        except wikipedia.exceptions.DisambiguationError as e:
            return f"‚ö†Ô∏è Topic is too ambiguous. Did you mean: {', '.join(e.options[:5])}?"
        except wikipedia.exceptions.PageError:
            pass # No page found
        except Exception as e:
            print(f"Wikipedia error: {e}")

    # --- FINAL SAFETY CHECK ---
    if not results:
        return "‚ö†Ô∏è All research methods failed. Colab IPs are currently heavily restricted. Please try again later."

    # --- PROCESSING DATA ---
    combined_text = ""
    report = []

    for r in results[:5]:
        title = r.get("title", "Untitled")
        snippet = r.get("body", r.get("content", ""))
        url = r.get("href", r.get("url", "#")) # Handle both DDG and Wiki keys

        if not snippet:
            continue

        # Credibility check (If it came from Wiki, we already marked it).
        # Everything else goes through the shared evaluate_source() helper so
        # both agents rate sources identically.
        credibility = r.get("credibility", "Low credibility")
        if "High" not in credibility:
            credibility = evaluate_source(url)

        report.append({
            "title": title[:100],
            "url": url,
            "credibility": credibility,
        })
        combined_text += snippet + " "

    # Use the summary directly if it's Wikipedia (it's already a summary)
    # Otherwise, use the AI model to summarize the search snippets
    final_summary = ""
    if source_type == "Wikipedia":
        final_summary = combined_text[:1500] # Wiki summaries are already clean
    else:
        print("üß† Synthesizing summary with AI...")
        try:
            if len(combined_text) < 50:
                final_summary = "Not enough data to summarize."
            else:
                final_summary = summarizer(combined_text[:3000], max_length=200, min_length=50, do_sample=False)[0]['summary_text']
        except Exception as e:
            final_summary = f"Summarizer Error: {e}"

    return {
        "topic": topic,
        "summary": final_summary,
        "sources": report,
        "method": source_type
    }

# 3. DEFINE UI
def run_research_ui(topic):
    """Run the hybrid agent for ``topic`` and format the outcome as Markdown.

    Args:
        topic: Text entered in the UI.

    Returns:
        str: ``**Status:** <message>`` when the agent returns a plain string.
        Otherwise a Markdown report with the title, the lookup method, the
        summary, and one reference line per source including its credibility
        rating.
    """
    response = research_agent_hybrid(topic)

    if isinstance(response, str):
        return f"**Status:** {response}"

    output_md = f"# üß† Research Summary: {response['topic'].title()}\n"
    output_md += f"*(Source: {response['method']})*\n\n"
    output_md += f"{response['summary']}\n\n"
    output_md += "---\n"
    output_md += "### üìö References\n"

    for source in response['sources']:
        # Square brackets inside the title would terminate the Markdown link
        # text early, so escape them.
        title = source['title'].replace("[", "\\[").replace("]", "\\]")
        line = f"* üîó [{title}]({source['url']})"
        # The agent computes a rating for every source; show it next to the link.
        credibility = source.get("credibility")
        if credibility:
            line += f" - *{credibility}*"
        output_md += line + "\n"

    return output_md

# 4. LAUNCH
# Build the page: a title, one row holding the topic textbox and the "Research"
# button, and a Markdown component that receives the formatted report.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# ü§ñ AI Research Agent (Hybrid)")
    with gr.Row():
        topic_input = gr.Textbox(label="Research Topic", placeholder="Enter topic...", scale=4)
        submit_btn = gr.Button("Research", variant="primary", scale=1)
    output_display = gr.Markdown(label="Results")

    # The button click and the textbox submit event are wired to the same handler.
    submit_btn.click(fn=run_research_ui, inputs=topic_input, outputs=output_display)
    topic_input.submit(fn=run_research_ui, inputs=topic_input, outputs=output_display)

# share=True asks Gradio for a public URL (the recorded output of this cell shows a
# *.gradio.live address), so the app is reachable by anyone who has that link.
demo.launch(share=True)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://6d608e4ebbe14d389e.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


