In [None]:
!pip install nest_asyncio gradio aiohttp openai exa_py

import nest_asyncio
nest_asyncio.apply()

import asyncio
import aiohttp
import gradio as gr
import os
from openai import OpenAI
import json
from exa_py import Exa

# Configuration
# API keys are taken from the environment when set, so real secrets do not
# have to be stored in the notebook itself. The placeholder strings remain as
# fallbacks and can still be replaced by hand.
NEBIUS_API_KEY = os.environ.get("NEBIUS_API_KEY", "your_nebius_api_key")  # Or replace with your Nebius API key
EXA_API_KEY = os.environ.get("EXA_API_KEY", "your_exa_api_key")  # Or replace with your EXA API key

# Shared clients used by every helper below.
exa = Exa(EXA_API_KEY)
client = OpenAI(base_url="https://api.studio.nebius.ai/v1/", api_key=NEBIUS_API_KEY)

# Model used for every LLM call unless a caller overrides it.
DEFAULT_MODEL = "deepseek-ai/DeepSeek-R1-fast"

# Functions
async def call_nebius_async(session, messages, model=DEFAULT_MODEL):
    """Request a chat completion from Nebius and return the reply text.

    The module-level OpenAI ``client`` is synchronous, so the request is run
    in a worker thread. Awaiting it keeps the event loop free, which lets
    callers that fan out with ``asyncio.gather`` overlap their requests
    instead of executing them one after another.

    Args:
        session: Not used by this function; accepted so that all helpers
            share the same leading parameter.
        messages: Chat messages as a list of ``{"role", "content"}`` dicts.
        model: Model identifier forwarded to the API.

    Returns:
        The message content of the first choice, or ``None`` when the
        completion contains no choices.
    """
    completion = await asyncio.to_thread(
        client.chat.completions.create,
        model=model,
        messages=messages,
        temperature=0.6,
    )
    if completion.choices:
        return completion.choices[0].message.content
    return None

async def generate_search_queries_async(session, user_query):
    """Ask the LLM for the initial list of search queries for *user_query*.

    The reply is expected to be a Python list literal of strings. It is
    parsed with ``ast.literal_eval`` rather than ``eval`` so that text
    produced by the model can never execute code.

    Args:
        session: Passed through to ``call_nebius_async``.
        user_query: The research question entered by the user.

    Returns:
        The parsed list of queries, or ``[]`` when the model returned nothing
        or the reply could not be parsed as a list.
    """
    import ast
    import re

    prompt = (
        "You are an expert research assistant. Generate exactly four distinct search queries "
        "that would help gather complete information about this topic. Format your response "
        "as a Python list of strings. Example format: ['query1', 'query2', 'query3', 'query4']. "
        f"Query: {user_query}"
    )
    messages = [
        {"role": "system", "content": "You are a helpful and precise research assistant."},
        {"role": "user", "content": prompt}
    ]
    response = await call_nebius_async(session, messages)
    if not response:
        return []

    # Reasoning models may put a <think>...</think> preamble in front of the
    # answer; drop it so brackets inside the reasoning are not mistaken for
    # the list.
    cleaned_response = re.sub(r"<think>.*?</think>", "", response, flags=re.DOTALL).strip()
    if not cleaned_response.startswith('['):
        # The list may be wrapped in prose or a code fence and may span lines.
        list_match = re.search(r"\[.*?\]", cleaned_response, flags=re.DOTALL)
        if list_match:
            cleaned_response = list_match.group(0)

    try:
        search_queries = ast.literal_eval(cleaned_response)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError) as e:
        print(f"Error parsing search queries: {str(e)}")
        return []
    if isinstance(search_queries, list):
        return search_queries
    return []

async def perform_search_async(query):
    """Search Exa for *query* and collect ``(url, text)`` pairs.

    The Exa client call is dispatched to a worker thread. Results without
    text are skipped. Any exception is printed and yields an empty list.
    """
    pairs = []
    try:
        exa_response = await asyncio.to_thread(exa.search_and_contents, query)
        if exa_response and exa_response.results:
            for hit in exa_response.results:
                if hit.text:
                    pairs.append((hit.url, hit.text))
    except Exception as e:
        print(f"Exa API search failed for query '{query}': {e}")
        return []
    return pairs

async def is_page_useful_async(session, user_query, page_text):
    """Ask the LLM whether a page is useful for answering *user_query*.

    Only the first 2000 characters of *page_text* are sent to the model.

    Args:
        session: Passed through to ``call_nebius_async``.
        user_query: The research question entered by the user.
        page_text: Text content of the page being judged.

    Returns:
        ``"Yes"`` if the reply contains the word "yes", otherwise ``"No"``
        (including when the model returns nothing).
    """
    import re

    prompt = (
        "You are a critical research evaluator. Given the user's query and the content of a webpage, "
        "determine if the webpage contains information that is useful for addressing the query. "
        "Respond with exactly one word: 'Yes' if the page is useful, or 'No' if it is not. Do not include any extra text."
    )
    messages = [
        {"role": "system", "content": "You are a strict and concise evaluator of research relevance."},
        {"role": "user", "content": f"User Query: {user_query}\n\nWebpage Content (first 2000 characters):\n{page_text[:2000]}\n\n{prompt}"}
    ]
    response = await call_nebius_async(session, messages)
    if not response:
        return "No"

    # Reasoning models may emit a <think>...</think> preamble; judging that
    # text would let an incidental "Yes" in the reasoning decide the verdict.
    answer = re.sub(r"<think>.*?</think>", "", response, flags=re.DOTALL).strip()

    # Match whole words, case-insensitively, so "yes." or "Yes, it is" count
    # while words that merely contain the letters do not. "Yes" wins when
    # both words appear, as before; everything else falls back to "No".
    if re.search(r"\byes\b", answer, flags=re.IGNORECASE):
        return "Yes"
    return "No"

async def extract_relevant_context_async(session, user_query, search_query, page_text):
    """Have the LLM pull out the parts of a page relevant to *user_query*.

    Only the first 2000 characters of *page_text* are included in the
    request. Returns the stripped reply, or ``""`` when there is no reply.
    """
    instructions = (
        "You are an expert information extractor. Given the user's query, the search query that led to this page, "
        "and the webpage content, extract all pieces of information that are useful for answering the user's query. "
        "Return only the relevant context as plain text without extra commentary."
    )
    system_message = {"role": "system", "content": "You are an expert in extracting and summarizing relevant information."}
    user_message = {
        "role": "user",
        "content": f"User Query: {user_query}\nSearch Query: {search_query}\n\nWebpage Content (first 2000 characters):\n{page_text[:2000]}\n\n{instructions}",
    }
    reply = await call_nebius_async(session, [system_message, user_message])
    if not reply:
        return ""
    return reply.strip()

async def get_new_search_queries_async(session, user_query, previous_search_queries, all_contexts):
    """Ask the LLM whether more research is needed and, if so, for new queries.

    The reply is parsed with ``ast.literal_eval`` rather than ``eval`` so
    that text produced by the model can never execute code.

    Args:
        session: Passed through to ``call_nebius_async``.
        user_query: The research question entered by the user.
        previous_search_queries: Queries already issued; shown to the model.
        all_contexts: Extracted context strings gathered so far.

    Returns:
        The string ``"<done>"`` when the model says no further research is
        needed, a list of new queries when it supplies one, or ``[]`` when
        there is no reply or it cannot be parsed as a list.
    """
    import ast
    import re

    context_combined = "\n".join(all_contexts)
    prompt = (
        "You are an analytical research assistant. Based on the original query, the search queries performed so far, "
        "and the extracted contexts from webpages, decide if further research is needed. "
        "If further research is needed, provide up to four new search queries as a Python list (for example, "
        "['new query1', 'new query2']). If you believe no further research is needed, respond with exactly <done>."
        "\nOutput only a Python list or the token <done> without any extra text."
    )
    messages = [
        {"role": "system", "content": "You are a systematic research planner."},
        {"role": "user", "content": f"User Query: {user_query}\nPrevious Search Queries: {previous_search_queries}\n\nExtracted Relevant Contexts:\n{context_combined}\n\n{prompt}"}
    ]
    response = await call_nebius_async(session, messages)
    if not response:
        return []

    # Reasoning models may emit a <think>...</think> preamble; remove it so
    # the exact-match check for <done> and the list parsing see only the answer.
    cleaned = re.sub(r"<think>.*?</think>", "", response, flags=re.DOTALL).strip()
    if cleaned == "<done>":
        return "<done>"
    if not cleaned.startswith("["):
        # Same fallback as the initial-query parser: pick the list out of
        # any surrounding prose or code fence.
        list_match = re.search(r"\[.*?\]", cleaned, flags=re.DOTALL)
        if list_match:
            cleaned = list_match.group(0)

    try:
        new_queries = ast.literal_eval(cleaned)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError) as e:
        print("Error parsing new search queries:", e, "\nResponse:", response)
        return []
    if isinstance(new_queries, list):
        return new_queries
    print("LLM did not return a list for new search queries. Response:", response)
    return []

async def generate_final_report_async(session, user_query, all_contexts):
    """Ask the LLM to write the final report from the gathered contexts.

    The contexts are joined with newlines and sent together with the
    original query. Returns whatever ``call_nebius_async`` returns.
    """
    instructions = (
        "You are an expert researcher and report writer. Based on the gathered contexts below and the original query, "
        "write a complete, well-structured, and detailed report that addresses the query thoroughly. "
        "Include all useful insights and conclusions without extra commentary."
    )
    gathered = "\n".join(all_contexts)
    system_message = {"role": "system", "content": "You are a skilled report writer."}
    user_message = {
        "role": "user",
        "content": f"User Query: {user_query}\n\nGathered Relevant Contexts:\n{gathered}\n\n{instructions}",
    }
    report = await call_nebius_async(session, [system_message, user_message])
    return report

async def process_link(session, link_and_content, user_query, search_query, log):
    """Evaluate one search hit and return its extracted context, if any.

    *link_and_content* is a ``(url, text)`` pair. Progress messages are
    appended to *log*. Returns the extracted context string when the page is
    judged useful and extraction yields text; otherwise returns ``None``.
    """
    url, text = link_and_content
    log.append(f"Processing content from: {url}")

    # Nothing to evaluate without page text.
    if not text:
        log.append(f"No content received from Exa for: {url}")
        return None

    verdict = await is_page_useful_async(session, user_query, text)
    log.append(f"Page usefulness for {url}: {verdict}")
    if verdict != "Yes":
        return None

    extracted = await extract_relevant_context_async(session, user_query, search_query, text)
    if not extracted:
        return None
    log.append(f"Extracted context from {url} (first 200 chars): {extracted[:200]}")
    return extracted

async def async_research(user_query, iteration_limit):
    """Run the iterative search / evaluate / extract loop and write a report.

    Each iteration searches all pending queries concurrently, processes every
    page that has not been seen in an earlier iteration, collects the
    extracted contexts, and then asks the LLM whether more queries are
    needed. The loop ends when the LLM answers ``"<done>"``, supplies no new
    queries, or the iteration limit is reached.

    Args:
        user_query: The research question entered by the user.
        iteration_limit: Maximum number of search iterations to run.

    Returns:
        A ``(final_report, log_text)`` tuple. ``final_report`` is the value
        returned by ``generate_final_report_async``, or an explanatory
        message when no initial queries could be generated. ``log_text`` is
        the newline-joined progress log.
    """
    aggregated_contexts = []
    all_search_queries = []
    log_messages = []
    # URLs already handled in any iteration. Without this, a page returned by
    # several rounds of queries is re-evaluated each time and its context is
    # added to the report input more than once.
    seen_urls = set()
    iteration = 0

    async with aiohttp.ClientSession() as session:
        log_messages.append("Generating initial search queries...")
        new_search_queries = await generate_search_queries_async(session, user_query)
        if not new_search_queries:
            log_messages.append("No search queries were generated by the LLM. Exiting.")
            return "No search queries were generated by the LLM. Exiting.", "\n".join(log_messages)
        all_search_queries.extend(new_search_queries)
        log_messages.append(f"Initial search queries: {new_search_queries}")

        while iteration < iteration_limit:
            log_messages.append(f"\n=== Iteration {iteration + 1} ===")
            search_results = await asyncio.gather(
                *(perform_search_async(query) for query in new_search_queries)
            )

            # Keep the first occurrence of each URL together with the query
            # that produced it.
            new_links = []
            for query_used, results in zip(new_search_queries, search_results):
                for url, content in results:
                    if url in seen_urls:
                        continue
                    seen_urls.add(url)
                    new_links.append(((url, content), query_used))

            log_messages.append(f"Aggregated {len(new_links)} unique links from this iteration.")
            link_results = await asyncio.gather(*(
                process_link(session, link, user_query, query_used, log_messages)
                for link, query_used in new_links
            ))
            iteration_contexts = [res for res in link_results if res]

            if iteration_contexts:
                aggregated_contexts.extend(iteration_contexts)
                log_messages.append(f"Found {len(iteration_contexts)} useful contexts in this iteration.")
            else:
                log_messages.append("No useful contexts were found in this iteration.")

            iteration += 1
            if iteration >= iteration_limit:
                # Queries planned now could never be searched, so skip the
                # extra LLM call.
                log_messages.append("Reached the iteration limit. Ending the loop.")
                break

            new_search_queries = await get_new_search_queries_async(session, user_query, all_search_queries, aggregated_contexts)
            if new_search_queries == "<done>":
                log_messages.append("LLM indicated that no further research is needed.")
                break
            elif new_search_queries:
                log_messages.append(f"LLM provided new search queries: {new_search_queries}")
                all_search_queries.extend(new_search_queries)
            else:
                log_messages.append("LLM did not provide any new search queries. Ending the loop.")
                break

        log_messages.append("\nGenerating final report...")
        final_report = await generate_final_report_async(session, user_query, aggregated_contexts)
        return final_report, "\n".join(log_messages)

def run_research(user_query, iteration_limit=10):
    """Synchronous entry point: run ``async_research`` to completion.

    Returns whatever ``async_research`` returns for the given query and
    iteration limit.
    """
    research = async_research(user_query, iteration_limit)
    return asyncio.run(research)

# Gradio UI Setup
def gradio_run(user_query, iteration_limit):
    """Gradio callback: run the research and return ``(report, logs)``.

    *iteration_limit* is converted with ``int`` before use. Any exception is
    reported as an error message in the first element, with an empty log.
    """
    try:
        final_report, logs = run_research(user_query, int(iteration_limit))
    except Exception as e:
        outcome = (f"An error occurred: {e}", "")
    else:
        outcome = (final_report, logs)
    return outcome

# Build the web UI: gradio_run receives the two input components' values
# (query text, max iterations) and its two return values fill the two output
# components (final report, step log).
iface = gr.Interface(
    fn=gradio_run,
    inputs=[
        gr.Textbox(lines=2, label="Research Query/Topic", placeholder="Enter your research query here..."),
        # Slider bounds the number of research iterations to 1-10 (default 5).
        gr.Slider(minimum=1, maximum=10, step=1, value=5, label="Max Iterations")
    ],
    outputs=[
        gr.Textbox(label="Final Report", lines=10),
        gr.Textbox(label="Intermediate Steps Log", lines=5),
    ],
    title="OpenDeepSeek-Researcher",
    description="Enter your query and a maximum iteration count to generate a detailed report. The log will show the steps taken during the research process.",

    # Custom page styling: font, centered container, rounded boxes, green button.
    css="""
        body {
            font-family: 'Arial', sans-serif;
        }
        .gradio-container {
            max-width: 900px;
            margin: 0 auto;
            padding: 20px;
            border-radius: 8px;
            box-shadow: 0 2px 5px rgba(0, 0, 0, 0.1);
        }
        .gr-box {
            border-radius: 8px;
        }
        .gr-button {
            background-color: #4CAF50; /* Example color */
            color: white;
            border: none;
            padding: 10px 20px;
            text-align: center;
            text-decoration: none;
            display: inline-block;
            font-size: 16px;
            border-radius: 5px;
            cursor: pointer;
        }

    """,
    # Each example row is [query, max iterations], matching the inputs above.
    examples=[
        ["What are the benefits of intermittent fasting?", 5],
        ["Explain the history of the internet.", 3],
        ["Compare Nebius Studio with other Providers.", 2],
        ["Cheapest FLUX model providers.", 4]
    ]
)

# Start serving the interface.
iface.launch()

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://246572fe2afc0eef49.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


