In [1]:
import os
import json
import requests
import arxiv
from openai import OpenAI
from typing import List, Dict, Optional
from dotenv import load_dotenv
from IPython.display import display, Markdown
import time


In [2]:
# Load environment variables (the API keys read later via os.getenv) from the
# backend's .env file; the path is relative to the notebook's working directory.
load_dotenv('../backend/.env')


True

In [3]:
# Notebook-wide OpenAI client, used by generate_summary(); the key is read from
# the OPENAI_API_KEY environment variable.
client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))

In [4]:
# Static setup banner: two lines of fixed text, no checks are performed here.
print(
    "‚úÖ Environment loaded successfully!",
    "üìä Available APIs: OpenAI, Wikipedia, arXiv, NewsAPI",
    sep="\n",
)

‚úÖ Environment loaded successfully!
üìä Available APIs: OpenAI, Wikipedia, arXiv, NewsAPI


In [5]:
def fetch_wikipedia(query: str, max_chars: int = 500) -> Optional[Dict]:
    """Fetch the intro extract of an English Wikipedia article via the MediaWiki API.

    The query is first tried as an exact page title. When no page matches, a
    search is run and its top hit is looked up by title. The fallback is a
    single extra lookup (no recursion), and it honours ``max_chars``.

    Args:
        query: Page title or free-text phrase to look up.
        max_chars: Maximum number of extract characters to keep; longer
            extracts are cut at this length and suffixed with "...".

    Returns:
        A dict with "title", "content", "url", "source" and "type" keys, or
        None when nothing was found or a request failed (the error is printed).
    """
    api_url = "https://en.wikipedia.org/w/api.php"
    headers = {'User-Agent': 'ResearchAssistant/1.0'}
    timeout_seconds = 10  # a stalled connection must not hang the notebook

    def _lookup_page(title: str) -> Optional[Dict]:
        """Return the source dict for an exact page title, or None if absent."""
        params = {
            "action": "query",
            "format": "json",
            "prop": "extracts|info",
            "exintro": True,
            "explaintext": True,
            "titles": title.replace(" ", "_"),
            "inprop": "url"
        }
        response = requests.get(
            api_url, params=params, headers=headers, timeout=timeout_seconds
        )
        if response.status_code != 200:
            return None

        pages = response.json().get("query", {}).get("pages", {})
        for page_id, page_info in pages.items():
            if page_id == "-1":  # "-1" is the key returned for a missing page
                continue

            content = page_info.get('extract', '')
            if len(content) > max_chars:
                content = content[:max_chars] + "..."

            return {
                "title": page_info.get('title'),
                "content": content,
                "url": f"https://en.wikipedia.org/?curid={page_id}",
                "source": "Wikipedia",
                "type": "encyclopedia"
            }
        return None

    try:
        page = _lookup_page(query)
        if page is not None:
            return page

        # Direct title lookup failed: search and resolve the best hit.
        search_params = {
            "action": "query",
            "format": "json",
            "list": "search",
            "srsearch": query,
            "srlimit": 1
        }
        search_response = requests.get(
            api_url, params=search_params, headers=headers, timeout=timeout_seconds
        )
        if search_response.status_code == 200:
            hits = search_response.json().get('query', {}).get('search')
            if hits:
                return _lookup_page(hits[0]['title'])

    except Exception as e:
        # Best-effort source: report the problem and let the caller continue.
        print(f"Wikipedia error: {e}")

    return None


In [6]:
def fetch_arxiv(query: str, max_results: int = 2, max_chars: int = 300) -> List[Dict]:
    """Fetch the most relevant arXiv papers for a query.

    Args:
        query: Search phrase passed to arXiv.
        max_results: Maximum number of papers to return.
        max_chars: Maximum number of abstract characters to keep; longer
            abstracts are cut at this length and suffixed with "...".

    Returns:
        One dict per paper with "title", "content", "url", "source", "type",
        "authors" (first three, as strings) and "published" (YYYY-MM-DD).
        An empty list is returned when the lookup fails (the error is printed).
    """
    try:
        # Named arxiv_client so it is not confused with the notebook's OpenAI `client`.
        arxiv_client = arxiv.Client()
        search = arxiv.Search(
            query=query,
            max_results=max_results,
            sort_by=arxiv.SortCriterion.Relevance
        )

        results = []
        for paper in arxiv_client.results(search):
            # Collapse every run of whitespace (newlines included) to one space;
            # a single replace('  ', ' ') pass leaves longer runs behind.
            summary = " ".join(paper.summary.split())
            if len(summary) > max_chars:
                summary = summary[:max_chars] + "..."

            results.append({
                "title": paper.title,
                "content": summary,
                "url": paper.entry_id,
                "source": "arXiv",
                "type": "academic",
                "authors": [str(author) for author in paper.authors[:3]],
                "published": paper.published.strftime("%Y-%m-%d")
            })

        return results

    except Exception as e:
        # Best-effort source: report the problem and let the caller continue.
        print(f"arXiv error: {e}")
        return []

# %% [markdown]
# ## Step 4: NewsAPI Fetcher

# %%
def fetch_news(query: str, max_results: int = 2, max_chars: int = 200) -> List[Dict]:
    """Fetch news articles matching a query from NewsAPI's /v2/everything endpoint.

    Requires the NEWS_API_KEY environment variable; without it a notice is
    printed and an empty list is returned.

    Args:
        query: Search phrase.
        max_results: Page size requested from the API.
        max_chars: Maximum number of description/content characters to keep;
            longer text is cut at this length and suffixed with "...".

    Returns:
        One dict per usable article with "title", "content", "url", "source",
        "type" and "published" (first 10 characters of publishedAt) keys.
        An empty list is returned on a non-200 response or any error (printed).
    """
    api_key = os.getenv('NEWS_API_KEY')
    if not api_key:
        print("‚ö†Ô∏è NewsAPI key not found")
        return []

    try:
        news_url = "https://newsapi.org/v2/everything"
        params = {
            "q": query,
            "apiKey": api_key,
            "pageSize": max_results,
            "language": "en",
            "sortBy": "relevancy"
        }

        # Timeout so a stalled connection cannot hang the notebook.
        response = requests.get(news_url, params=params, timeout=10)

        if response.status_code != 200:
            print(f"NewsAPI error: HTTP {response.status_code}")
            return []

        articles = response.json().get('articles') or []

        results = []
        for article in articles:
            title = article.get('title')
            if not title or title == "[Removed]":
                continue

            content = article.get('description') or article.get('content') or ""
            if len(content) > max_chars:
                content = content[:max_chars] + "..."

            # Fields may be present but null in the JSON, so `or` is used instead
            # of .get() defaults; one incomplete article must not discard the rest.
            results.append({
                "title": title,
                "content": content,
                "url": article.get('url') or '#',
                "source": (article.get('source') or {}).get('name') or 'Unknown',
                "type": "news",
                "published": (article.get('publishedAt') or '')[:10]
            })

        return results

    except Exception as e:
        # Best-effort source: report the problem and let the caller continue.
        print(f"NewsAPI error: {e}")
        return []

In [7]:
def generate_summary(query: str, sources: List[Dict]) -> Dict:
    """Ask the OpenAI chat model for a cited answer built from the given sources.

    Each source is rendered as a numbered text section (source name, title,
    content, type and, when present, authors) and sent together with the
    question. The module-level ``client`` performs the request.

    Returns:
        A dict with "answer", "tokens_used" and "model". If the API call
        raises, the error is printed and the dict carries the error text,
        0 tokens and the model name "error".
    """
    # Render every source as one text section, numbered from 1.
    sections = []
    for number, src in enumerate(sources, 1):
        pieces = [
            f"\n\n[Source {number} - {src['source']}]",
            f"\nTitle: {src['title']}",
            f"\nContent: {src['content']}",
            f"\nType: {src['type']}",
        ]
        if src.get('authors'):
            pieces.append(f"\nAuthors: {', '.join(src['authors'])}")
        sections.append("".join(pieces))
    formatted_sources = "".join(sections)

    system_prompt = """You are a research assistant. Your task:
1. Answer the user's question comprehensively
2. Use ONLY information from the provided sources
3. Cite sources as [1], [2], [3] etc.
4. If information is contradictory, mention this
5. If sources lack information, say so
6. Format answer with clear paragraphs and bullet points where helpful"""

    user_prompt = f"""Question: {query}

Available Sources:{formatted_sources}

Please provide a well-structured answer with citations:"""

    chat_messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt}
    ]

    try:
        completion = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=chat_messages,
            max_tokens=800,
            temperature=0.7
        )
        answer_text = completion.choices[0].message.content
        token_count = completion.usage.total_tokens
        return {
            "answer": answer_text,
            "tokens_used": token_count,
            "model": completion.model
        }

    except Exception as e:
        print(f"OpenAI error: {e}")
        return {
            "answer": f"Error generating summary: {str(e)}",
            "tokens_used": 0,
            "model": "error"
        }

In [10]:
def research_assistant(query: str) -> Dict:
    """Gather sources for a query and synthesize a cited answer.

    Wikipedia, arXiv and NewsAPI are queried one after another, the hits are
    merged in that order, and the combined list is passed to
    generate_summary(). Progress is printed along the way.

    Args:
        query: The research question.

    Returns:
        A dict with "answer", "sources", "tokens_used" and "model". When no
        source returns anything, a dict with "answer", an empty "sources"
        list and "tokens_used" of 0 is returned without calling the model.
    """
    print(f"üîç Researching: '{query}'")
    print("-" * 50)

    # Step 1: Fetch from all sources
    print("üìö Gathering information from sources...")

    # Fetch in sequence (to avoid rate limits)
    wikipedia_data = fetch_wikipedia(query)
    arxiv_data = fetch_arxiv(query)
    news_data = fetch_news(query)

    # Combine all sources; Wikipedia yields at most one entry (or None).
    all_sources = []
    if wikipedia_data:
        all_sources.append(wikipedia_data)
    all_sources.extend(arxiv_data)
    all_sources.extend(news_data)

    print(f"‚úÖ Found {len(all_sources)} sources:")
    for i, source in enumerate(all_sources, 1):
        print(f"  {i}. {source['source']}: {source['title'][:50]}...")

    if not all_sources:
        return {
            "answer": "‚ùå No relevant sources found for your query.",
            "sources": [],
            "tokens_used": 0
        }

    # Step 2: Synthesize. This progress line must sit outside the early-return
    # branch above, otherwise it can never run.
    print("\nü§ñ Generating comprehensive answer...")
    summary_result = generate_summary(query, all_sources)

    return {
        "answer": summary_result["answer"],
        "sources": all_sources,
        "tokens_used": summary_result["tokens_used"],
        "model": summary_result.get("model", "gpt-3.5-turbo")
    }

In [12]:
# Run the full pipeline on one sample question and render the result as Markdown.
test_query = "What is artificial intelligence?"

result = research_assistant(test_query)

display(Markdown(f"## ü§ñ Answer\n{result['answer']}"))

display(Markdown("## üìö Sources Used"))

# Each source card is built flush-left: Markdown treats lines indented by four
# spaces as a code block, which would show the raw "###" / "**" markup instead
# of a rendered heading and bold labels.
for i, source in enumerate(result['sources'], 1):
    display(Markdown(
        f"### {i}. {source['title']}\n"
        f"**Source:** {source['source']} ({source['type']})\n\n"
        f"**Content Preview:** {source['content']}\n\n"
        f"**URL:** [Open Link]({source['url']})"
    ))

display(Markdown(f"""
## üìä Statistics
- **Total Sources:** {len(result['sources'])}
- **Tokens Used:** {result['tokens_used']}
- **Model:** {result.get('model', 'gpt-3.5-turbo')}
"""))

üîç Researching: 'What is artificial intelligence?'
--------------------------------------------------
üìö Gathering information from sources...
‚úÖ Found 5 sources:
  1. Wikipedia: Artificial intelligence...
  2. arXiv: The Artificial Scientist: Logicist, Emergentist, a...
  3. arXiv: Compression, The Fermi Paradox and Artificial Supe...
  4. The Next Web: A 2025 recap for Tech & AI...
  5. Theregister.com: Recline of the machines: Terminator felled by dodg...


## ü§ñ Answer
Artificial intelligence (AI) is the capability of computational systems to perform tasks typically associated with human intelligence [1]. Here is a comprehensive understanding of artificial intelligence based on the provided sources:

1. **Definition**: 
   - AI involves tasks such as learning, reasoning, problem-solving, perception, and decision-making.
   - It is a field of research in computer science that focuses on developing methods and software that enable machines to perceive their environment and use learning and intelligence to achieve defined goals [1].

2. **Types of AI**:
   - **Artificial General Intelligence (AGI)**: This refers to AI that has the ability to understand, learn, and apply knowledge across different domains, similar to human intelligence. AGI is discussed in academic papers exploring different approaches to achieving it [2,3].
   - **Generative AI**: Mentioned in a news article, generative AI was a significant trend in technology in the years leading up to 2025. It involves creating AI systems capable of generating content, such as images and text [4].

3. **Challenges and Considerations**:
   - **Communication and Control**: There are discussions about the challenges associated with communicating with and controlling AGI. This includes concerns about managing artificial super-intelligence and the implications of the Fermi Paradox [3].
   - **Hybrid Approaches**: Some researchers argue that a unified or hybrid approach to AGI is necessary [2]. This suggests that a combination of different AI methodologies may be needed to achieve more advanced AI capabilities.

4. **Recent Trends**:
   - The year 2025 was highlighted as a significant period when technology, including AI, transitioned from a futuristic concept to a present reality. Developments in generative AI and platform innovation were notable during this time [4].
   - However, news articles also humorously depict the challenges AI may face, such as the Terminator character needing a battery recharge [5].

5. **Gaps in Information**:
   - While the sources provide insights into the definition, types, challenges, and trends related to AI, there may be a lack of detailed information on specific AI technologies, applications, or breakthroughs in the field in recent years.

In conclusion, artificial intelligence encompasses a broad range of capabilities that mimic human intelligence. Researchers are exploring various approaches, including AGI and generative AI, to advance the field further. Challenges related to communication, control, and hybrid AI approaches are topics of interest in the academic community, while practical applications of AI continue to evolve in technology trends.

## üìö Sources Used


    ### 1. Artificial intelligence
    **Source:** Wikipedia (encyclopedia)
    
    **Content Preview:** Artificial intelligence (AI) is the capability of computational systems to perform tasks typically associated with human intelligence, such as learning, reasoning, problem-solving, perception, and decision-making. It is a field of research in computer science that develops and studies methods and software that enable machines to perceive their environment and use learning and intelligence to take actions that maximize their chances of achieving defined goals.
High-profile applications of AI incl...
    
    **URL:** [Open Link](https://en.wikipedia.org/?curid=1164)
    


    ### 2. The Artificial Scientist: Logicist, Emergentist, and Universalist Approaches to Artificial General Intelligence
    **Source:** arXiv (academic)
    
    **Content Preview:** We attempt to define what is necessary to construct an Artificial Scientist, explore and evaluate several approaches to artificial general intelligence (AGI) which may facilitate this, conclude that a unified or hybrid approach is necessary and explore two theories that satisfy this requirement to s...
    
    **URL:** [Open Link](http://arxiv.org/abs/2110.01831v1)
    


    ### 3. Compression, The Fermi Paradox and Artificial Super-Intelligence
    **Source:** arXiv (academic)
    
    **Content Preview:** The following briefly discusses possible difficulties in communication with and control of an AGI (artificial general intelligence), building upon an explanation of The Fermi Paradox and preceding work on symbol emergence and artificial general intelligence. The latter suggests that to infer what so...
    
    **URL:** [Open Link](http://arxiv.org/abs/2110.01835v1)
    


    ### 4. A 2025 recap for Tech & AI
    **Source:** The Next Web (news)
    
    **Content Preview:** 2025 was the year technology stopped being tomorrow‚Äôs promise and became today‚Äôs anchor. What began as a surge in generative AI and platform innovation two years prior crystallized this year into conc...
    
    **URL:** [Open Link](https://thenextweb.com/news/a-2025-recap-for-tech-ai)
    


    ### 5. Recline of the machines: Terminator felled by dodgy battery
    **Source:** Theregister.com (news)
    
    **Content Preview:** The rise will be postponed until you hit F1 to continue
Bork!Bork!Bork! The baddest of AI bad guys, the Terminator, has confirmed what the vast majority of IT professionals already know. The machines ...
    
    **URL:** [Open Link](https://www.theregister.com/2026/01/07/terminator_felled_by_dodgy_battery/)
    


## üìä Statistics
- **Total Sources:** 5
- **Tokens Used:** 1061
- **Model:** gpt-3.5-turbo-0125


In [13]:
def interactive_test():
    """Prompt for research questions in a loop and print each answer.

    Blank input is ignored; 'quit', 'exit' or 'q' (any letter case) ends the
    loop. For every other input, research_assistant() is called and its
    answer, source count, token count and source list are printed.
    """
    stop_words = ['quit', 'exit', 'q']
    wide_rule = "=" * 60

    print("üéØ Universal Research Assistant - Interactive Mode")
    print(wide_rule)
    print("Type 'quit' to exit\n")

    while True:
        query = input("\nüìù Enter your research question: ").strip()

        if query.lower() in stop_words:
            print("üëã Goodbye!")
            return

        if query:
            print("\n" + wide_rule)
            result = research_assistant(query)

            # Answer section
            print("\n" + "ü§ñ ANSWER:")
            print("-" * 40)
            print(result['answer'])

            # Totals
            found = result['sources']
            print(f"\nüìö Used {len(found)} sources")
            print(f"‚ö° Used {result['tokens_used']} tokens")

            # Source list, titles cut to 60 characters
            if found:
                print("\nüìñ Sources:")
                for position, entry in enumerate(found, 1):
                    print(f"  {position}. [{entry['source']}] {entry['title'][:60]}...")

In [14]:
test_queries = [
    "What is machine learning?",
    "Explain quantum computing",
    "Latest developments in renewable energy",
    "What is climate change?"
]
BATCH_LIMIT = 2  # how many of the queries above to actually run

print("üß™ Batch Testing Multiple Queries")
print("=" * 60)

# Loop-local names (batch_query / batch_result) keep this cell from overwriting
# the notebook-level `result` that belongs to `test_query` and is exported later.
for i, batch_query in enumerate(test_queries[:BATCH_LIMIT], 1):
    print(f"\n{i}. Query: '{batch_query}'")
    batch_result = research_assistant(batch_query)
    print(f"   Sources: {len(batch_result['sources'])} | Tokens: {batch_result['tokens_used']}")
    print(f"   Answer preview: {batch_result['answer'][:100]}...")
    time.sleep(2)  # Avoid rate limiting

üß™ Batch Testing Multiple Queries

1. Query: 'What is machine learning?'
üîç Researching: 'What is machine learning?'
--------------------------------------------------
üìö Gathering information from sources...
‚úÖ Found 5 sources:
  1. Wikipedia: Machine learning...
  2. arXiv: Changing Data Sources in the Age of Machine Learni...
  3. arXiv: DOME: Recommendations for supervised machine learn...
  4. 9to5Mac: Apple shared ‚Äòbendgate‚Äô lessons as it helped small...
  5. Search Engine Journal: 10 Hard Truths About PPC: Insights From Last Year‚Äô...
   Sources: 5 | Tokens: 813
   Answer preview: Machine learning (ML) is a field within artificial intelligence that focuses on the development and ...

2. Query: 'Explain quantum computing'
üîç Researching: 'Explain quantum computing'
--------------------------------------------------
üìö Gathering information from sources...
‚úÖ Found 5 sources:
  1. Wikipedia: Quantum computing...
  2. arXiv: Tierkreis: A Dataflow Framework for Hybr

In [15]:
def export_results(result: Dict, format: str = "markdown",
                   query: Optional[str] = None,
                   output_path: str = "research_results.md") -> Optional[str]:
    """Export a research result as a Markdown report.

    Args:
        result: Dict with "answer", "sources" (each having "title", "source",
            "type", "url" and "content") and "tokens_used".
        format: Export format; only "markdown" is supported.
        query: The question the result answers. When omitted, the
            notebook-level ``test_query`` is used if it exists (legacy
            behaviour), otherwise an empty string.
        output_path: File the report is written to (UTF-8).

    Returns:
        The report text, or None when ``format`` is not supported.
    """
    if format != "markdown":
        print(f"Unsupported export format: {format!r} (only 'markdown' is available)")
        return None

    if query is None:
        # Legacy fallback: callers used to rely on the global set by the demo cell.
        query = globals().get("test_query", "")

    parts = [
        "# Research Results\n\n",
        f"**Query:** {query}\n\n",
        f"## Answer\n{result['answer']}\n\n",
        "## Sources\n",
    ]

    for i, source in enumerate(result['sources'], 1):
        parts.append(f"\n### {i}. {source['title']}\n")
        parts.append(f"- **Source:** {source['source']}\n")
        parts.append(f"- **Type:** {source['type']}\n")
        parts.append(f"- **URL:** {source['url']}\n")
        parts.append(f"- **Preview:** {source['content']}\n")

    parts.append("\n## Statistics\n")
    parts.append(f"- Total Sources: {len(result['sources'])}\n")
    parts.append(f"- Tokens Used: {result['tokens_used']}\n")
    content = "".join(parts)

    # Save to file
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(content)

    print(f"‚úÖ Results exported to {output_path}")
    return content

# Export the test results: writes the Markdown report to research_results.md
# and keeps the rendered text in `exported`.
# NOTE(review): `result` is whichever research result was assigned most recently
# in this kernel, while the report heading uses `test_query` — confirm they match.
exported = export_results(result, "markdown")

‚úÖ Results exported to research_results.md


In [18]:
import sys

# Quick end-to-end smoke test. It reuses what the earlier cells define (imports,
# the OpenAI `client`, fetch_wikipedia, fetch_arxiv) rather than re-creating them.

# Use the same .env path as the setup cell at the top of the notebook.
load_dotenv('../backend/.env')

# Check API keys
if not os.getenv('OPENAI_API_KEY'):
    print("‚ùå OpenAI API key missing! Add to backend/.env")
    sys.exit(1)

print("üîç Research Assistant - Quick Test")
print("=" * 50)

# Test with a simple question
test_question = input("\nüìù Enter your question (or press Enter for default): ").strip()

if not test_question:
    test_question = "What is artificial intelligence?"
    print(f"Using default: '{test_question}'")

print(f"\nResearching: '{test_question}'")
print("Fetching data from sources...")


def get_wikipedia(query):
    """Return the Wikipedia intro text for `query`, or None if nothing is found.

    Delegates to fetch_wikipedia() with a 300-character limit, so the text
    ends with "..." when it was cut.
    """
    page = fetch_wikipedia(query, max_chars=300)
    return page["content"] if page else None


def get_answer(question, context=None):
    """Ask the chat model `question`, optionally grounded in `context`.

    The context is sent as part of the user message; an "assistant" message
    would present it to the model as something it had said itself.
    """
    user_content = f"Question: {question}"
    if context:
        user_content = f"Context: {context}\n\n{user_content}"

    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are a helpful research assistant."},
            {"role": "user", "content": user_content}
        ],
        max_tokens=300
    )
    return response.choices[0].message.content


try:
    # Add backend to path
    # NOTE(review): nothing in this cell imports from 'backend', and the path is
    # relative to the working directory — confirm it is still needed.
    sys.path.append('backend')

    # Get data
    print("\nüìö Checking sources...")

    # Try Wikipedia
    wiki_content = get_wikipedia(test_question)
    if wiki_content:
        print("‚úÖ Wikipedia: Found information")

    # Try arXiv (fetch_arxiv prints its own error and returns [] on failure)
    if fetch_arxiv(test_question, max_results=1):
        print("‚úÖ arXiv: Found academic papers")

    # Get answer
    print("\nü§ñ Generating answer...")
    answer = get_answer(test_question, wiki_content)

    print("\n" + "=" * 50)
    print("‚úÖ RESULT:")
    print("=" * 50)
    print(f"\nQuestion: {test_question}")
    print(f"\nAnswer: {answer}")
    print("\n" + "=" * 50)

except Exception as e:
    print(f"\n‚ùå Error: {e}")
    print("\nMake sure all packages are installed:")
    print("pip install openai requests arxiv python-dotenv")

üîç Research Assistant - Quick Test

Researching: 'what are the usesu pf you what will you can genearte'
Fetching data from sources...

üìö Checking sources...
‚úÖ arXiv: Found academic papers

ü§ñ Generating answer...

‚úÖ RESULT:

Question: what are the usesu pf you what will you can genearte

Answer: I'm sorry, but it seems there may be a typo or error in your question. Could you please provide more context or clarify your question so I can assist you better?

