In [1]:
# Import required modules and initialize the builder from open_deep_research
import uuid 
import os, getpass
import open_deep_research   
print(open_deep_research.__version__) 
from IPython.display import Image, display, Markdown
from langgraph.types import Command
from langgraph.checkpoint.memory import MemorySaver
from open_deep_research.graph import builder
import requests
import json
from typing import Dict, Any, List

0.0.15


In [2]:
# Checkpointer instance handed to the graph when it is compiled.
# NOTE(review): presumably MemorySaver keeps checkpoints in process memory only,
# so they would not survive a kernel restart — confirm against the LangGraph docs.
memory = MemorySaver()
# Compile the open_deep_research graph builder with that checkpointer attached.
# NOTE(review): the cells visible in this notebook never reference `graph` or
# `memory` again; the pipeline further down calls the Tavily and Groq HTTP APIs directly.
graph = builder.compile(checkpointer=memory)

In [3]:
def _set_env(var: str):
    """Make sure the environment variable ``var`` holds a non-empty value.

    A variable that already has a non-empty value is left untouched. When it is
    missing or empty, the value is requested interactively through
    ``getpass.getpass`` (prompt text: ``"<var>: "``) and written to ``os.environ``.

    Args:
        var: Name of the environment variable to populate.
    """
    already_configured = bool(os.environ.get(var))
    if already_configured:
        return
    os.environ[var] = getpass.getpass(f"{var}: ")

# Prompt only for the keys the functions defined in this notebook read:
# TAVILY_API_KEY (get_search_results) and GROQ_API_KEY (generate_report).
# The other providers are left commented out; re-enable a line to be prompted for that key.
# _set_env("OPENAI_API_KEY")
# _set_env("ANTHROPIC_API_KEY")
_set_env("TAVILY_API_KEY")
_set_env("GROQ_API_KEY")
# _set_env("PERPLEXITY_API_KEY")

In [None]:
# def get_search_results(query: str, max_results: int = 3) -> List[Dict[str, Any]]:
#     """Get search results from Tavily API directly"""
#     tavily_api_key = os.environ.get("TAVILY_API_KEY")
#     if not tavily_api_key:
#         raise ValueError("TAVILY_API_KEY is not set in environment variables")
        
#     search_url = "https://api.tavily.com/search"
#     search_params = {
#         "api_key": tavily_api_key,
#         "query": query,
#         "search_depth": "deep",
#         "max_results": max_results
#     }
    
#     try:
#         search_response = requests.post(search_url, json=search_params)
#         search_response.raise_for_status()  # Raise exception for HTTP errors
#         search_data = search_response.json()
        
#         if "results" in search_data:
#             results = search_data["results"]
#             for i, res in enumerate(results):
#                 print(f"Result {i+1} Date: {res.get('published_date', 'No date field')}")
#             return results
#         else:
#             print(f"Unexpected search response format: {search_data}")
#             return []
#     except requests.exceptions.RequestException as e:
#         print(f"Search API error: {str(e)}")
#         return []


In [7]:
def get_search_results(query: str, max_results: int = 3, timeout: float = 30.0) -> List[Dict[str, Any]]:
    """Query the Tavily search endpoint and return its result entries.

    The API key is read from the ``TAVILY_API_KEY`` environment variable and
    sent in the JSON body together with the query. For every result a
    ``Result <n> Date: ...`` line is printed, using the ``published_date`` or
    ``date`` field when one is present.

    Args:
        query: Search string forwarded to Tavily.
        max_results: Maximum number of results requested from the API.
        timeout: Seconds to wait for the HTTP call before giving up.

    Returns:
        The list found under ``"results"`` in the JSON response. An empty list
        is returned (after printing a diagnostic) when the HTTP request fails,
        the body is not valid JSON, the response has no ``"results"`` list, or
        the search matched nothing.

    Raises:
        ValueError: If ``TAVILY_API_KEY`` is not set.
    """
    tavily_api_key = os.environ.get("TAVILY_API_KEY")
    if not tavily_api_key:
        raise ValueError("TAVILY_API_KEY is not set in environment variables")

    search_url = "https://api.tavily.com/search"
    search_params = {
        "api_key": tavily_api_key,
        "query": query,
        "search_depth": "basic",
        "max_results": max_results
    }

    try:
        # requests has no default timeout; without one a stalled connection
        # would block the notebook cell indefinitely.
        search_response = requests.post(search_url, json=search_params, timeout=timeout)
        search_response.raise_for_status()  # Raise exception for HTTP errors
        search_data = search_response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers JSON decoding failures on requests versions whose
        # decode error does not derive from RequestException.
        print(f"Search API error: {str(e)}")
        return []

    results = search_data.get("results") if isinstance(search_data, dict) else None
    if not isinstance(results, list):
        print("Unexpected search response format:", search_data)
        return []
    if not results:
        # A well-formed response with zero hits is not a format problem.
        print(f"No search results returned for query: {query}")
        return []

    for i, res in enumerate(results, 1):
        date = res.get("published_date") or res.get("date") or "No date available"
        print(f"Result {i} Date: {date}")
    return results


In [8]:
def generate_report(search_results: List[Dict[str, Any]], topic: str, timeout: float = 60.0) -> str:
    """Generate a report on ``topic`` by calling the Groq chat-completions API directly.

    Each search result contributes its title, URL and the first 200 characters
    of its ``content`` to the prompt. The key is read from ``GROQ_API_KEY``.

    Args:
        search_results: Result dictionaries; the keys ``title``, ``url`` and
            ``content`` are used when present.
        topic: Subject of the report, interpolated into the prompt.
        timeout: Seconds to wait for the HTTP call before giving up.

    Returns:
        The ``content`` of the first choice in the API response. When the
        request fails or the response does not have that shape, the error
        message is printed and returned in place of a report.

    Raises:
        ValueError: If ``GROQ_API_KEY`` is not set.
    """
    groq_api_key = os.environ.get("GROQ_API_KEY")
    if not groq_api_key:
        raise ValueError("GROQ_API_KEY is not set in environment variables")

    groq_url = "https://api.groq.com/openai/v1/chat/completions"
    groq_headers = {
        "Authorization": f"Bearer {groq_api_key}",
        "Content-Type": "application/json"
    }

    # Format search results for the prompt
    source_blocks = []
    for i, result in enumerate(search_results, 1):
        title = result.get("title", "Untitled Source")
        url = result.get("url", "No URL provided")
        # `or ""` also covers a "content" key that is present but None,
        # which would otherwise fail on the slice.
        content_snippet = (result.get("content") or "")[:200]  # First 200 chars of content
        source_blocks.append(f"Source {i}: {title}\nURL: {url}\nExcerpt: {content_snippet}...\n\n")
    formatted_results = "".join(source_blocks)

    # NOTE(review): the prompt asks for a "concise" report but requests 20 sentences,
    # 34 bullet points and 23 sentences, while max_tokens below caps the completion
    # at 1000 tokens — confirm the intended section sizes.
    prompt = f"""Based on these search results about {topic}, create a concise report following this structure:

1. Introduction (20 sentences)
   - Brief overview of {topic}

2. Key Concepts (34 bullet points)
   - Main ideas related to {topic}

3. Applications or Implications (23 sentences)
   - How {topic} is being used or its importance

4. Summary Table or List
   - A concise summary of the main points

SEARCH RESULTS:
{formatted_results}

Keep your total response under 300 lines."""

    # Request body kept separate from the parsed response so neither name
    # changes meaning halfway through the function.
    request_payload = {
        "model": "meta-llama/llama-4-scout-17b-16e-instruct",
        "messages": [
            {"role": "system", "content": "You create concise, factual reports based on provided search results."},
            {"role": "user", "content": prompt}
        ],
        "max_tokens": 1000,
        "temperature": 0.3
    }

    try:
        # requests has no default timeout; set one so a stalled call cannot hang the cell.
        groq_response = requests.post(groq_url, headers=groq_headers, json=request_payload, timeout=timeout)
        groq_response.raise_for_status()  # Raise exception for HTTP errors
        response_data = groq_response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers JSON decoding failures on requests versions whose
        # decode error does not derive from RequestException.
        error_msg = f"Groq API error: {str(e)}"
        print(error_msg)
        return error_msg

    try:
        return response_data["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError):
        # Missing/empty "choices" or a choice without message content.
        error_msg = f"Unexpected Groq API response: {response_data}"
        print(error_msg)
        return error_msg

In [10]:

# def set_env(var: str):
#     if not os.environ.get(var):
#         os.environ[var] = getpass.getpass(f"{var}: ")

# # Set necessary API keys
# set_env("TAVILY_API_KEY")
# set_env("GROQ_API_KEY")

# Main execution function
def run_research(topic: str, search_query: str = None, max_results: int = 3):
    """Run the research pipeline for a given topic and render the outcome.

    Searches via ``get_search_results``, passes the hits to
    ``generate_report`` and displays two Markdown outputs: the report under a
    ``# Report on <topic>`` heading and a numbered ``## Sources`` list.

    Args:
        topic: Subject to research; used in the report heading and prompt.
        search_query: Query sent to the search step. When ``None`` it defaults
            to ``"<topic> explanation concepts applications"``.
        max_results: Number of search results to request.

    Returns:
        None. Output is produced through ``print`` and ``display``.
    """
    if search_query is None:
        search_query = f"{topic} explanation concepts applications"

    print(f"Searching for information about '{topic}'...")
    search_results = get_search_results(search_query, max_results=max_results)

    if not search_results:
        print("No search results found. Please check your Tavily API key and try again.")
        return

    print(f"Found {len(search_results)} search results. Generating report...")
    report = generate_report(search_results, topic)
    display(Markdown(f"# Report on {topic}\n\n{report}"))

    # Display sources
    source_lines = []
    for i, result in enumerate(search_results, 1):
        title = str(result.get("title") or "Untitled")
        # Escape square brackets so a title containing "[" or "]" cannot
        # terminate the Markdown link text early.
        safe_title = title.replace("[", "\\[").replace("]", "\\]")
        url = result.get("url", "#")
        source_lines.append(f"{i}. [{safe_title}]({url})\n")

    display(Markdown("## Sources\n\n" + "".join(source_lines)))


# Example usage: run the pipeline once for the topic "NLP" with an explicit
# search query instead of the default one derived from the topic.
# The recorded cell output shows this guard passing when the cell is executed
# in the notebook, so running the cell triggers the search and report calls.
if __name__ == "__main__":
    run_research("NLP", search_query="nlp explanation concepts applications")

Searching for information about 'NLP'...
Result 1 Date: No date available
Result 2 Date: No date available
Result 3 Date: No date available
Found 3 search results. Generating report...


# Report on NLP

**Introduction**

Natural Language Processing (NLP) is a subfield of artificial intelligence (AI) that deals with the interaction between computers and humans in natural language. It is a rapidly growing area of AI, with applications in text generators, chatbots, and more. NLP enables computers to understand, interpret, and generate human language, allowing humans to communicate with machines more effectively. The field of NLP has gained significant attention in recent years due to the increasing amount of text data available from social media, websites, and other sources. This has led to the development of various NLP techniques and tools that can analyze, understand, and generate human language. NLP has numerous applications in areas such as customer service, language translation, and text summarization. The goal of NLP is to enable computers to process and understand human language, allowing for more efficient and effective communication between humans and machines. NLP combines computer science, linguistics, and cognitive psychology to achieve this goal. With the growing amount of text data, NLP is becoming a key tool to gain insights and automate tasks. NLP has the potential to revolutionize the way humans interact with machines. It is an interdisciplinary field that requires expertise in computer science, linguistics, and cognitive psychology. NLP has many applications in industries such as healthcare, finance, and education. The field of NLP is rapidly evolving, with new techniques and tools being developed continuously. NLP has the potential to transform the way we interact with machines. It is an exciting and rapidly growing field. NLP has many challenges, including dealing with ambiguity and uncertainty in human language. Despite these challenges, NLP has many applications in areas such as sentiment analysis and text classification. NLP is a key technology for the development of intelligent systems. 

**Key Concepts**

* Tokenization
* Part-of-speech tagging
* Named entity recognition
* Dependency parsing
* Sentiment analysis
* Text classification
* Language modeling
* Machine translation
* Text generation
* Dialogue systems
* Question answering
* Information retrieval
* Text summarization
* Coreference resolution
* Discourse analysis
* Pragmatics
* Semantics
* Syntax
* Morphology
* Lexicon
* Corpus linguistics
* Deep learning
* Neural networks
* Word embeddings
* Language transfer
* Adversarial training
* Attention mechanisms
* Transformers
* BERT
* RoBERTa
* XLNet
* Natural language understanding
* Natural language generation
* Human-computer interaction
* Human language
* Linguistic analysis
* Computational linguistics
* Statistical NLP
* Hybrid NLP

**Applications or Implications**

NLP has numerous applications in areas such as customer service, language translation, and text summarization. It is being used to develop chatbots that can understand and respond to customer inquiries. NLP is also being used in sentiment analysis to analyze customer feedback and sentiment. It is being used in language translation to translate text from one language to another. NLP has applications in healthcare, finance, and education. It is being used to analyze medical text and diagnose diseases. NLP is being used to develop intelligent systems that can understand and respond to human language. It has the potential to revolutionize the way humans interact with machines. NLP has many implications for industries such as customer service, marketing, and sales. It is being used to develop more efficient and effective communication systems. NLP has the potential to improve human-computer interaction. It is being used to develop more intelligent and responsive machines. NLP has many applications in areas such as information retrieval and text classification. 

**Summary Table or List**

| **Category** | **Key Points** |
| --- | --- |
| **Introduction** | NLP, AI, human language |
| **Key Concepts** | 17 concepts, including tokenization, sentiment analysis, and machine translation |
| **Applications** | Customer service, language translation, text summarization, healthcare, finance, education |
| **Implications** | Improved human-computer interaction, more efficient communication systems, intelligent systems |

## Sources

1. [Natural Language Processing (NLP) [A Complete Guide] - DeepLearning.AI](https://www.deeplearning.ai/resources/natural-language-processing/)
2. [What is Natural Language Processing (NLP)? A Beginner's Guide](https://www.datacamp.com/blog/what-is-natural-language-processing)
3. [Natural Language Processing (NLP) - Overview | GeeksforGeeks](https://www.geeksforgeeks.org/natural-language-processing-overview/)
