# Lesson 1: Simple ReAct Agent from Scratch

In [1]:
# based on https://til.simonwillison.net/llms/python-react-pattern

In [2]:
import openai
import re
import httpx
import os
from dotenv import load_dotenv

_ = load_dotenv()
from openai import OpenAI

In [3]:
client = OpenAI()

In [4]:
chat_completion = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hello world"}]
)

In [5]:
# Melvin S - 212222040098

In [6]:
chat_completion.choices[0].message.content

'Hello! How can I assist you today?'

In [7]:
class Agent:
    def __init__(self, system=""):
        self.system = system
        self.messages = []
        if self.system:
            self.messages.append({"role": "system", "content": system})

    def __call__(self, message):
        self.messages.append({"role": "user", "content": message})
        result = self.execute()
        self.messages.append({"role": "assistant", "content": result})
        return result

    def execute(self):
        completion = client.chat.completions.create(
                        model="gpt-4o", 
                        temperature=0,
                        messages=self.messages)
        return completion.choices[0].message.content
    

In [8]:
research_prompt = """
You are a Research Assistant Agent that helps with information gathering and analysis.
You run in a loop of Thought, Action, PAUSE, Observation.
At the end of the loop you output an Answer.

Use Thought to describe your reasoning about the research question.
Use Action to run one of the actions available to you - then return PAUSE.
Observation will be the result of running those actions.

Your available actions are:

web_search:
e.g. web_search: Python machine learning libraries
Simulates searching the web for information on a topic

summarize_text:
e.g. summarize_text: [long text content]
Creates a concise summary of the provided text

calculate_stats:
e.g. calculate_stats: [1, 2, 3, 4, 5]
Calculates basic statistics (mean, median, std) for a list of numbers

translate_text:
e.g. translate_text: Hello world -> Spanish
Translates text to another language

Example session:

Question: What are the best Python libraries for machine learning and their key features?
Thought: I need to search for information about Python ML libraries and then summarize the findings
Action: web_search: best Python machine learning libraries 2024
PAUSE

You will be called again with this:

Observation: Found information about scikit-learn, TensorFlow, PyTorch, etc.

Then continue with your analysis...

Answer: Based on my research, the top Python ML libraries are...
""".strip()

In [9]:
import statistics
import random

def web_search(query):
    search_results = {
        "python machine learning": "Found: scikit-learn (easy-to-use), TensorFlow (deep learning), PyTorch (research-friendly), XGBoost (gradient boosting)",
        "data science tools": "Found: pandas (data manipulation), numpy (numerical computing), matplotlib (visualization), jupyter (notebooks)",
        "natural language processing": "Found: NLTK (comprehensive), spaCy (industrial-strength), transformers (state-of-the-art), gensim (topic modeling)",
        "computer vision": "Found: OpenCV (traditional CV), PIL/Pillow (image processing), scikit-image (image analysis)"
    }

    for key in search_results:
        if any(word in query.lower() for word in key.split()):
            return search_results[key]
    
    return f"Search results for '{query}': Found general information about the topic"

def summarize_text(text):

    if len(text) <= 100:
        return f"Summary: {text}"
    
    sentences = text.split('. ')
    if len(sentences) <= 2:
        return f"Summary: {text[:100]}..."
    
    summary = f"{sentences[0]}. {sentences[-1]}"
    return f"Summary: {summary}"

def calculate_stats(numbers_str):
    try:
        # Parse the string representation of numbers
        numbers_str = numbers_str.strip('[]')
        numbers = [float(x.strip()) for x in numbers_str.split(',')]
        
        mean_val = statistics.mean(numbers)
        median_val = statistics.median(numbers)
        
        if len(numbers) > 1:
            std_val = statistics.stdev(numbers)
        else:
            std_val = 0
            
        return f"Statistics: Mean={mean_val:.2f}, Median={median_val:.2f}, Std Dev={std_val:.2f}, Count={len(numbers)}"
    except Exception as e:
        return f"Error calculating statistics: {e}"

def translate_text(text_and_language):
    translations = {
        "spanish": {"hello": "hola", "world": "mundo", "good": "bueno", "morning": "mañana"},
        "french": {"hello": "bonjour", "world": "monde", "good": "bon", "morning": "matin"},
        "german": {"hello": "hallo", "world": "welt", "good": "gut", "morning": "morgen"}
    }
    
    try:
        text, target_lang = text_and_language.split(' -> ')
        text = text.strip().lower()
        target_lang = target_lang.strip().lower()
        
        if target_lang in translations:
            words = text.split()
            translated_words = []
            for word in words:
                if word in translations[target_lang]:
                    translated_words.append(translations[target_lang][word])
                else:
                    translated_words.append(f"[{word}]")  # Keep untranslated words in brackets
            return f"Translation to {target_lang}: {' '.join(translated_words)}"
        else:
            return f"Translation to {target_lang}: [Simulated translation of '{text}']"
    except:
        return f"Translation: [Simulated translation of the provided text]"

research_actions = {
    "web_search": web_search,
    "summarize_text": summarize_text, 
    "calculate_stats": calculate_stats,
    "translate_text": translate_text
}

In [10]:
def research_query(question, max_turns=7):
    i = 0
    research_bot = Agent(research_prompt)
    next_prompt = question
    
    print(f" Research Assistant starting investigation: {question}")
    print("=" * 60)
    
    while i < max_turns:
        i += 1
        print(f"\n Turn {i}:")
        result = research_bot(next_prompt)
        print(result)
        
        # Parse actions from the result
        actions = [
            action_re.match(a) 
            for a in result.split('\n') 
            if action_re.match(a)
        ]
        
        if actions:
            # There is an action to run
            action, action_input = actions[0].groups()
            if action not in research_actions:
                print(f" Unknown action: {action}: {action_input}")
                break
                
            print(f"\n🔧 Executing: {action}({action_input})")
            observation = research_actions[action](action_input)
            print(f" Observation: {observation}")
            next_prompt = "Observation: {}".format(observation)
        else:
            print("\n Research complete!")
            return result
            
    print(f"\n Reached maximum turns ({max_turns})")
    return result

In [11]:
# Test 1: Research about machine learning libraries
action_re = re.compile(r"^Action:\s*(\w+):\s*(.+)$")
print("                                                          Name : Melvin S\n                                                    Register number : 212222040098")
ml_question = "What are the most popular Python machine learning libraries and their main use cases?"
research_query(ml_question)

                                                          Name : Melvin S
                                                    Register number : 212222040098
 Research Assistant starting investigation: What are the most popular Python machine learning libraries and their main use cases?

 Turn 1:
Thought: To answer this question, I need to search for information about the most popular Python machine learning libraries and their main use cases. This will help me identify the key libraries and what they are typically used for in the field of machine learning.
Action: web_search: most popular Python machine learning libraries and their use cases 2024
PAUSE

🔧 Executing: web_search(most popular Python machine learning libraries and their use cases 2024)
 Observation: Found: scikit-learn (easy-to-use), TensorFlow (deep learning), PyTorch (research-friendly), XGBoost (gradient boosting)

 Turn 2:
Answer: Based on the information found, the most popular Python machine learning libraries and th

'Answer: Based on the information found, the most popular Python machine learning libraries and their main use cases are:\n\n1. **Scikit-learn**: Known for its ease of use, scikit-learn is widely used for traditional machine learning tasks such as classification, regression, clustering, and dimensionality reduction. It provides simple and efficient tools for data mining and data analysis.\n\n2. **TensorFlow**: Developed by Google, TensorFlow is a powerful library for deep learning applications. It is used for building and training neural networks, and is particularly popular for large-scale machine learning models and production-level deployment.\n\n3. **PyTorch**: Favored in the research community, PyTorch is known for its flexibility and ease of use in developing deep learning models. It is often used for prototyping and research due to its dynamic computation graph and intuitive interface.\n\n4. **XGBoost**: This library is specifically designed for gradient boosting, a powerful mac

In [12]:
# Test 2: Data analysis task
print("                                                          Name : melvin S S\n                                                    Register number : 212222040098")
data_question = "I have these test scores: [85, 92, 78, 96, 88, 91, 84, 89]. Can you analyze them and tell me what they mean?"
research_query(data_question)

                                                          Name : melvin S S
                                                    Register number : 212222040098
 Research Assistant starting investigation: I have these test scores: [85, 92, 78, 96, 88, 91, 84, 89]. Can you analyze them and tell me what they mean?

 Turn 1:
Thought: To analyze the test scores, I need to calculate basic statistics such as the mean, median, and standard deviation. This will help in understanding the distribution and central tendency of the scores.
Action: calculate_stats: [85, 92, 78, 96, 88, 91, 84, 89]
PAUSE

🔧 Executing: calculate_stats([85, 92, 78, 96, 88, 91, 84, 89])
 Observation: Statistics: Mean=87.88, Median=88.50, Std Dev=5.54, Count=8

 Turn 2:
Answer: The analysis of the test scores [85, 92, 78, 96, 88, 91, 84, 89] reveals the following:

- The mean (average) score is 87.88, indicating that the average performance across all tests is just under 88.
- The median score is 88.50, which suggests that

'Answer: The analysis of the test scores [85, 92, 78, 96, 88, 91, 84, 89] reveals the following:\n\n- The mean (average) score is 87.88, indicating that the average performance across all tests is just under 88.\n- The median score is 88.50, which suggests that half of the scores are below 88.50 and half are above, showing a balanced distribution around the middle.\n- The standard deviation is 5.54, indicating that the scores are relatively close to the mean, with moderate variability.\n- There are a total of 8 scores in the dataset.\n\nOverall, the scores are fairly consistent with a slight spread around the average, and the performance is generally high.'

In [13]:
# Test 3: Multi-step research with translation
print("                                                          Name : Melvin S S\n                                                    Register number : 212222040098")
translation_question = "Research natural language processing tools and then translate 'good morning' to Spanish"
research_query(translation_question)

                                                          Name : Melvin S S
                                                    Register number : 212222040098
 Research Assistant starting investigation: Research natural language processing tools and then translate 'good morning' to Spanish

 Turn 1:
Thought: I need to first gather information on natural language processing (NLP) tools and then translate the phrase 'good morning' to Spanish. I will start by searching for information on NLP tools.
Action: web_search: natural language processing tools 2024
PAUSE

🔧 Executing: web_search(natural language processing tools 2024)
 Observation: Found: pandas (data manipulation), numpy (numerical computing), matplotlib (visualization), jupyter (notebooks)

 Turn 2:
Thought: The search results provided information on general data science tools rather than specific natural language processing tools. I need to refine my search to focus specifically on NLP tools.
Action: web_search: top natural lan

"Thought: The translation provided is incorrect. The correct translation for 'good morning' in Spanish is 'buenos días'. I will provide this correct translation and then consider how to address the issue with finding NLP tools.\n\nAnswer: The correct translation of 'good morning' to Spanish is 'buenos días'. Regarding NLP tools, I encountered difficulties retrieving specific information. Common NLP tools include NLTK, spaCy, and Hugging Face's Transformers. If you need more detailed information, please let me know!"