Doron Rabinian, ID 204636484<BR>
Natalia Meergus, ID 319374039

In [1]:
!pip install --upgrade --quiet langchain-core langchain-openai
!pip install --quiet duckduckgo_search

In [2]:
import contextlib
import io
import json
import os
import re
from typing import Any, Dict, List

from duckduckgo_search import DDGS
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.runnables import Runnable
from langchain_core.tools import BaseTool, tool
from langchain_openai import AzureChatOpenAI

In [3]:
# The API key is a secret and must not be stored in the notebook: it is read from
# the environment (set AZURE_OPENAI_API_KEY before starting the kernel).
# NOTE(review): a key was previously committed in this cell; treat it as leaked
# and rotate it.
AZURE_OPENAI_API_KEY = os.environ.get('AZURE_OPENAI_API_KEY', '')
if not AZURE_OPENAI_API_KEY:
    raise RuntimeError(
        'AZURE_OPENAI_API_KEY is not set. Export it in the environment '
        'before running this notebook.'
    )

# Non-secret settings. The endpoint may be overridden through the environment;
# otherwise the previous value is used.
AZURE_OPENAI_ENDPOINT = os.environ.get('AZURE_OPENAI_ENDPOINT',
                                       'https://openaifor3267.openai.azure.com/')
AZURE_OPENAI_API_VERSION = '2024-02-01'
AZURE_OPENAI_DEPLOYMENT = 'gpt4'

In [4]:
# Publish the key and endpoint through the process environment; the client
# constructed below is not handed them explicitly.
os.environ.update({
    'AZURE_OPENAI_API_KEY': AZURE_OPENAI_API_KEY,
    'AZURE_OPENAI_ENDPOINT': AZURE_OPENAI_ENDPOINT,
})

# Chat-model client shared by the tools and the agent defined in later cells.
llm = AzureChatOpenAI(
    api_version=AZURE_OPENAI_API_VERSION,
    azure_deployment=AZURE_OPENAI_DEPLOYMENT,
    max_retries=2,
    max_tokens=None,
    temperature=0,
    timeout=None
)

In [5]:
@tool
def extract_entities_from_file(file_name: str, entity_type: str) -> str:
    """
    Extract all entities of a given type from a text file.

    The function is given a text file and an "entity type" (could be
    "student", "hobby", "city" or any type of entity). It returns a
    string representing a list of entities of that type in the file
    (e.g., "['Haifa', 'Tel Aviv']").

    Args:
        file_name: Path of the text file to read.
        entity_type: Kind of entity to look for in the text.

    Returns:
        The model's reply as a string. "[]" when the file is empty or
        contains only whitespace. A string starting with "Error:" when the
        file cannot be read or the model call fails.
    """
    try:
        # Decode explicitly instead of relying on the platform default;
        # undecodable bytes are replaced rather than aborting the read.
        with open(file_name, 'r', encoding='utf-8', errors='replace') as file:
            file_content = file.read()
    except FileNotFoundError:
        return f"Error: File {file_name} not found."
    except Exception as e:
        return f"Error: {str(e)}"
    if not file_content.strip():
        return json.dumps([])  # Nothing to extract: skip the model call entirely

    prompt = (f'Extract list of all "{entity_type}" entities appearing in the following text, in the order of their '
              f'appearance:\n'
              f'"{file_content}"')
    try:
        # The model call is the step that can actually fail (network, quota, ...),
        # so it is the one guarded here.
        response = llm.invoke([HumanMessage(prompt)])
        return str(response.content)
    except Exception as e:
        return f"Error: Unable to get a response from the LLM. {str(e)}"

In [6]:
@tool
def internet_search_attribute(a_entity_type: str, a_entity: str, a_attribute: str) -> str:
    """
    Look up an attribute of an entity on the Internet.

    The function is given an entity type (e.g. 'city'), an entity (e.g. 'Tel Aviv')
    and an attribute (e.g. 'current population').
    It searches the Internet to find the attribute of the entity.
    It then asks the LLM to review the search results and
    returns a JSON structure of the form
    {"<entity type>": "<entity>", "<attribute>": <answer>}.
    Example answer: {"city": "Tel Aviv", "population": 4400000}

    Args:
        a_entity_type: Kind of the entity, used as the first JSON key.
        a_entity: The entity to look up.
        a_attribute: The attribute to find, used as the second JSON key.

    Returns:
        The model's reply as a string, or a string starting with "Error:"
        when the search fails, returns nothing, or the model call fails.
    """
    query = f'What is {a_attribute} of {a_entity}?'
    try:
        # list() so that len() and enumerate() below work whatever kind of
        # iterable the search client hands back.
        search_results = list(DDGS().text(query, max_results=2))
    except Exception as e:
        return f"Error: Internet search failed. {str(e)}"
    if not search_results:
        # Without any text the model could only guess; report instead.
        return f"Error: No search results found for '{query}'."

    prompt = f'Read {len(search_results)} text(s) below and comprehend what is ' \
             f'{a_attribute} of {a_entity} {a_entity_type}.\n' \
             f'Reply should be only JSON structure in the form: ' \
             f'{{ "{a_entity_type}": "{a_entity}", "{a_attribute}": <value you have found> }}.\n'
    prompt += '\n'.join([f'Text {i + 1}: "{result}".' for i, result in enumerate(search_results)])
    try:
        response = llm.invoke([HumanMessage(prompt)])
        return str(response.content)
    except Exception as e:
        return f"Error: Unable to get a response from the LLM. {str(e)}"

In [7]:
def _strip_code_fence(text: str) -> str:
    """Return the code inside a Markdown code fence, or the stripped text if there is none."""
    text = text.strip()
    # A complete fence, with any (or no) language tag, possibly surrounded by prose.
    fenced = re.search(r'```[A-Za-z0-9_+-]*[ \t]*\r?\n(.*?)```', text, re.DOTALL)
    if fenced:
        return fenced.group(1).strip()
    # Half a fence: drop a lone opening line and/or a lone closing marker.
    text = re.sub(r'\A```[^\n]*\n', '', text)
    text = re.sub(r'\n```\Z', '', text)
    return text.strip()


@tool
def generate_analysis_program(analysis_request: str,
                              input_file: str,
                              columns: str,
                              row_example: str,
                              output_file: str) -> str:
    """
    Generate a Python program that analyses a csv file.

    This tool takes an analysis request, a csv input_file, the first
    line of the file containing the column names, another line of the file
    with example data, and an output_file. The analysis_request
    describes analysis of the csv input_file to be performed. This
    tool generates a Python program to perform the analysis. The
    program writes the output, a JSON object, to the output_file.
    generate_analysis_program tool returns the generated Python code.

    Args:
        analysis_request: Description of the analysis to perform.
        input_file: Name of the csv file the program must read.
        columns: Header line of the csv file.
        row_example: One example data line of the csv file.
        output_file: Name of the file the program must write its JSON result to.

    Returns:
        The generated Python source with any Markdown code fence removed.
    """
    prompt = 'Generate a correct and executable Python code block. ' \
             'Respond with code only, without any extra text or explanations.\n' \
             f'The code snippet must read the CSV file "{input_file}", ' \
             f'with the header (columns) as "{columns}", ' \
             f'and example row as: "{row_example}".\n' \
             f'The code should perform the following analysis: "{analysis_request}".\n' \
             f'It must then save the analysis result in JSON format to a file named "{output_file}".\n' \
             'Ensure to import the necessary libraries: import pandas as pd, import json.'
    # A failing model call is deliberately left to propagate: returning an
    # error string here would be mistaken for the generated program.
    response = llm.invoke([HumanMessage(prompt)])
    return _strip_code_fence(str(response.content))

In [8]:
@tool
def execute_python_program(program_fn: str, output_fn: str) -> str:
    """
    This tool executes a Python program from a given file.
    It captures the output and any exceptions that occur during execution.
    Returns the output or error message as a string.

    Args:
        program_fn: Path of the Python source file to run.
        output_fn: Path of a file that receives the error message when the
            program cannot be read or fails. It is not written on success.

    Returns:
        A success message (followed by whatever the program printed, if
        anything) or a message describing the failure.
    """
    try:
        with open(program_fn, 'r') as file:
            code = file.read()

        output = io.StringIO()
        error_output = io.StringIO()
        failure = None  # Set to a description when the program does not finish cleanly

        with contextlib.redirect_stdout(output), contextlib.redirect_stderr(error_output):
            try:
                # NOTE(security): this runs arbitrary code inside the current
                # process with no sandboxing; only use it on trusted programs.
                # '__name__' is set so that an `if __name__ == "__main__":`
                # block in the program actually runs; compile() attaches the
                # file name to tracebacks.
                exec(compile(code, program_fn, 'exec'), {'__name__': '__main__'})
            except SystemExit as e:
                # sys.exit() in the program must not propagate to the caller;
                # only a non-zero / non-empty status counts as a failure.
                if e.code not in (None, 0):
                    failure = f"exit status {e.code}"
            except Exception as e:
                failure = str(e)

        if failure is not None:
            error_message = f"Error: {failure}\n{error_output.getvalue()}"
            with open(output_fn, 'w') as output_file:
                output_file.write(error_message)
            return f"Program did not execute successfully. Error: {failure}"

        message = "Program executed successfully and output is written to the file."
        captured = output.getvalue().strip()
        if captured:
            # Hand back what the program printed instead of discarding it.
            message += f"\nCaptured output:\n{captured}"
        return message

    except FileNotFoundError:
        error_message = f"Error: File {program_fn} not found."
        with open(output_fn, 'w') as output_file:
            output_file.write(error_message)
        return error_message

    except Exception as e:
        error_message = f"Error: {str(e)}"
        with open(output_fn, 'w') as output_file:
            output_file.write(error_message)
        return error_message

In [9]:
@tool
def write_file(file_content: str, fn: str) -> str:
    """
    This tool writes the given content to a specified file.
    Returns a success message or an error message.
    """
    try:
        # Echo the payload, then replace the target file's content (mode 'w').
        print("Writing to file with parameters:", file_content)
        with open(fn, 'w') as target:
            target.write(file_content)
    except Exception as exc:
        # Any failure is reported as a string rather than raised.
        return f"Error: {exc}"
    else:
        return f"Success: Content written to {fn}"

In [18]:
class Agent:
    """
    Minimal tool-calling loop around a chat model.

    The model is bound to a set of tools. `loop` sends it a task, executes
    every tool call it requests, feeds the tool results back, and repeats
    until the model answers without requesting a tool or a budget runs out.

    Attributes:
        llm_with_tools: The model with the tools bound to it.
        tools: The tools as passed to the constructor.
        tool_map: Tool name -> tool, used to dispatch tool calls.
        chain: Composition of the bound model and `call_tools`; not used by `loop`.
        messages: Conversation sent to the model on each invocation.
        log_file: Path of the log file, or None until `loop` sets it.
        llm_invocations: Model invocations made by the current `loop` run.
        function_calls: Tool calls executed by the current `loop` run.
    """

    # Budgets for a single loop() run.
    MAX_LLM_INVOCATIONS = 10
    MAX_FUNCTION_CALLS = 10

    def __init__(self, agent: "AzureChatOpenAI", tools: "List[BaseTool]"):
        """Bind `tools` to the chat model `agent` and initialise the bookkeeping."""
        self.llm_with_tools = agent.bind_tools(tools)
        self.tools = tools
        self.tool_map = {bound_tool.name: bound_tool for bound_tool in tools}
        self.chain = self.llm_with_tools | self.call_tools
        # Holds the initial human message, model replies and tool-result dicts.
        self.messages: List[Any] = []
        self.log_file = None
        self.llm_invocations = 0
        self.function_calls = 0

    def _log(self, message: str):
        """Print `message` and, once a log file is set, append it there too."""
        print(message)
        if self.log_file:
            # Explicit encoding: logged model output may contain characters
            # the platform default encoding cannot represent.
            with open(self.log_file, 'a', encoding='utf-8') as log:
                log.write(message + '\n')

    def _log_parameters(self, parameters: Dict[str, Any]):
        """Log each parameter, truncating string values longer than 50 characters."""
        for name, value in parameters.items():
            if isinstance(value, str) and len(value) > 50:
                value = value[:50] + '...'
            self._log(f'Parameter {name} = {value}')

    @staticmethod
    def _get_query(input: Dict[str, Any]) -> str:
        """Return the content of the file named by `input['query_name']`."""
        query_file_name = input['query_name']
        with open(query_file_name) as query_file:
            return query_file.read()

    @staticmethod
    def _get_files_description(inputfile: Dict[str, Any]) -> List[str]:
        """Return one numbered description line per entry of `inputfile['file_resources']`."""
        return ['{0}) "{1}" file: {2}'.format(n_file + 1, file_resource['file_name'], file_resource['description'])
                for n_file, file_resource in enumerate(inputfile['file_resources'])]

    def loop(self, input_json: str) -> str:
        """
        Run the agent on the task described by the JSON file `input_json`.

        The JSON object must provide `query_name` (path of a text file holding
        the task) and `file_resources` (a list of objects with `file_name` and
        `description`).

        Returns:
            The model's final answer, or a string starting with "Error:" if an
            exception occurred or the budget ran out before a final answer.
        """
        # Every run gets a fresh budget; otherwise a second call on the same
        # agent would find the counters exhausted and do nothing.
        self.llm_invocations = 0
        self.function_calls = 0
        try:
            with open(input_json) as json_file:
                inputfile = json.load(json_file)
            self.log_file = f'log_{inputfile["query_name"].replace(".txt", "")}.txt'
            query = self._get_query(inputfile)
            files_descriptions = self._get_files_description(inputfile)

            prompt = f'You are given {len(files_descriptions)} files:\n'
            prompt += '\n'.join(files_descriptions) + f'\nYour mission: {query}'
            self.messages = [HumanMessage(prompt)]

            self._log("**Entering loop**")
            self._log(f"Initial prompt: {prompt}")

            while (self.llm_invocations < self.MAX_LLM_INVOCATIONS
                   and self.function_calls < self.MAX_FUNCTION_CALLS):
                self.llm_invocations += 1
                response = self.llm_with_tools.invoke(self.messages)
                self.messages.append(response)
                if not response.tool_calls:
                    # No tool requested: this is the final answer.
                    self._log("**Leaving loop**")
                    return response.content
                self.call_tools(response)

            self._log("**Leaving loop**")
            # Budget exhausted: say so explicitly instead of returning None.
            stopped = (f"Error: stopped after {self.llm_invocations} LLM invocations and "
                       f"{self.function_calls} tool calls without a final answer.")
            self._log(stopped)
            return stopped
        except Exception as e:
            self._log(f"Error during loop execution: {e}")
            return f"Error: {str(e)}"

    def call_tools(self, msg: "AIMessage") -> None:
        """
        Execute the tool calls requested in `msg`, one after another.

        For every tool call a `tool` message carrying the tool's actual output
        (or a description of the failure) is appended to `self.messages`, so
        the model sees the result on its next invocation. Calls beyond the
        tool-call budget are not executed but still get a reply message.
        """
        for tool_call in msg.tool_calls:
            name = tool_call["name"]
            if self.function_calls >= self.MAX_FUNCTION_CALLS:
                # Still answer, so that every requested call has a matching reply.
                content = f'Skipped {name}: tool-call limit reached'
                self._log(content)
            else:
                self.function_calls += 1
                try:
                    self._log(f'** Entering Agent {name} **')
                    self._log_parameters(tool_call["args"])
                    self._log(f"Calling tool {name} with args: {tool_call['args']}")

                    output = self.tool_map[name].invoke(tool_call["args"])
                    self._log(f'Result from tool {name}: {output}')

                    self._log(f'** Leaving Agent {name} **')
                    content = str(output)
                except Exception as e:
                    # Covers an unknown tool name as well as a failing tool.
                    self._log(f"Error invoking tool {name}: {e}")
                    content = f"Error invoking {name}: {e}"

            self.messages.append({'role': 'tool',
                                  'content': content,
                                  'tool_call_id': tool_call["id"]})

In [19]:
# Tools offered to the model, in the order they are bound.
AVAILABLE_TOOLS = [extract_entities_from_file, internet_search_attribute,
                   generate_analysis_program, execute_python_program, write_file]

agent = Agent(agent=llm, tools=AVAILABLE_TOOLS)
# Bare last expression: the notebook displays the value returned by loop().
agent.loop(input_json="input.json")

**Entering loop**
Initial prompt: You are given 2 files:
1) "class2.csv" file: A csv file describing students and their grades.  the first line contains the columns (field names) of each following row.  They are:  First, Last, Year, Class, Name,Grade. An example row is: Shalom,Levi,3,Calculus,93.5
2) "students.txt" file: a text file containing information on some of the students, including his or her aspirations, favorite city, and hobbies.
Your mission: Generate a Python program that finds the average grade of all the students. The result should be represented as a JSON object with one field, 'average_grade', whose value is a number, and written to the file query3.json. The Python program itself should be stored in the file "query3.py".  Before finishing, execute the program stored in the file "query3.py".

** Entering Agent generate_analysis_program **
Parameter analysis_request = find the average grade of all the students
Parameter input_file = class2.csv
Parameter columns = First,L

'The Python program has been successfully executed and the average grade of all the students has been written to the file "query3.json".'