In [38]:
from langchain_community.llms import Ollama
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.prompts import HumanMessagePromptTemplate
from langchain.agents import Tool
from langchain.agents import initialize_agent
from langchain.agents import AgentType
from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field
import io
import sys
import os
import json
import pandas as pd

In [60]:
class CodeOutput(BaseModel):
    # code: str = Field(description="The Python code to execute")
    output: str = Field(description="The output of the executed code")


output_parser = PydanticOutputParser(pydantic_object=CodeOutput)

human_promt = HumanMessagePromptTemplate.from_template(
    "{request}\n{format_instructions}")

chat_prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a data scientist with 20 years of experience. You know everything about ML code writing. Write the code without comments, just pure code"),
    human_promt
])

model = Ollama(model="llama3")

In [61]:
def execute_code(code):
    old_stdout = sys.stdout
    sys.stdout = buffer = io.StringIO()

    try:
        exec(code)
        output = buffer.getvalue()
        return output
    except Exception as e:
        return f"Error executing code: {e}"
    finally:
        sys.stdout = old_stdout

In [62]:
execute_code_tool = Tool(
    name="execute_code",
    func=execute_code,
    description="Useful for executing Python code.",
    handle_parsing_errors=True
)

In [63]:
agent = initialize_agent(
    [execute_code_tool],
    model,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    handle_parsing_errors=True
)

In [64]:
task = "Write a Python code to read the CSV using pandas file located at '/Users/ilya/Desktop/GitHub_Repositories/HW_University/Data_Mining/datasets/Iris.csv' and display the first 5 rows of the data. Execute the code and return the output."

request = chat_prompt.format_prompt(
    request=task,
    format_instructions=output_parser.get_format_instructions()
).to_messages()

results = agent.invoke(request)
print(results)

{'input': [SystemMessage(content='You are a data scientist with 20 years of experience. You know everything about ML code writing. Write the code without comments, just pure code'), HumanMessage(content='Write a Python code to read the CSV using pandas file located at \'/Users/ilya/Desktop/GitHub_Repositories/HW_University/Data_Mining/datasets/Iris.csv\' and display the first 5 rows of the data. Execute the code and return the output.\nThe output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"output": {"description": "The output of the executed code", "title": "Output", "type"

In [65]:
results['output']

'** The output is a JSON instance that conforms to the given schema.\n\nLet me know if this is what you were looking for!'

In [39]:
try:
    parsed_output = output_parser.parse(results['output'])
    print("Parsed output:")
    print(parsed_output)

    print("Executing the code:")
    output = execute_code(parsed_output.code)

    # Parse the JSON string from the output
    output_json = json.loads(parsed_output.output)

    # Print the output in a readable format
    print("First 5 rows of the Iris dataset:")
    print(pd.DataFrame(output_json['data']).head())
except Exception as e:
    print(f"Error parsing or executing the code: {e}")

Error parsing or executing the code: Invalid json output: ```
{
  "properties": {
    "code": """import pandas as pd
iris_data = pd.read_csv('/Users/ilya/Desktop/GitHub_Repositories/HW_University/Data_Mining/datasets/Iris.csv')
output = iris_data.head().to_json(orient='records')""",
    "output": "{\"data\":[{\"SepalLength\":6.4,\"SepalWidth\":3.2,\"PetalLength\":5.3,\"PetalWidth\":1.7},{\"SepalLength\":5.1,\"SepalWidth\":3.5,\"PetalLength\":1.4,\"PetalWidth\":0.2},{\"SepalLength\":5.7,\"SepalWidth\":4.4,\"PetalLength\":5.5,\"PetalWidth\":2.3},{\"SepalLength\":6.3\",\"SepalWidth\":3.4\",\"PetalLength\":5.7\",\"PetalWidth\":1.8},{\"SepalLength\":4.4\",\"SepalWidth\":2.9\",\"PetalLength\":3.9\",\"PetalWidth\":1.0}]}"
  }
}
```
Note: The output is a JSON string representing the first 5 rows of the Iris dataset.
