In [1]:
from dotenv import load_dotenv
import os

load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
# TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
# SERPAPI_API_KEY = os.getenv("SERPAPI_API_KEY")
AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")
AWS_REGION = os.getenv("AWS_REGION")

In [2]:
# Import OpenAI chat model
from langchain.chat_models import ChatOpenAI
import os

# Set the OpenAI API key
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

# Create the OpenAI chat model
chat = ChatOpenAI(
    model="gpt-3.5-turbo",  # Use "gpt-4" if you want a cheaper alternative
    temperature=0.2
)

  chat = ChatOpenAI(


s3 clients

In [13]:
import boto3
from langchain.tools import Tool
import pandas as pd

os.environ["AWS_ACCESS_KEY_ID"] = AWS_ACCESS_KEY_ID
os.environ["AWS_SECRET_ACCESS_KEY"]=AWS_SECRET_ACCESS_KEY
# os.environ["AWS_REGION"]= AWS_REGION

# Initialize S3 client
s3_client = boto3.client("s3")

# Function to list files in an S3 bucket
def list_s3_objects(bucket_name):
    response = s3_client.list_objects_v2(Bucket=bucket_name)
    if "Contents" in response:
        return [obj["Key"] for obj in response["Contents"]]
    return "No files found in the bucket."

# Function to read a file from S3
def read_s3_object(bucket_name, file_key):
    response = s3_client.get_object(Bucket=bucket_name, Key=file_key)
    # content = response["Body"].read().decode("utf-8")
    content = pd.read_csv(response["Body"])
    content = content[["Latitude", "Longitude","Magnitude"]].head(10000)
    return content

from langchain.tools import StructuredTool
from pydantic import BaseModel

# Define input schema for ReadS3File tool
class ReadS3FileInput(BaseModel):
    bucket_name: str
    file_key: str

# Define input schema for ListS3Files tool
class ListS3FilesInput(BaseModel):
    bucket_name: str

# Define tools using `StructuredTool`
list_s3_tool = StructuredTool.from_function(
    name="ListS3Files",
    func=list_s3_objects,
    description="Lists all files in a given S3 bucket.",
    args_schema=ListS3FilesInput
)

read_s3_tool = StructuredTool.from_function(
    name="ReadS3File",
    func=read_s3_object,
    description="Reads the content of a file from an S3 bucket.",
    args_schema=ReadS3FileInput
)

In [4]:
list_s3_objects("earthquakedb")

['data_etl.csv']

In [5]:
read_s3_object("earthquakedb", "data_etl.csv")

Unnamed: 0.1,Unnamed: 0,Year,Local_Time,UTC_Time,Magnitude,Location,Depth,Latitude,Longitude,Event_type,Assessment,Agency
0,0,2025,Jan 26 14:48:47,Jan 26 13:48:47,1.3,Martigny VS,4.9,46.16,7.06,,automatic,SED
1,1,2025,Jan 26 09:36:00,Jan 26 08:36:00,1.2,Liestal BL,10.6,47.48,7.74,,automatic,SED
2,2,2025,Jan 26 05:53:30,Jan 26 04:53:30,1.3,Bosco/Gurin TI,5.0,46.30,8.34,,automatic,SED
3,3,2025,Jan 26 05:40:34,Jan 26 04:40:34,1.7,CHATEL-ST-DENIS FR,2.2,46.49,6.89,,automatic,SED
4,4,2025,Jan 25 23:06:29,Jan 25 22:06:29,1.6,Unterschaechen UR,8.5,46.90,8.80,,automatic,SED
...,...,...,...,...,...,...,...,...,...,...,...,...
95,100,2025,Jan 08 15:37:59,Jan 08 14:37:59,1.3,Montafon A,2.1,47.08,10.00,earthquake,manual,SED
96,101,2025,Jan 08 10:52:45,Jan 08 09:52:45,1.0,Aosta I,4.0,45.81,7.43,earthquake,manual,SED
97,102,2025,Jan 08 08:46:05,Jan 08 07:46:05,0.9,Scuol GR,2.1,46.80,10.29,earthquake,manual,SED
98,103,2025,Jan 08 01:58:23,Jan 08 00:58:23,1.6,Chamonix F,6.6,45.90,7.01,earthquake,manual,SED


In [6]:
import folium
import matplotlib

from langchain_experimental.utilities import PythonREPL

python_repl = PythonREPL()
repl_tool = Tool(
    name="python_repl",
    description="Executes python code and returns the result. The code runs in a static sandbox without interactive mode, so print output or save output to a file.",
    func=python_repl.run,
)
repl_tool.name = "python_interpreter"

In [14]:
import folium
import pandas as pd
from langchain.tools import Tool

# Define input schema for reading the file from S3 and plotting the map
class MapPlotInput(BaseModel):
    bucket_name: str
    file_key: str

def scale_radius(magnitude):
    return 2 ** (magnitude)

# Modify plot_map to accept input from S3
def plot_map(bucket_name: str, file_key: str):
    """Reads earthquake data from S3 and plots it on a Folium map."""
    
    # Read the data from S3
    data = read_s3_object(bucket_name, file_key)  # Ensure this returns a DataFrame

    # Check if 'Latitude' and 'Longitude' columns exist
    if "Latitude" not in data.columns or "Longitude" not in data.columns:
        return "Data must contain 'Latitude' and 'Longitude' columns."

    # Create a base map centered around the mean location
    map_center = [data["Latitude"].mean(), data["Longitude"].mean()]
    map_plot = folium.Map(location=map_center, zoom_start=8)

    # Add points to the map
    for _, row in data.iterrows():
        folium.CircleMarker(
        location=[row['Latitude'], row['Longitude']],
        radius=scale_radius(row['Magnitude']),  # Apply the scaling function
        color='red',
        fill=True,
        fill_color='red',
        popup=f"Magnitude: {row.get('Magnitude', 'N/A')}"
        ).add_to(map_plot)

    # Save the map to an HTML file
    map_plot.save("earthquake_map.html")

    return "Map plot saved as earthquake_map.html"

# Define the LangChain tool using the new input schema
map_plot_tool = StructuredTool.from_function(
    name="MapPlot",
    func=plot_map,
    description="Creates an interactive map plot using Folium. Requires an S3 bucket name and file key.",
    args_schema=MapPlotInput  # Use the new schema for input
)

# Define a new LangChain tool for mapping
# map_plot_tool = Tool(
#     name="MapPlot",
#     func=plot_map,
#     description="Creates an interactive map plot using Folium. Requires a DataFrame with 'latitude' and 'longitude' columns."
# )


In [120]:
df = read_s3_object("earthquakedb", "data_etl.csv")
plot_map(df)

'Map plot saved as earthquake_map.html'

Agent

In [15]:
from langchain.agents import AgentExecutor, create_openai_tools_agent
from langchain_core.prompts import ChatPromptTemplate


# Define prompt
# prompt = ChatPromptTemplate.from_template("{input}")
prompt = ChatPromptTemplate.from_template(
    "{input}\n\nPrevious Actions:\n{agent_scratchpad}"
)

# Create LLM Agent
agent = create_openai_tools_agent(
    llm=chat,
    tools=[list_s3_tool, read_s3_tool, map_plot_tool],  # Add S3 tools here
    prompt=prompt,
)

agent_executor = AgentExecutor(
    agent=agent,
    tools=[list_s3_tool, read_s3_tool, map_plot_tool],  # Add tools here as well
    verbose=True
)


In [122]:
print(agent_executor)

verbose=True agent=RunnableMultiActionAgent(runnable=RunnableAssign(mapper={
  agent_scratchpad: RunnableLambda(lambda x: format_to_openai_tool_messages(x['intermediate_steps']))
})
| ChatPromptTemplate(input_variables=['agent_scratchpad', 'input'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['agent_scratchpad', 'input'], input_types={}, partial_variables={}, template='{input}\n\nPrevious Actions:\n{agent_scratchpad}'), additional_kwargs={})])
| RunnableBinding(bound=ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x153261e40>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x172fa15d0>, temperature=0.2, model_kwargs={}, openai_api_key='sk-proj-9iegtIvnMu79yY6fg1sGT3BlbkFJ5ZXzY5GbxbC6Vj9BcDXY', openai_proxy=''), kwargs={'tools': [{'type': 'function', 'function': {'name': 'ListS3Files', 'description': 'Lists all files in a given S3 bucket.', 'parameters': {'proper

In [99]:
# Run Agent with a query
response = agent_executor.invoke({"input": "List all files in s3 bucket called earthquakedb"})
print(response)




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `ListS3Files` with `{'bucket_name': 'earthquakedb'}`


[0m[36;1m[1;3m['data_etl.csv'][0m[32;1m[1;3m
Invoking: `ReadS3File` with `{'bucket_name': 'earthquakedb', 'file_key': 'data_etl.csv'}`


[0m[33;1m[1;3m     Latitude  Longitude
0       46.16       7.06
1       47.48       7.74
2       46.30       8.34
3       46.49       6.89
4       46.90       8.80
..        ...        ...
995     47.57       6.86
996     47.24       9.03
997     47.13       7.18
998     47.24       9.03
999     47.13       7.17

[1000 rows x 2 columns][0m[32;1m[1;3m
Invoking: `ListS3Files` with `{'bucket_name': 'earthquakedb'}`


[0m[36;1m[1;3m['data_etl.csv'][0m[32;1m[1;3m
Invoking: `ListS3Files` with `{'bucket_name': 'earthquakedb'}`


[0m[36;1m[1;3m['data_etl.csv'][0m[32;1m[1;3m
Invoking: `ListS3Files` with `{'bucket_name': 'earthquakedb'}`


[0m[36;1m[1;3m['data_etl.csv'][0m[32;1m[1;3m
Invoking: `ListS3Files`

In [100]:
response = agent_executor.invoke({"input": "Read file data_etl.csv from S3 bucket named earthquakedb"})
print(response)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `ListS3Files` with `{'bucket_name': 'earthquakedb'}`


[0m[36;1m[1;3m['data_etl.csv'][0m[32;1m[1;3m
Invoking: `ReadS3File` with `{'bucket_name': 'earthquakedb', 'file_key': 'data_etl.csv'}`


[0m[33;1m[1;3m     Latitude  Longitude
0       46.16       7.06
1       47.48       7.74
2       46.30       8.34
3       46.49       6.89
4       46.90       8.80
..        ...        ...
995     47.57       6.86
996     47.24       9.03
997     47.13       7.18
998     47.24       9.03
999     47.13       7.17

[1000 rows x 2 columns][0m[32;1m[1;3mThe content of the file `data_etl.csv` from the S3 bucket named `earthquakedb` is as follows:

```
Latitude  Longitude
0       46.16       7.06
1       47.48       7.74
2       46.30       8.34
3       46.49       6.89
4       46.90       8.80
..        ...        ...
995     47.57       6.86
996     47.24       9.03
997     47.13       7.18
998     47.24       9.03
999

In [16]:
response = agent_executor.invoke({"input": "Read file data_etl.csv from S3 bucket named earthquakedb and plot the result in a map plot"})
print(response)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `ReadS3File` with `{'bucket_name': 'earthquakedb', 'file_key': 'data_etl.csv'}`


[0m[33;1m[1;3m      Latitude  Longitude  Magnitude
0        46.16       7.06        1.3
1        47.48       7.74        1.2
2        46.30       8.34        1.3
3        46.49       6.89        1.7
4        46.90       8.80        1.6
...        ...        ...        ...
8742     45.88       7.02        0.4
8743     48.12       7.57        1.3
8744     48.13       7.57        0.3
8745     47.07       9.50        0.6
8746     47.07       9.50        0.7

[8747 rows x 3 columns][0m[32;1m[1;3m
Invoking: `MapPlot` with `{'bucket_name': 'earthquakedb', 'file_key': 'data_etl.csv'}`


[0m[38;5;200m[1;3mMap plot saved as earthquake_map.html[0m[32;1m[1;3mThe file data_etl.csv from the S3 bucket named earthquakedb has been read successfully. The data consists of latitude, longitude, and magnitude values for earthquake events.

I ha