In [1]:
from twinyn.agents.seed_agent import *
import twinyn.agents.agent_functions
from twinyn.agents.prompts import *

In [2]:
from pathlib import Path

from autogen.coding import LocalCommandLineCodeExecutor

import os
from dotenv import load_dotenv
# Load variables from a local .env file (python-dotenv) into the process
# environment; the LLM configs further down read OPENAI_API_KEY via os.getenv.
load_dotenv()

# Working directory handed to the code executor. The path is relative to the
# notebook's current working directory; exist_ok=True makes re-running this
# cell a no-op when the directory already exists.
work_dir = Path("coding")
work_dir.mkdir(exist_ok=True)

In [3]:
# `import twinyn.agents.agent_functions` binds only the top-level name `twinyn`
# in this namespace, so the SQL helper is referenced through its full dotted
# path. A bare `agent_functions` would only exist if one of the star imports
# happened to export it, which makes the cell fragile on a fresh kernel.
executor = LocalCommandLineCodeExecutor(
    work_dir=work_dir,
    functions=[twinyn.agents.agent_functions.execute_sql],
)

# Natural-language question that seeds the agent conversation.
seed_prompt = "Peak traffic time in the past 5 hours bucketed in 30 minute intervals"

In [4]:
# LLM configuration for the SQL-writing agent (larger model).
# NOTE(review): "price" looks like AutoGen's [prompt, completion] USD cost per
# 1K tokens used for cost accounting -- confirm against the AutoGen version in use.
llm_config_sql = {
    "config_list": [
        {
            "model": "gpt-4o-2024-08-06",
            "api_key": os.getenv("OPENAI_API_KEY"),
            "price": [0.0025, 0.0100],
        },
    ],
}

# LLM configuration for the analyst agent (smaller, cheaper model); same API key.
llm_config_analyst = {
    "config_list": [
        {
            "model": "gpt-4o-mini",
            "api_key": os.getenv("OPENAI_API_KEY"),
            "price": [0.000150, 0.0006],
        },
    ],
}

In [5]:
def _carryover_item_to_text(item) -> str:
    """Render a single carryover entry as plain text.

    Strings pass through unchanged, dict entries contribute their "content"
    value, and anything else falls back to ``str(item)``.
    """
    if isinstance(item, str):
        return item
    if isinstance(item, dict) and "content" in item:
        return str(item["content"])
    return str(item)


def custom_message(sender: "ConversableAgent", recipient: "ConversableAgent", context: dict) -> "str | dict":
    """Build the follow-up question sent along with recent carryover context.

    Args:
        sender: Agent sending the message (unused here).
        recipient: Agent receiving the message (unused here).
        context: Chat context; only the optional ``"carryover"`` entry is read.
            It may be a string, a list of entries, or absent/``None``.

    Returns:
        The fixed review question followed by ``"\\nContext: \\n"`` and the
        carryover text. For a list, only the last three entries are kept,
        joined with newlines.
    """
    # Annotations are quoted so defining this function does not require
    # ConversableAgent to be resolvable at definition time.
    max_carryover_items = 3  # keep the prompt short: only the most recent entries

    # `or ""` folds a missing key, None and an empty list/string into "".
    carryover = context.get("carryover") or ""
    if isinstance(carryover, list):
        # Entries are not guaranteed to be strings, so normalise each one
        # before joining instead of letting str.join raise TypeError.
        carryover = "\n".join(
            _carryover_item_to_text(item) for item in carryover[-max_carryover_items:]
        )
    elif not isinstance(carryover, str):
        carryover = str(carryover)

    final_msg = "What do you think of the results? Do you find any peculiarity or anything that require further querying?" + "\nContext: \n" + carryover
    print("FINAL_MESSAGE: ", final_msg)
    return final_msg


In [6]:
# Assemble the seed task from the pieces defined in the cells above.
# NOTE(review): arguments are positional, so their order must match
# SeedTask's signature (not visible in this notebook) -- confirm before reordering.
seed = SeedTask(
    executor,
    llm_config_sql,
    llm_config_analyst,
    custom_message,
    seed_prompt,
    SQL_AGENT_SYSTEM_PROMPT,
    ANALYST_AGENT_SYSTEM_PROMPT,
)

In [7]:
# Run the seed task. The transcript printed below starts with
# code_executor_agent sending the seed prompt to sql_agent.
seed.kickoff()


[34m
********************************************************************************[0m
[34mStarting a new chat....[0m
[34m
********************************************************************************[0m
[33mcode_executor_agent[0m (to sql_agent):

Peak traffic time in the past 5 hours bucketed in 30 minute intervals

--------------------------------------------------------------------------------
[33msql_agent[0m (to code_executor_agent):

To determine the peak traffic time in the past 5 hours, we can follow these steps:

1. **Identify the Latest Log Entry**: We need to find the most recent log entry to ensure we're working with the correct timeframe, using a Common Table Expression (CTE).

2. **Compute the Time Range**: Subtract 5 hours from the latest log entry to determine the start of our time range.

3. **Bucket the Logs**: Group the logs into 30-minute intervals for the past 5 hours.

4. **Count Accesses in Each Interval**: For each 30-minute interval, count the nu

In [8]:
# Sanity check: the first message of the first chat's history should be the
# seed prompt, unchanged.
seed.chat_result[0].chat_history[0]['content']

'Peak traffic time in the past 5 hours bucketed in 30 minute intervals'

In [11]:
# Wrap the chat results for post-processing (SeedOutput is defined in the
# twinyn package; its internals are not shown in this notebook).
out = SeedOutput(seed.chat_result)

In [12]:
# NOTE(review): presumably populates out.code_res_msg, out.analysis_msg and
# out.parsed_instructions, which the following cells read -- confirm against SeedOutput.
out.collect()

In [13]:
# Show the code-execution result message: per the output below it contains the
# exit code, the raw query output, and a short summary of the result.
print(out.code_res_msg)

exitcode: 0 (execution succeeded)
Code output: [(datetime.datetime(2019, 1, 22, 10, 30, tzinfo=datetime.timezone(datetime.timedelta(seconds=19800))), 53401)]

The query executed successfully. The peak traffic time within the past 5 hours was during the interval starting at "2019-01-22 10:30:00" (local timezone) with 53,401 accesses recorded. 



In [14]:
# Show the analysis message: per the output below it has an "Analysis:" section
# followed by numbered "Further Instructions:" (presumably written by the analyst agent).
print(out.analysis_msg)

Analysis:
The peak traffic time indicates a significant influx of user activity, suggesting high interest or demand for content or services at that specific time. The unusually high access count of 53,401 in a short time frame could hint at various phenomena such as a marketing campaign, sudden news event, or technical issues (like a recursive request loop). This event warrants further exploration to identify potential causes or effects, like the types of requests made, the geographical distribution of users accessing the server during that time, or if it correlates with any campaigns or outages.

Further Instructions:
1. Retrieve the details of the server log entries recorded in the interval starting at "2019-01-22 10:30:00" and covering one hour after that time to analyze the requests made during the peak traffic.
2. Analyze the geographical distribution of the users by correlating the client IP addresses in the server log with the geoip2_network and geoip2_location tables for that p

In [17]:
# Show the follow-up instructions as a Python list of strings; per the output
# below, the leading numbering from the analysis text has been stripped.
print(out.parsed_instructions)

['Retrieve the details of the server log entries recorded in the interval starting at "2019-01-22 10:30:00" and covering one hour after that time to analyze the requests made during the peak traffic.', 'Analyze the geographical distribution of the users by correlating the client IP addresses in the server log with the geoip2_network and geoip2_location tables for that peak period.', 'Investigate the status codes reported during the peak traffic to assess whether there were any errors (e.g., status codes 4xx or 5xx) that could indicate issues affecting user experience.']
