In [1]:
# Enable IPython's autoreload extension in mode 2 so that edited modules are
# re-imported automatically before each cell runs.
%load_ext autoreload
%autoreload 2
# Render inline figures in 'retina' (high-DPI) format.
%config InlineBackend.figure_format = 'retina'


In [2]:
# Switch the kernel's working directory to the sibling `src` directory
# (up one level, then into `src`), printing the directory before and after.
# NOTE(review): presumably needed so the project-local imports in later cells
# (prompt, openai_api, data.synthetic) resolve - confirm.
%pwd
%cd ..
%cd src
%pwd

/home/kyudan/AnomLLM
/home/kyudan/AnomLLM/src


'/home/kyudan/AnomLLM/src'

In [3]:
# Absolute path of the project directory. Edit BASE_DIR to match your own
# environment: result files are written under f"{BASE_DIR}/results/synthetic/...".

BASE_DIR = "/home/kyudan/AnomLLM"

In [4]:
def make_component_prompt(time_series):
    """Return the component-detection prompt with ``time_series`` embedded.

    The prompt describes Trend, Seasonality, Noise and Anomalies, shows the
    given series, and asks for a JSON object with a 0/1 flag per component.

    Args:
        time_series: Value interpolated verbatim into the prompt on its own
            line (formatted exactly as an f-string placeholder would).

    Returns:
        The full prompt string. Note that the text intentionally keeps the
        original layout, including the space after "carefully." and the
        trailing newline followed by four spaces.
    """
    # Built from adjacent literals so that significant whitespace is visible.
    return (
        "Time series data can typically be decomposed into three main components such as Trend, Seasonality, and Residuals.\n"
        "In this analysis, Residuals are further divided into Noise and Anomalies.\n"
        "\n"
        "The four components exhibit the following characteristics:\n"
        "* Trend: A long-term directional pattern or consistent upward/downward movement.\n"
        "* Seasonality: Regular and predictable cycles repeating at consistent intervals.\n"
        "* Noise: Random, irregular fluctuations without a meaningful pattern.\n"
        "* Anomalies: Observations significantly deviating from typical or expected patterns.\n"
        "\n"
        "Below is the given time series data:\n"
        f"{time_series}\n"
        "\n"
        "Analyze the provided time series carefully. \n"
        "Determine if each of these components (Trend, Seasonality, Noise, Anomalies) is present (1) or absent (0).\n"
        "\n"
        "Provide your answer strictly in the following JSON format:\n"
        '{"Trend": 0 or 1, "Seasonality": 0 or 1, "Noise": 0 or 1, "Anomalies": 0 or 1}\n'
        "    "
    )

In [5]:
from prompt import LIMIT_PROMPT,PROMPT, time_series_to_str


def create_component_prompt(idx, series):
    """Build the chat request payload for the ``idx``-th time series.

    Args:
        idx: Key used to pick one entry out of ``series`` (``series[idx]``).
        series: Indexable collection of time series; the selected entry is
            converted to text with ``time_series_to_str``.

    Returns:
        A dict with a single user message containing the component prompt,
        plus the ``temperature`` and ``stop`` settings sent with the request.
    """
    prompt_text = make_component_prompt(time_series_to_str(series[idx]))

    # Echo the very first prompt so the rendered text can be checked in the cell output.
    if idx == 0:
        print(prompt_text)

    return {
        "messages": [{"role": "user", "content": prompt_text}],
        "temperature": 0.4,
        "stop": ["''''", " – –", "<|endoftext|>", "<|eot_id|>"],
    }


In [6]:
from openai_api import send_openai_request
import json
import time
import os
from loguru import logger
from data.synthetic import SyntheticDataset
import pandas as pd



def _send_request_waiting_out_503(request, model_name, poll_seconds=30):
    """Send ``request`` and return the response, waiting out 503 errors.

    An exception whose text contains "503" is treated as "server not up yet":
    the helper sleeps ``poll_seconds`` and sends the same request again, for
    as long as 503s keep coming back. Any other exception is re-raised so the
    caller's retry/backoff logic can deal with it.
    """
    while True:
        try:
            return send_openai_request(request, model_name)
        except Exception as exc:
            if "503" not in str(exc):
                raise
            logger.debug(f"503 error, sleep {poll_seconds} seconds")
            time.sleep(poll_seconds)


def online_AD_with_retries_component(
    model_name: str,
    pcl_name: str,
    num_retries: int = 4,
):
    """Query ``model_name`` for the components of every series and log the answers as JSONL.

    Reads the pickled DataFrame at ``pcl_name``, builds one component prompt
    per entry of its ``series_values`` column, sends it, and appends
    ``{"custom_id", "request", "response"}`` as one JSON line per series to
    ``{BASE_DIR}/results/synthetic/{pcl_name}/{model_name}/component.jsonl``.

    Args:
        model_name: Model identifier forwarded to ``send_openai_request``.
        pcl_name: Path of the pickle file to load; it is also embedded in the
            log path, the results path and every ``custom_id``.
        num_retries: Maximum attempts per series for non-503 failures, with
            exponential backoff (8, 16, 32, ... seconds) between attempts.
            Time spent waiting out 503 responses does not consume attempts.

    Returns:
        None. If the results file already exists nothing is sent, so that
        duplicate entries are never appended.
    """
    # NOTE: unpickling can execute arbitrary code - only load trusted files.
    df = pd.read_pickle(pcl_name)

    # Configure a file sink for this run only; it is removed in ``finally`` so
    # repeated calls do not pile up sinks and duplicate every log line.
    # NOTE(review): the log file is literally named ".log" and the path is
    # relative to the working directory (unlike the results path) - confirm
    # this is intended.
    log_fn = f"logs/synthetic/{pcl_name}/{model_name}/" + ".log"
    sink_id = logger.add(log_fn, format="{time} {level} {message}", level="INFO")
    try:
        results_dir = f'{BASE_DIR}/results/synthetic/{pcl_name}/{model_name}/'
        jsonl_fn = os.path.join(results_dir, 'component.jsonl')
        os.makedirs(results_dir, exist_ok=True)

        if os.path.exists(jsonl_fn):
            logger.error(f"Results file '{jsonl_fn}' already exists. Please delete it first to avoid duplicate entries.")
            print(f"Results file '{jsonl_fn}' already exists. Please delete it and run again.")
            return None

        # Loop-invariant, so fetched once. The index is reset because the loop
        # addresses rows by position (0..len-1), not by the frame's own labels.
        series = df['series_values'].reset_index(drop=True)

        for i in range(1, len(df) + 1):
            custom_id = f"{pcl_name}_{model_name}_{str(i).zfill(5)}"

            for attempt in range(num_retries):
                try:
                    request = create_component_prompt(i - 1, series)
                    # A response obtained after waiting out 503s is returned
                    # here and persisted below, instead of being discarded.
                    response = _send_request_waiting_out_503(request, model_name)

                    # Serialize first so a failure cannot leave a partial line in the file.
                    record = json.dumps({'custom_id': custom_id, 'request': request, 'response': response})
                    with open(jsonl_fn, 'a') as f:
                        f.write(record + '\n')
                    break
                except Exception as e:
                    logger.error(e)
                    # Back off exponentially, but do not wait after the last attempt.
                    if attempt + 1 < num_retries:
                        wait_time = 2 ** (attempt + 3)
                        logger.debug(f"Attempt {attempt + 1} failed. Waiting for {wait_time} seconds before retrying...")
                        time.sleep(wait_time)
            else:
                logger.error(f"Failed to process {custom_id} after {num_retries} attempts")
    finally:
        logger.remove(sink_id)

In [7]:
# Run component detection with 'gemini-1.5-flash' over the pickled series file.
# The path is relative to the current working directory (`src`, set in an earlier cell).
pcl_name = "../data/synthetic/component_series_400.pkl"
online_AD_with_retries_component('gemini-1.5-flash', pcl_name)


[32m2025-04-06 18:10:56.389[0m | [34m[1mDEBUG   [0m | [36mgemini_api[0m:[36msend_gemini_request[0m:[36m93[0m - [34m[1mAPI key: ***********************************4mhI[0m


Time series data can typically be decomposed into three main components such as Trend, Seasonality, and Residuals.
In this analysis, Residuals are further divided into Noise and Anomalies.

The four components exhibit the following characteristics:
* Trend: A long-term directional pattern or consistent upward/downward movement.
* Seasonality: Regular and predictable cycles repeating at consistent intervals.
* Noise: Random, irregular fluctuations without a meaningful pattern.
* Anomalies: Observations significantly deviating from typical or expected patterns.

Below is the given time series data:
-0.63 -0.57 -0.52 -0.47 -0.42 -0.38 -0.33 -0.3 -0.26 -0.24 -0.22 -0.2 -0.2 -0.19 -0.2 -0.21 -0.23 -0.25 -0.28 -0.32 -0.36 -0.4 -0.45 -0.5 -0.55 -0.6 -0.65 -0.7 -0.75 -0.79 -0.84 -0.88 -0.91 -0.94 -0.96 -0.98 -0.99 -1.0 -1.0 -0.99 -0.98 -0.96 -0.93 -0.9 -0.86 -0.82 -0.77 -0.72 -0.67 -0.62 -0.57 -0.51 -0.46 -0.41 -0.36 -0.31 -0.27 -0.23 -0.2 -0.18 -0.15 -0.14 -0.13 -0.13 -0.14 -0.15 -0.17 -0.19 

[32m2025-04-06 18:10:57.771[0m | [34m[1mDEBUG   [0m | [36mgemini_api[0m:[36msend_gemini_request[0m:[36m93[0m - [34m[1mAPI key: ***********************************4mhI[0m
[32m2025-04-06 18:10:59.148[0m | [34m[1mDEBUG   [0m | [36mgemini_api[0m:[36msend_gemini_request[0m:[36m93[0m - [34m[1mAPI key: ***********************************4mhI[0m
[32m2025-04-06 18:11:00.627[0m | [34m[1mDEBUG   [0m | [36mgemini_api[0m:[36msend_gemini_request[0m:[36m93[0m - [34m[1mAPI key: ***********************************4mhI[0m
[32m2025-04-06 18:11:02.716[0m | [34m[1mDEBUG   [0m | [36mgemini_api[0m:[36msend_gemini_request[0m:[36m93[0m - [34m[1mAPI key: ***********************************4mhI[0m
[32m2025-04-06 18:11:04.465[0m | [34m[1mDEBUG   [0m | [36mgemini_api[0m:[36msend_gemini_request[0m:[36m93[0m - [34m[1mAPI key: ***********************************4mhI[0m
[32m2025-04-06 18:11:06.127[0m | [34m[1mDEBUG   [0m | [36mgemini_api[0m: