In [1]:
import os

# os.environ["LANGCHAIN_TRACING_V2"] = "true"
# os.environ["LANGCHAIN_PROJECT"] = "tx generator - setup"

In [2]:
import pandas as pd
from datetime import datetime
from enum import Enum
from pytz import timezone

from case_code.code_downloader import CodeDownloader, get_metadata
from case_code.code_loader import CodeLoader
from case_code.code_transformer import (
    transform,
    get_transformed_stats,
    get_all_transformed_stats,
)

In [3]:
class ActionType(Enum):
    """Pipeline steps that the user can be asked to confirm before they run."""

    # The string values are interpolated into the status messages printed when
    # an action proceeds without asking.
    DOWNLOAD = "download"
    TRANSFORM = "transform"


def get_time_difference_desc(dt_object: datetime) -> str:
    """Describe how long ago ``dt_object`` was, in coarse human-readable terms.

    Args:
        dt_object: The moment to describe. Both naive and timezone-aware
            datetimes are accepted.

    Returns:
        "a few seconds ago" for anything under a minute (this includes moments
        in the future, whose elapsed time is negative), otherwise a count of
        whole minutes, hours, or days followed by "ago".
    """
    # Build "now" with the same tzinfo as the input: subtracting a naive
    # datetime from an aware one (or vice versa) raises TypeError.
    now = datetime.now(dt_object.tzinfo)
    elapsed = now - dt_object

    seconds = elapsed.total_seconds()
    if seconds < 60:
        return "a few seconds ago"

    minutes = int(seconds // 60)
    if minutes < 60:
        return f"{minutes} minute{'s' if minutes > 1 else ''} ago"

    hours = minutes // 60
    if hours < 24:
        return f"{hours} hour{'s' if hours > 1 else ''} ago"

    days = elapsed.days
    return f"{days} day{'s' if days > 1 else ''} ago"


def should_continue_action(action_type: ActionType, metadata_func) -> bool:
    """Decide whether an action should run, asking the user if it has run before.

    Args:
        action_type: The step being considered; it selects the question wording
            and is named in the fallback messages.
        metadata_func: Zero-argument callable returning a mapping whose
            ``"last_updated"`` entry is accepted by ``datetime.fromtimestamp``.

    Returns:
        True when the user answers "y" or "yes" (case-insensitive, surrounding
        whitespace ignored). Also True when ``metadata_func`` raises
        ``FileNotFoundError`` or any other ``Exception`` occurs, in which case a
        message is printed and the action proceeds. False for any other answer.
    """
    process_action = True  # Default: run the action unless the user declines.

    try:
        metadata = metadata_func()
        dt_object = datetime.fromtimestamp(metadata["last_updated"])
        readable_date = dt_object.strftime("%Y-%m-%d %H:%M:%S")
        # Computed once so the printed line and the prompt always agree.
        age_desc = get_time_difference_desc(dt_object)

        # Pick the question that matches the action type.
        prompt_message = {
            ActionType.DOWNLOAD: "Do you want to download the latest files?",
            ActionType.TRANSFORM: "Do you want to process transformation?",
        }.get(action_type, "Do you want to proceed?")

        # Printed separately from the prompt, presumably so the timestamp
        # remains visible in the saved cell output.
        print(f"Last updated:{readable_date} ({age_desc})")
        response = input(
            f"Last updated was {readable_date} ({age_desc}). {prompt_message} (y/n) "
        )
        # Compare in lower case: the previous accepted set contained "Yes",
        # which a lower-cased answer could never equal.
        process_action = response.strip().lower() in {"y", "yes"}
    except FileNotFoundError:
        print(
            f"No {action_type.value} metadata found. Proceeding with {action_type.value}."
        )
    except Exception as e:
        # Deliberate best effort: any other failure falls back to running the action.
        print(f"Error occurred: {str(e)}. Proceeding with {action_type.value}.")

    return process_action

### Download source code


In [4]:
# Ask whether the source files should be downloaded again; the answer gates
# the download cell that follows.
shall_download = should_continue_action(
    action_type=ActionType.DOWNLOAD,
    metadata_func=get_metadata,
)

Last updated:2024-08-21 17:09:15 (38 minutes ago)


In [5]:
# Run the download only when the confirmation step allowed it.
if shall_download:
    downloader = CodeDownloader()
    # NOTE(review): the return value is presumably the number of downloaded
    # files; confirm against CodeDownloader.download.
    downloaded_count = downloader.download()

In [6]:
# Read the download metadata and report the file and case totals it records.
metadata = get_metadata()
print(f"Total files: {metadata['total_files']}")
print(f"Total cases: {metadata['total_cases']}")

Total files: 125
Total cases: 110


### Transform source code


In [7]:
# Ask whether the transformation should be run again; the answer gates the
# transform cell that follows.
shall_transform = should_continue_action(
    action_type=ActionType.TRANSFORM,
    metadata_func=get_transformed_stats,
)

No transform metadata found. Proceeding with transform.


In [8]:
# Run the transformation only when the confirmation step allowed it.
if shall_transform:
    loader = CodeLoader()
    # `transform` is awaited at the top level of the cell, which relies on the
    # notebook kernel's support for top-level await.
    transformed_stats = await transform(loader=loader)

Start transforming with gemini-1.0-pro...


I0000 00:00:1724233761.636694  275278 check_gcp_environment_no_op.cc:29] ALTS: Platforms other than Linux and Windows are not supported
Transforming: 100%|██████████| 125/125 [02:14<00:00,  1.08s/file]


In [9]:
# Load the stats recorded for the transformation run.
stats = get_transformed_stats()

# Split the recorded duration (presumably in seconds) into minutes and the remainder.
minutes, seconds = divmod(stats["total_time_taken"], 60)
# Leave the minutes part out when the run took less than a minute.
if minutes > 0:
    print(f"Time used: {minutes}m {seconds}s")
else:
    print(f"Time used: {seconds}s")

# Two-row summary table; as the cell's last expression it is rendered as the output.
data = {
    "Title": ["Total Cases", "Failed Cases"],
    "Count": [stats["total_cases"], stats["failed_cases"]],
}
pd.DataFrame(data)

Time used: 2m 15s


Unnamed: 0,Title,Count
0,Total Cases,110
1,Failed Cases,83


### Aggregate stats from all models


In [5]:
# Fetch the stats of every model and load them into a DataFrame.
# The code below looks stat names up with `.loc`, i.e. it expects one row per
# stat and one column per model.
stats = get_all_transformed_stats()
df = pd.DataFrame(stats)

# Convert the 'last_updated' epoch seconds to formatted local time.
local_tz = timezone("Asia/Taipei")
df.loc["last_updated"] = (
    pd.to_datetime(df.loc["last_updated"], unit="s")
    .dt.tz_localize("UTC")
    .dt.tz_convert(local_tz)
    .dt.strftime("%Y-%m-%d %H:%M:%S")
)

# Coerce the counters to a numeric dtype before doing arithmetic. Rows pulled
# from a mixed-type frame are presumably object dtype, where dividing by zero
# raises ZeroDivisionError; numeric dtypes yield inf/NaN instead, so one model
# with no cases or a zero duration cannot break the whole table.
total_cases = pd.to_numeric(df.loc["total_cases"])
failed_cases = pd.to_numeric(df.loc["failed_cases"])
duration_sec = pd.to_numeric(df.loc["total_time_taken"])

successful_cases = total_cases - failed_cases

df.loc["successful_cases_total"] = (
    successful_cases.astype(str) + " / " + total_cases.astype(str)
)

# Success rate as a percentage of all cases.
df.loc["success_rate"] = successful_cases / total_cases * 100

# Throughput: successfully transformed cases per second.
df.loc["success_case_per_sec"] = successful_cases / duration_sec

# Drop the rows that only served as inputs and relabel the rest for display.
# Chained, non-inplace calls keep this step a single expression.
df = df.drop(index=["cases", "total_cases", "failed_cases"]).rename(
    index={
        "last_updated": "Last Updated",
        "total_time_taken": "Duration (sec)",
        "successful_cases_total": "Successful Cases / Total Cases",
        "success_rate": "Success Rate (%)",
        "success_case_per_sec": "Transformed Cases per Sec",
    }
)

df

Unnamed: 0,gpt-4o-mini,gpt-4o-2024-08-06,gpt-4o,claude-3-5-sonnet-20240620,gemini-1.5-pro,gemini-1.0-pro
Last Updated,2024-08-21 17:12:20,2024-08-21 17:22:21,2024-08-21 17:17:29,2024-08-21 17:33:01,2024-08-21 17:44:43,2024-08-21 17:51:36
Duration (sec),182,226,218,599,420,135
Successful Cases / Total Cases,110 / 110,110 / 110,110 / 110,69 / 110,74 / 110,27 / 110
Success Rate (%),100.0,100.0,100.0,62.727273,67.272727,24.545455
Transformed Cases per Sec,0.604396,0.486726,0.504587,0.115192,0.17619,0.2
