## Import the needed packages

In [None]:
# package for handling files and directories
import os
# package for handling the interpreter settings (module search path)
import sys
# add path to utilities directory
sys.path.insert(0, './../../')
# import the needed path
from utilities import path_log_wandb
# package for handling the logs history
import wandb
# package for handling logs tabular data
import pandas as pd
# package for handling the environment variables
from dotenv import load_dotenv
# get the environment variables
load_dotenv()

True

## Global variables

In [None]:
# Weights & Biases settings
## path prefix that the run ids are appended to when querying wandb
wandb_repo = "bugi-sulistiyo-universitas-mulawarman/CAD - Glaucoma Segmentation/"
## raw value of the RUN_ID environment variable (None when it is not set)
run_id = os.getenv("RUN_ID")

## Get the training logs from Wandb

### Prepare the dictionary and variables

In [4]:
# build the mapping between the model names and their wandb run ids
## the RUN_ID value is parsed as comma-separated "model:run_id" pairs
## fail early with a clear message when the environment variable is missing
if not run_id:
    raise ValueError("RUN_ID environment variable is not set or empty")

## prepare the dictionary variable to store the runs id
runs_id = {}

## extract the runs id from the environment variable
for element in run_id.split(","):
    # tolerate spaces around the separators
    element = element.strip()
    # skip the empty entries, e.g. the one left by a trailing comma
    if not element:
        continue
    # split on the first colon only, so the rest stays in the run id
    key, separator, value = element.partition(":")
    if not separator:
        raise ValueError(
            f"invalid RUN_ID entry {element!r}, expected the 'model:run_id' format"
        )
    runs_id[key.strip()] = value.strip()

# delete the run_id variable
del run_id

In [None]:
# create the directory to store the logs file data locally
# (exist_ok=True keeps this cell re-runnable when the directory already exists)
os.makedirs(path_log_wandb, exist_ok=True)

In [6]:
# initialize the wandb api client used to fetch the runs
api = wandb.Api()

### Get the data

In [None]:
# columns kept in the exported logs, in their final order
log_columns = ["epoch",
               "train_loss", "train_auc", "train_f1", "train_accuracy", "train_precision", "train_recall",
               "val_loss", "val_auc", "val_f1", "val_accuracy", "val_precision", "val_recall"]
# collect the per-model logs here and concatenate them once after the loop
# (concatenating inside the loop re-copies the accumulated data every iteration)
log_frames = []

# download the logs data from wandb
for model, log_id in runs_id.items():
    # get the run object for the specific log id in wandb
    run = api.run(f"{wandb_repo}{log_id}")
    # get the log data in tabular format
    # NOTE(review): per the wandb docs, run.history() returns a sampled history
    # (500 rows by default) -- confirm the runs are shorter than that, or switch
    # to run.scan_history() to get every logged row
    log_df = run.history()

    # sort the values by the logged step and expose it as the epoch column
    log_df = log_df.sort_values(by="_step").rename(columns={"_step": "epoch"})
    # keep only the wanted columns in the wanted order (this also discards the
    # _timestamp column); copy so that adding the model column below does not
    # write into a slice of the original dataframe
    log_df = log_df[log_columns].copy()

    # save the log data to the local directory
    log_df.to_csv(os.path.join(path_log_wandb, f"{model}.csv"), index=False)
    # add the model name to the dataframe
    log_df["model"] = model
    # keep the log data for the merged dataframe
    log_frames.append(log_df)

# merge the logs with a fresh index; keep an empty dataframe when there are no runs
merge_df = pd.concat(log_frames, ignore_index=True) if log_frames else pd.DataFrame()
# save the merged dataframe to the local directory
merge_df.to_csv(os.path.join(path_log_wandb, "merged_log.csv"), index=False)