In [None]:
import sys
from collections import Counter
from pathlib import Path
from typing import Dict, List, Tuple, Union

import pandas as pd
from rich import print

# Make the parent of the current working directory importable so the
# project-local `modules` package imported below can be found.
abs_module_path = Path("./../").resolve()
if abs_module_path.exists():
    # Register the directory only once, even if this cell is re-run.
    if str(abs_module_path) not in sys.path:
        sys.path.append(str(abs_module_path))

from modules.dl.trainer.utils import calculate_class_weight
from modules.dl.utils import gen_class_counts_dict
from modules.shared.config import load_config

In [None]:
# Look up the dataset path in the tool configuration and echo it.
tool_config = load_config("tool-com.toml")
file = Path(tool_config["file"])
print(f"file = '{file}'")

# Read the CSV; the 'utf_8_sig' codec skips a leading byte-order mark if present.
df = pd.read_csv(file, encoding="utf_8_sig")
df

In [None]:
# Distinct values of the "class" column in sorted order
# (iterating a Counter yields its keys).
num2class_list: list = sorted(
    Counter(df["class"])
)
num2class_list

In [None]:
# Partition the rows by the value of the "dataset" column.
train_df, valid_df, test_df = (
    df[df["dataset"] == split_label]
    for split_label in ("train", "valid", "test")
)

## data samples

In [None]:
# Report the number of rows in each split (order: test, train, valid).
for frame_name, frame in (
    ("test_df", test_df),
    ("train_df", train_df),
    ("valid_df", valid_df),
):
    print(f"{frame_name} = {len(frame)}")

## class_weight

In [None]:
# For each split (order: test, train, valid) print the per-class counts
# returned by `gen_class_counts_dict` and the result of passing those counts
# to `calculate_class_weight`.
temp_dict: Dict[str, int]
for split_name, split_df in (
    ("test_df", test_df),
    ("train_df", train_df),
    ("valid_df", valid_df),
):
    temp_dict = gen_class_counts_dict(split_df, num2class_list)
    print(f"class_counts of `{split_name}` : {temp_dict}")
    # The trailing "\n" leaves a blank line between the reports of each split.
    print(f"class_weight of `{split_name}` : {calculate_class_weight(temp_dict)}\n")