# Utility Functions

> 通用工具函数和路径设置
>
> General utility functions and path settings

## 简介/Description:

utils 模块包含通用工具函数和项目中的关键路径设置，如 data_path。这些工具函数与项目的各个模块没有直接耦合，提供了项目中可复用的常用功能。

The utils module contains general utility functions and key path settings for the project, such as data_path. These utility functions are decoupled from the project’s main modules and provide commonly used reusable functionality across the project.

## 主要符号/Main symbols:

- data_path: 数据存储路径的设置，用于配置数据集的根目录。

  data_path: Defines the data storage path, used for setting the dataset root directory.


- other_util_function: 其他工具函数，未来可以扩展。

  other_util_function: Placeholder for other utility functions, expandable for future needs.

In [14]:
#| default_exp utils

In [15]:
#| hide
%load_ext autoreload
%autoreload 2
from nbdev.showdoc import *

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [16]:
#| export
from pathlib import Path
import inspect
import namable_classify
# Locate the installed `namable_classify` package and walk up to the repository root.
lib_init_path = Path(inspect.getfile(namable_classify))
lib_directory_path = lib_init_path.parent
lib_repo_path = lib_directory_path.parent

# Working directories that live next to the package directory.
runs_path = lib_repo_path / 'runs'
runs_figs_path = runs_path / 'figs'
data_path = lib_repo_path / 'data'

# Create them at import time; existing directories are left untouched.
runs_path.mkdir(parents=True, exist_ok=True)
runs_figs_path.mkdir(parents=True, exist_ok=True)
data_path.mkdir(parents=True, exist_ok=True)

In [17]:
#| export
from fastcore.basics import patch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
@patch
def print_trainable_parameters(model:nn.Module):
    """
    Print the number of trainable parameters in the model.

    Registered on `nn.Module` through fastcore's `@patch`, so it is called as
    `module.print_trainable_parameters()`.

    The printed line reports the count of parameters with `requires_grad=True`,
    the total parameter count, and the trainable share as a percentage.
    A module without any parameters reports 0.00% instead of raising
    `ZeroDivisionError`.
    """
    trainable_params = 0
    all_param = 0
    for param in model.parameters():
        num_elements = param.numel()
        all_param += num_elements
        if param.requires_grad:
            trainable_params += num_elements
    # Guard the ratio: parameter-free modules (e.g. a bare activation) have all_param == 0.
    trainable_percent = 100 * trainable_params / all_param if all_param else 0.0
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {trainable_percent:.2f}"
    )

In [21]:
#| export
from bigmodelvis import Visualization
@patch
def print_model_pretty(self:nn.Module):
    """Show this module's structure by calling bigmodelvis' `Visualization.structure_graph()` on it."""
    visualizer = Visualization(self)
    visualizer.structure_graph()

In [1]:
#| export
import torch
import numpy as np
def ensure_array(x: torch.TensorType | np.ndarray | list):
    if isinstance(x, torch.Tensor):
        return x.detach().cpu().numpy()
    elif isinstance(x, np.ndarray):
        return x
    else: # list
        return np.array(x)

In [2]:
#| export
from sklearn.metrics import *

# from scipy.special import softmax
import torch.nn.functional as F
import numpy as np
import torch
from loguru import logger
def default_on_exception(default_value=None):
    """
    Build a decorator that makes a function return `default_value` instead of raising.

    Any `Exception` raised by the decorated function is logged as a warning
    through `logger` and replaced by `default_value`. Successful calls return
    the function's own result unchanged. The wrapper keeps the decorated
    function's name, docstring and other metadata.
    """
    # Function-scope import keeps this block self-contained.
    from functools import wraps

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            try:
                result = func(*args, **kwargs)
                return result
            except Exception as e:
                # Deliberate best-effort: report the failure and fall back to the default.
                logger.warning(f"An exception occurred: {e}")
                return default_value
        return wrapper
    return decorator

# Rebind `roc_auc_score` to a guarded version: if the call raises, a warning is
# logged and -1 is returned instead of propagating the exception.
roc_auc_score = default_on_exception(-1)(roc_auc_score)

def compute_classification_metrics(
    y_true: np.ndarray,  # 1d array-like, or label indicator array / sparse matrix
    y_pred_logits: np.ndarray,  # per-class scores, one row per sample (argmax / softmax are taken over axis 1)
    logits_to_prob: bool = False,  # if True, apply softmax over axis 1 to `y_pred_logits` before the score-based metrics
    labels:list[int|str]|None = None,  # list of labels
):
    """
    Compute a dictionary of classification metrics from true labels and per-class scores.

    Both inputs are first converted with `ensure_array`, so tensors, arrays and
    lists are accepted.

    Returned keys: `acc1`, `acc2`, `acc3`, `acc5`, `acc10`, `acc20` (top-k accuracy),
    `roc_auc` (macro, one-vs-rest; -1 if it could not be computed),
    `matthews_corrcoef`, `f1`, `precision`, `recall` (macro averaged),
    `log_loss`, `balanced_accuracy`, `cohen_kappa` and `hinge_loss`.

    Score-based metrics use the (optionally softmaxed) scores. The others use
    the hard prediction, which is the argmax over axis 1. When `labels` is
    given, `labels[i]` is taken to name score column `i`, and the argmax index
    is translated to that label so the prediction is comparable with `y_true`.
    """
    y_true = ensure_array(y_true)
    y_pred_logits = ensure_array(y_pred_logits)
    y_pred_probs = (
        np.array(F.softmax(torch.Tensor(y_pred_logits), dim=1))
        if logits_to_prob
        else y_pred_logits
    )  # label indicator array / sparse matrix
    # argmax yields column indices; softmax is monotonic, so raw scores give the same winner.
    y_pred = np.argmax(y_pred_logits, axis=1)
    if labels is not None:
        # Translate column indices into label values; without this, y_pred (indices)
        # and y_true (labels) disagree whenever labels is not 0..n-1.
        y_pred = np.asarray(labels)[y_pred]
    top_k_res = {
        f"acc{k}": top_k_accuracy_score(y_true, y_pred_probs, k=k, labels=labels)
        for k in [1, 2, 3, 5, 10, 20]
    }
    balance_res = dict(
        roc_auc=roc_auc_score(
            y_true, y_pred_probs, average="macro", multi_class="ovr", labels=labels
        ),  # one-vs-rest is the harder setting and is sensitive to class imbalance
        matthews_corrcoef=matthews_corrcoef(y_true, y_pred),
        f1=f1_score(y_true, y_pred, average="macro", labels=labels),
        precision=precision_score(y_true, y_pred, average="macro", labels=labels),
        recall=recall_score(y_true, y_pred, average="macro", labels=labels),
        log_loss=log_loss(
            y_true,
            y_pred_probs,
            labels=labels
        ),
        balanced_accuracy=balanced_accuracy_score(y_true, y_pred),
        cohen_kappa=cohen_kappa_score(y_true, y_pred, labels=labels),
        hinge_loss=hinge_loss(y_true, y_pred_probs, labels=labels),
    )

    return top_k_res | balance_res

In [5]:
# Smoke test: 100 random integer labels in [0, 20) against 100 random
# softmax-normalized score rows over 20 classes.
compute_classification_metrics(torch.randint(0, 20, size=(100,)), 
                               torch.softmax(torch.randn(100, 20), dim=1))



{'acc1': 0.04,
 'acc2': 0.07,
 'acc3': 0.14,
 'acc5': 0.3,
 'acc10': 0.48,
 'acc20': 1.0,
 'roc_auc': 0.4810993748925897,
 'matthews_corrcoef': -0.016189245768220034,
 'f1': 0.030357142857142853,
 'precision': 0.034999999999999996,
 'recall': 0.027424242424242427,
 'log_loss': 3.4732603841931753,
 'balanced_accuracy': 0.027424242424242427,
 'cohen_kappa': -0.01608806096528359,
 'hinge_loss': 1.1724018}

In [6]:
#| export
def append_dict_list(dict, name, value):
    """
    Append `value` to the list stored under `dict[name]`, creating it if missing.

    The entry is rebound to a new list on every call; a list previously stored
    under `name` is not mutated.
    """
    if name in dict:
        dict[name] = dict[name] + [value]
    else:
        dict[name] = [value]

In [7]:
#| export
def partial_with_self(method, *args, **kwargs):
    """
    Pre-bind arguments to `method` while leaving its first (`self`) slot open.

    The returned function takes `self` first, then passes the pre-bound
    positional arguments followed by the call-time ones. Call-time keyword
    arguments override pre-bound ones with the same name.
    """
    def wrapped(self, *additional_args, **additional_kwargs):
        # Later entries win, so call-time keywords take precedence.
        merged_kwargs = {**kwargs, **additional_kwargs}
        return method(self, *args, *additional_args, **merged_kwargs)
    return wrapped

In [8]:
#| hide
# Trigger nbdev's notebook-to-module export.
import nbdev; nbdev.nbdev_export()