# Creation of feature importances and learning curves

## Init config

In [None]:
import os
import numpy as np
import pandas as pd
from datetime import datetime

from src.config import (
    DATA_SPLIT_DIR,
    TRAIN_RAW_FILENAME,
    MODELS_DIR
)
from src.model_evaluation import (
    save_feature_importances_data,
    plot_feature_importances_from_file,
    save_learning_curve_data,
    plot_learning_curves_from_file,
)

## Load train data

In [None]:
# Read the raw training split from disk, then separate the predictor columns
# from the "Diabetes_012" target column.
df_train_raw = pd.read_csv(
    os.path.join(DATA_SPLIT_DIR, TRAIN_RAW_FILENAME)
)
features_train_raw = df_train_raw.drop(columns=["Diabetes_012"])
target_train_raw = df_train_raw.loc[:, "Diabetes_012"]

## Evaluation

In [None]:
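# Disabled manual override for debugging a single model from a local folder;
# the loop below iterates over MODELS_DIR and does not use these names.
# model_name = "20250709122645_DecisionTreeClassifier_f100772_baseline_smote"
# folder = os.path.join(
#     r"D:\Projekte\Predict_Diabetes_From_BRFSS2015_old\debugmodels", model_name
# )  # os.path.join(MODELS_DIR, model_name)
# filename = os.path.join(folder, model_name)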

In [None]:
def _format_duration(delta):
    """Render a ``timedelta`` as ``"<days> d <hours> h <minutes> m <seconds> s"``.

    Only whole seconds are reported; the microsecond part of *delta* is
    dropped.
    """
    hours, remainder = divmod(delta.seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    return f"{delta.days} d {hours} h {minutes} m {seconds} s"


# Run the feature-importance and learning-curve steps for every model folder
# found directly under MODELS_DIR, printing how long each step took.
# os.listdir() returns entries in arbitrary order, so sort them to keep the
# processing order (and the printed log) reproducible between runs.
for subdir_name in sorted(os.listdir(MODELS_DIR)):
    model_folder = os.path.join(MODELS_DIR, subdir_name)
    if not os.path.isdir(model_folder):
        # Plain files next to the model folders are not models; skip them.
        continue
    print()
    print("#####################################")
    print(subdir_name)
    print()

    # --- Feature importances: save step followed by plot step, timed together.
    start_timestamp = datetime.now()

    save_feature_importances_data(
        model_folder,
        X=features_train_raw,
        y=target_train_raw,
    )

    plot_feature_importances_from_file(model_folder)

    print(
        "feature importance duration "
        f"{_format_duration(datetime.now() - start_timestamp)}"
    )

    # --- Learning curves: save step followed by plot step, timed together.
    start_timestamp = datetime.now()

    # NOTE(review): cv=2 and only two train sizes (0.1 and 1.0) look like
    # quick smoke-test settings -- confirm before using the resulting curves.
    save_learning_curve_data(
        model_folder,
        X=features_train_raw,
        y=target_train_raw,
        cv=2,
        scoring="f1_macro",
        n_jobs=-1,
        train_sizes=np.linspace(0.1, 1.0, 2),
    )

    plot_learning_curves_from_file(model_folder)

    print(
        "learning curve duration "
        f"{_format_duration(datetime.now() - start_timestamp)}"
    )
    print()