## Load modules and set iterables

In [None]:
import os

from modules.selection import run_shap_selection
from modules.training import retrain_on_reduced_features

In [None]:
# Directory layout (relative to the notebook) for data, models,
# SHAP explanations, and metrics
train_dir = '../data/processed/train'
test_dir = '../data/processed/test'
feature_select_dir = '../data/reduced'
models_dir = '../models'
explanations_dir = '../shap-explanations'
metrics_dir = '../metrics'

# Dataset names are taken from the entries of the models directory, sorted
# for a reproducible order. Hidden entries (e.g. .DS_Store, .gitkeep,
# .ipynb_checkpoints) are skipped so they are never treated as datasets.
dataset_names = sorted(
    entry for entry in os.listdir(models_dir) if not entry.startswith('.')
)

# Feature selection aggregation strategies
strategies = ['sum', 'max']

# Cumulative importance thresholds swept for the 'sum' strategy
sum_thresholds = [0.5, 0.6, 0.7, 0.8, 0.9, 0.95]

# Minimum SHAP value cutoffs swept for the 'max' strategy
max_thresholds = [0.01, 0.05, 0.1, 0.15, 0.25, 0.5]

# Model types passed to both the feature selection and retraining steps
model_types = ['logreg', 'dt', 'rf', 'xgb', 'svc']

## Generate reduced datasets

In [None]:
# Run SHAP-based feature selection over every dataset and model type,
# sweeping both strategies with their thresholds. The reduced datasets are
# created by this call; save_dir is set to feature_select_dir.
run_shap_selection(
    dataset_names=dataset_names,
    model_types=model_types,
    strategies=strategies,
    sum_thresholds=sum_thresholds,
    max_thresholds=max_thresholds,
    train_dir=train_dir,
    test_dir=test_dir,
    explanations_dir=explanations_dir,
    save_dir=feature_select_dir,
    metrics_dir=metrics_dir,
)

## Retrain models on reduced datasets

In [None]:
# Directory containing the feature-reduced training datasets
reduced_train_dir = '../data/reduced/train'

# List the reduced training dataset files. os.listdir returns entries in
# arbitrary order, so they are sorted for reproducible runs, and hidden
# entries (e.g. .DS_Store, .ipynb_checkpoints) are skipped because they are
# not datasets.
reduced_datasets = sorted(
    name for name in os.listdir(reduced_train_dir) if not name.startswith('.')
)

In [None]:
# Retrain each model type on the feature-reduced training datasets listed
# in reduced_datasets
retrain_on_reduced_features(
    dataset_names=dataset_names,
    model_types=model_types,
    reduced_datasets=reduced_datasets,
    models_dir=models_dir,
    reduced_train_dir=reduced_train_dir,
    selection_parse_mode="model",
)