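# This notebook trains models with cross-validation on the entire dataset
- Computes predicted probabilities (`pred_probs`) on the held-out fold of each split and saves the results as NumPy files.
- Before running, make sure you have run ``preprocess_data`` and ``create_labels_df`` locally and pushed/pulled the newest ``cifar10_test_consensus_dataset``.
- **Note:** as currently written, the cross-validation cell is called with `mini_df` (the first 15 images of each label) for quick testing; pass `df` instead to train on the entire dataset.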

In [1]:
%load_ext autoreload
%autoreload 2

import sys

sys.path.insert(0, "../")

import numpy as np
import pandas as pd
import pickle
import datetime
from pathlib import Path
import cleanlab
from utils.cross_validation_autogluon import cross_val_predict_autogluon_image_dataframe

# Lift pandas' display limits so DataFrames are shown with all rows, all columns,
# and untruncated cell contents.
for display_option in ("display.max_rows", "display.max_columns", "display.max_colwidth"):
    pd.set_option(display_option, None)

## Load data

In [7]:
# Read the consensus test dataset from its CSV file.
data_filepath = "./data/benchmark_data/cifar10_test_consensus_dataset.csv"
df = pd.read_csv(data_filepath)

# Build a small dataset for quick test runs: the first `num_from_each_group`
# rows of every label.
num_from_each_group = 15
rows_by_label = df.groupby("label")
mini_df = rows_by_label.head(num_from_each_group)

# Display how many images each label contributes to the mini dataset.
(
    mini_df
    .groupby("label")["image"]
    .count()
    .reset_index()
)

Unnamed: 0,label,image
0,0,15
1,1,15
2,2,15
3,3,15
4,4,15
5,5,15
6,6,15
7,7,15
8,8,15
9,9,15


**Model and data saving params**

In [8]:
# Folder prefix used for saving/loading; "_" + the model name is appended to it per model.
model_folder = "./data/cifar10_consensus_worst_25_coin20"

# Models (timm checkpoint names) for which cross-validated predicted probabilities are generated.
models = [
    "timm/resnet18.a1_in1k",
    "timm/swin_base_patch4_window7_224.ms_in22k_ft_in1k",
]

# Cross-validation parameters
num_cv_folds = 5  # the number K in stratified K-fold cross-validation
verbose = 1  # verbosity for data loading during the cross-validation -> NumPy save step

# Shared model parameters
# epochs = 100
# holdout_frac = 0.2
time_limit = 21600  # forwarded as `time_limit` to the cross-validation helper (presumably seconds -- confirm)
random_state = 123  # forwarded as `random_state` to the cross-validation helper

## Run cross validation on `models`

In [9]:
%%time
# Cross-validate every entry of `models`, one after another.
# NOTE: the helper is called with `mini_df` (the small test subset), not the full `df`.
for model in models:

    print("----")
    print(f"Running cross-validation for model: {model}")

    # Select the timm checkpoint used for this run.
    MODEL_PARAMS = {"model.timm_image.checkpoint_name": model}

    # Every model gets its own output folder; the cross-validation results are
    # saved there as pickle files for each fold.
    cv_out_folder = f"{model_folder}_{model}/"

    # The return value is not needed here, so it is bound to `_` and discarded.
    _ = cross_val_predict_autogluon_image_dataframe(
        df=mini_df,
        out_folder=cv_out_folder,
        n_splits=num_cv_folds,
        model_params=MODEL_PARAMS,
        time_limit=time_limit,
        random_state=random_state,
    )

Global seed set to 123


----
Running cross-validation for model: timm/resnet18.a1_in1k
----
Running Cross-Validation on Split: 0


model.safetensors:   0%|          | 0.00/46.8M [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name              | Type                            | Params
----------------------------------------------------------------------
0 | model             | TimmAutoModelForImagePrediction | 11.2 M
1 | validation_metric | MulticlassAccuracy              | 0     
2 | loss_func         | CrossEntropyLoss                | 0     
----------------------------------------------------------------------
11.2 M    Trainable params
0         Non-trainable params
11.2 M    Total params
44.727    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Epoch 0, global step 1: 'val_accuracy' reached 0.08333 (best 0.08333), saving model to 'g:\\GitHub\\multiannotator-benchmarks\\AutogluonModels\\ag-20240105_203339\\epoch=0-step=1.ckpt' as top 3


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Epoch 1, global step 2: 'val_accuracy' reached 0.00000 (best 0.08333), saving model to 'g:\\GitHub\\multiannotator-benchmarks\\AutogluonModels\\ag-20240105_203339\\epoch=1-step=2.ckpt' as top 3


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Epoch 2, global step 3: 'val_accuracy' reached 0.12500 (best 0.12500), saving model to 'g:\\GitHub\\multiannotator-benchmarks\\AutogluonModels\\ag-20240105_203339\\epoch=2-step=3.ckpt' as top 3


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Epoch 3, global step 4: 'val_accuracy' reached 0.12500 (best 0.12500), saving model to 'g:\\GitHub\\multiannotator-benchmarks\\AutogluonModels\\ag-20240105_203339\\epoch=3-step=4.ckpt' as top 3


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Epoch 4, global step 5: 'val_accuracy' was not in top 3


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Epoch 5, global step 6: 'val_accuracy' was not in top 3


Validation: 0it [00:00, ?it/s]

Predicting: 0it [00:00, ?it/s]

Predicting: 0it [00:00, ?it/s]

Predicting: 0it [00:00, ?it/s]

Predicting: 0it [00:00, ?it/s]

AttributeError: 'MultiModalPredictor' object has no attribute 'predict_feature'

## Read per-fold pickle files from cross-validation and save the data as NumPy arrays

In [None]:
# Utility for reading a pickled object back from disk
def load_pickle(pickle_file_name, verbose=1):
    """Read and return the object stored in a pickle file.

    Parameters
    ----------
    pickle_file_name
        Path of the pickle file to open.
    verbose
        When truthy, print the file name before loading it.

    Returns
    -------
    The unpickled object.
    """
    if verbose:
        print(f"Loading {pickle_file_name}")

    with open(pickle_file_name, "rb") as pickle_file:
        return pickle.load(pickle_file)

# Mapping from the original label name to its integer class index.
# Defined before the lookup that uses it so the cell reads top to bottom.
label_name_to_idx_map = {
    "airplane": 0,
    "automobile": 1,
    "bird": 2,
    "cat": 3,
    "deer": 4,
    "dog": 5,
    "frog": 6,
    "horse": 7,
    "ship": 8,
    "truck": 9,
}


def _label_idx_from_file_path(file_path):
    """Return the class index for one image path.

    The label name is taken from the second-to-last path component (the folder
    that directly contains the image file) and looked up in
    `label_name_to_idx_map`.

    Raises
    ------
    KeyError
        If that folder name is not a key of `label_name_to_idx_map`.
    IndexError
        If the path has fewer than two components.
    """
    return label_name_to_idx_map[Path(file_path).parts[-2]]


# Get the original labels (aka "true labels" y) for an array of image file paths.
# `otypes` is given explicitly so that an empty array of paths yields an empty
# result; without it np.vectorize has to call the function on the first element
# to infer the output type and therefore raises on size-0 input.
get_orig_label_idx_from_file_path = np.vectorize(_label_idx_from_file_path, otypes=[int])

#### Combine the per-fold pickle files into a single set of files per model

In [None]:
# For every model: combine the per-fold cross-validation pickles, save the
# combined arrays as NumPy files, and collect accuracy summaries.
results_list = []

for model in models:

    pred_probs = []
    labels = []
    images = []

    for split_num in range(num_cv_folds):

        out_subfolder = f"{model_folder}_{model}/split_{split_num}/"

        # NOTE: the "test_" prefix in the pickle names corresponds to the "test" split during cross-validation.
        # Only the objects that are actually used below are loaded.
        pred_probs_split, labels_split, images_split = (
            load_pickle(f"{out_subfolder}_{object_name}_split_{split_num}", verbose=verbose)
            for object_name in ("test_pred_probs", "test_labels", "test_image_files")
        )

        # collect per-split data so it can be combined across all the splits
        pred_probs.append(pred_probs_split)
        labels.append(labels_split)
        images.append(images_split)

    # convert the per-split lists to single arrays
    pred_probs = np.vstack(pred_probs)
    labels = np.hstack(labels)  # remember that these are the noisy labels (s)
    images = np.hstack(images)

    # Fail early, before anything is written to disk, if the combined arrays
    # do not describe the same number of examples.
    if not (len(pred_probs) == len(labels) == len(images)):
        raise ValueError(
            f"Mismatched row counts for model {model}: "
            f"pred_probs={len(pred_probs)}, labels={len(labels)}, images={len(images)}"
        )

    # get the true labels (y) from the original file path
    true_labels = get_orig_label_idx_from_file_path(images)

    # save to Numpy files
    numpy_out_folder = f"{model_folder}_{model}/"

    print(f"Saving to numpy files in this folder: {numpy_out_folder}")

    np.save(numpy_out_folder + "pred_probs", pred_probs)
    np.save(numpy_out_folder + "labels", labels)
    np.save(numpy_out_folder + "images", images)
    np.save(numpy_out_folder + "true_labels", true_labels)

    # check the accuracy (the argmax prediction is computed once and reused)
    predicted_labels = pred_probs.argmax(axis=1)
    acc_labels = (predicted_labels == labels).mean()  # vs noisy labels (s)
    acc_true_labels = (predicted_labels == true_labels).mean()  # vs true labels (y)
    acc_noisy_vs_true_labels = (labels == true_labels).mean()

    print(f"Model: {model}")
    print(f"  Accuracy (argmax pred vs labels): {acc_labels}")
    print(f"  Accuracy (argmax pred vs true labels) : {acc_true_labels}")
    print(f"  Accuracy (labels vs true labels)       : {acc_noisy_vs_true_labels}")

    results_list.append(
        {
            "model": model,
            "Accuracy (argmax pred vs noisy labels)": acc_labels,
            "Accuracy (argmax pred vs true labels)": acc_true_labels,
            "Accuracy (noisy vs true labels)": acc_noisy_vs_true_labels,
        }
    )

# Show the collected summary (one row per model) as the cell output.
pd.DataFrame(results_list)

In [None]:
# Sanity check: reload the saved NumPy files of every model and print their shapes.
for model in models:
    numpy_out_folder = f"{model_folder}_{model}/"

    pred_probs, labels, true_labels = (
        np.load(numpy_out_folder + array_name + '.npy')
        for array_name in ('pred_probs', 'labels', 'true_labels')
    )

    shape_report = (
        f'{model}\n pred_probs[{pred_probs.shape}],labels[{labels.shape}], true_labels[{true_labels.shape}]\n'
    )
    print(shape_report)