In [1]:
%load_ext autoreload
%autoreload 2

import sys
sys.path.append('..')

import argparse
from argparse import Namespace

import models.models_original as models_original
import models.models_3d_atomics as models_3d_atomics
import models.models_3d as models_3d
from models.data import *
from models.helper import *
from models.param_initializations import *
from models.optimization_strategy import *


In [2]:
# Experiment configuration, held in a Namespace so the rest of the notebook can
# use attribute access (args.dataset, args.model, ...).
args = Namespace(
    random_states=[1],
    dataset='spoken_arabic_digits',  # get_dataloader also handles: mimic, tiselac
    model='atomics',  # get_model also handles: original, shared
    pruning='gradient_magnitude',  # weight_gradient_magnitude
    device='cuda',
    save_load_path='/workdir/optimal-summaries-public/_models2/',
    n_concepts=4,
    n_atomics=10,
    switch_encode_dim=True,  # default True
    switch_summaries_layer=False,  # default True
    switch_indicators=True,  # default True
    switch_use_only_last_timestep=False,  # default False
    # switch_use_summaries=True,  # default True
)

# Echo the configuration so it is recorded alongside the notebook outputs.
print("All arguments:")
for key, value in vars(args).items():
    print(f"{key}: {value}")


All arguments:
random_states: [1]
dataset: spoken_arabic_digits
model: atomics
pruning: gradient_magnitude
device: cuda
save_load_path: /workdir/optimal-summaries-public/_models2/
n_concepts: 4
n_atomics: 10
switch_encode_dim: True
switch_summaries_layer: False
switch_indicators: True
switch_use_only_last_timestep: False


In [3]:
def get_dataloader(random_state):
    """Seed the RNGs and build the data loaders for ``args.dataset``.

    Args:
        random_state: Seed passed to ``set_seed`` and forwarded to the
            dataset-specific loader factory.

    Returns:
        Whatever the selected loader factory returns. Callers in this
        notebook unpack it as ``(train_loader, val_loader, test_loader,
        class_weights, num_classes, changing_dim, static_dim, seq_len)``.

    Exits the interpreter with status 1 (after printing a message) when
    ``args.dataset`` is not one of the handled names.
    """
    set_seed(random_state)

    dataset_name = args.dataset
    if dataset_name == "mimic":
        return get_MIMIC_dataloader(random_state=random_state)
    if dataset_name == "tiselac":
        return get_tiselac_dataloader(random_state=random_state)
    if dataset_name == "spoken_arabic_digits":
        return get_arabic_spoken_digits_dataloader(random_state=random_state)

    # Nothing matched: report and abort.
    print("No known dataset selected")
    sys.exit(1)


def get_model(random_state):
    """Seed the RNGs and construct an untrained CBM of the kind in ``args.model``.

    The dataset is loaded only to read its dimensions (number of classes,
    changing/static feature dims, sequence length); the loaders themselves
    are discarded.

    Args:
        random_state: Seed passed to ``set_seed`` and ``get_dataloader``.

    Returns:
        A ``CBM`` instance from ``models_original``, ``models_3d`` or
        ``models_3d_atomics`` for ``args.model`` equal to ``"original"``,
        ``"shared"`` or ``"atomics"`` respectively.

    Exits the interpreter with status 1 (after printing a message) for any
    other ``args.model`` value.
    """
    set_seed(random_state)

    (_train, _val, _test, _weights,
     num_classes, changing_dim, static_dim, seq_len) = get_dataloader(random_state)

    # Constructor arguments derived from the dataset, common to all variants.
    data_dims = dict(
        static_dim=static_dim,
        changing_dim=changing_dim,
        seq_len=seq_len,
        output_dim=num_classes,
    )

    architecture = args.model
    if architecture == "original":
        return models_original.CBM(
            n_concepts=args.n_concepts,
            use_indicators=args.switch_indicators,
            use_only_last_timestep=args.switch_use_only_last_timestep,
            device=args.device,
            **data_dims,
        )
    if architecture == "shared":
        return models_3d.CBM(
            n_concepts=args.n_concepts,
            encode_time_dim=args.switch_encode_dim,
            use_indicators=args.switch_indicators,
            device=args.device,
            **data_dims,
        )
    if architecture == "atomics":
        return models_3d_atomics.CBM(
            n_concepts=args.n_concepts,
            n_atomics=args.n_atomics,
            use_summaries_for_atomics=args.switch_summaries_layer,
            use_indicators=args.switch_indicators,
            device=args.device,
            **data_dims,
        )

    # Nothing matched: report and abort.
    print("No known model selected")
    sys.exit(1)


def get_trained_model(random_state):
    """Return a model for ``random_state`` that is loaded from disk or freshly fitted.

    Builds the loaders and the model, resolves the checkpoint path under
    ``args.save_load_path``, delegates to ``model.try_load_else_fit`` and
    finally runs ``evaluate_classification`` on the validation split.

    Args:
        random_state: Seed used for data splitting, model construction and
            the checkpoint file name.

    Returns:
        The model returned by ``get_model`` after ``try_load_else_fit``.
    """
    set_seed(random_state)

    (train_loader, val_loader, _test_loader, class_weights,
     num_classes, _changing_dim, _static_dim, _seq_len) = get_dataloader(random_state)

    model = get_model(random_state)
    checkpoint_path = model.get_model_path(
        base_path=args.save_load_path,
        dataset=args.dataset,
        pruning=args.pruning,
        seed=random_state,
    )
    model.try_load_else_fit(
        train_loader,
        val_loader,
        p_weight=class_weights,
        save_model_path=checkpoint_path,
        max_epochs=10000,
        save_every_n_epochs=10,
        patience=10,
        sparse_fit=False,
    )

    # Validation-set evaluation; the return value is not used here.
    evaluate_classification(model=model, dataloader=val_loader, num_classes=num_classes)

    return model


def get_metrics(num_classes):
    """Create AUROC, accuracy and F1 metric objects on ``args.device``.

    Args:
        num_classes: Number of target classes. Exactly 2 selects the
            ``"binary"`` task; any other value selects ``"multiclass"``
            with macro averaging (and ``top_k=1`` for accuracy and F1).

    Returns:
        dict with keys ``"acc"``, ``"f1"`` and ``"auc"`` mapping to the
        corresponding metric objects.
    """
    device = args.device

    if num_classes == 2:
        auroc_metric = AUROC(task="binary")
        accuracy_metric = Accuracy(task="binary")
        f1_metric = F1Score(task="binary")
    else:
        # Settings shared by all three multiclass metrics.
        multiclass = dict(task="multiclass", num_classes=num_classes, average="macro")
        auroc_metric = AUROC(**multiclass)
        accuracy_metric = Accuracy(top_k=1, **multiclass)
        f1_metric = F1Score(top_k=1, **multiclass)

    return {
        "acc": accuracy_metric.to(device),
        "f1": f1_metric.to(device),
        "auc": auroc_metric.to(device),
    }


In [4]:
# Make sure the root directory for checkpoints and results exists.
checkpoint_root = args.save_load_path
makedir(checkpoint_root)


In [5]:
# Results table: later cells append one row per evaluated (split, pruning stage).
result_df = pd.DataFrame(columns=["Model", "Dataset", "Seed", "Split", "Pruning", "Finetuned", "AUC", "ACC", "F1"])

# NOTE(review): each iteration rebinds `model`, the loaders and `random_state`,
# and the evaluation/pruning cells below sit outside this loop. With more than
# one entry in args.random_states only the last seed would be evaluated and pruned.
for random_state in args.random_states:
    # Loads the checkpoint if present, otherwise trains (see get_trained_model).
    model = get_trained_model(random_state)
    # Re-create the loaders at notebook scope; get_trained_model does not return them.
    train_loader, val_loader, test_loader, class_weights, num_classes, changing_dim, static_dim, seq_len = get_dataloader(random_state = random_state)
    # presumably the learning rate used by the subsequent fine-tuning fits — TODO confirm how opt_lr is consumed
    model.opt_lr = 1e-4




Loaded model from /workdir/optimal-summaries-public/_models2/spoken_arabic_digits/atomics/gradient_magnitude/atomics_num_concepts_4_num_atomics_10_use_summaries_for_atomics_False_use_indicators_True_seed_1.pt
AUC macro 0.629
ACC macro 0.230
 F1 macro 0.167


In [None]:
# base
# Record the un-pruned model's scores on the validation and test splits.
# assumes evaluate_classification returns (AUC, ACC, F1) in that order — TODO confirm
for split_name, split_loader in (("val", val_loader), ("test", test_loader)):
    metrics = evaluate_classification(model, split_loader)
    result_df.loc[len(result_df)] = {
        "Model": model.get_short_model_name(),
        "Dataset": args.dataset,
        "Seed": random_state,
        "Split": split_name,
        "Pruning": "Before",
        "Finetuned": False,
        "AUC": metrics[0],
        "ACC": metrics[1],
        "F1": metrics[2],
    }


In [6]:
# prune and finetune
# Fine-tuned checkpoints are written to a "finetuned" subfolder of the base
# checkpoint location so the original model file is not overwritten.
new_model_path = add_subfolder(model.save_model_path, "finetuned")
makedir(new_model_path)

# Target number of weights per first-dimension row of each regularized layer.
FEATURE_BUDGET = 10
# Number of mask -> fine-tune rounds between the current size and the budget.
N_PRUNING_STEPS = 20

# Current number of weights in every regularized layer.
start_n_weights = [layer.weight.numel() for layer in model.regularized_layers]
# feature budget: shape[0] * FEATURE_BUDGET weights per layer. Clamped to the
# current count so a layer that already has fewer weights than the budget gets
# a flat schedule instead of one that asks for more weights than exist.
end_n_weights = [
    min(start, layer.weight.shape[0] * FEATURE_BUDGET)
    for start, layer in zip(start_n_weights, model.regularized_layers)
]

# Per layer: evenly spaced integer targets from start to end. The first point
# (the current size) is dropped, leaving N_PRUNING_STEPS targets per layer.
iterative_steps = [
    list(np.linspace(start, end, N_PRUNING_STEPS + 1, dtype=int))[1:]
    for start, end in zip(start_n_weights, end_n_weights)
]


In [7]:
# fill ema gradient by fit -> repeat: mask, clear, fit, evaluate
# A single epoch is enough here: the point of this fit is to populate the
# layers' ema_gradient before the first masking step, not to train further.
model.fit(
    train_loader,
    val_loader,
    p_weight=class_weights,
    save_model_path=new_model_path,
    max_epochs=1,
    save_every_n_epochs=1,
    patience=1,
)


Loaded model from /workdir/optimal-summaries-public/_models2/spoken_arabic_digits/atomics/gradient_magnitude/finetuned/atomics_num_concepts_4_num_atomics_10_use_summaries_for_atomics_False_use_indicators_True_seed_1.pt


100%|██████████| 1/1 [00:00<00:00,  1.26 epoch/s, Train Loss=2.18970, Val Loss=2.21694, Best Val Loss=2.21694]

Pre load [False, False]
Post load [False, False]





2.2169439792633057

In [9]:
# Sanity check: True marks a regularized layer whose ema_gradient is still unset.
ema_gradient_missing = [layer.ema_gradient is None for layer in model.regularized_layers]
ema_gradient_missing

[False, False]

In [None]:
# Iterative pruning: for every step of the schedule, mask the model down to the
# per-layer targets, reset the gradient statistics, then fine-tune.

# Map each supported args.pruning value to the model method that applies it.
mask_method_names = {
    "weight_magnitude": "mask_by_weight_magnitude",
    "gradient_magnitude": "mask_by_gradient_magnitude",
    "weight_gradient_magnitude": "mask_by_weight_gradient_magnitude",
}

# Fail fast on an unknown strategy. Without this check no mask would be applied,
# yet every fine-tuning round would still run and the results would be recorded
# under the unknown strategy name.
if args.pruning not in mask_method_names:
    raise ValueError(
        f"Unknown pruning strategy {args.pruning!r}; expected one of {sorted(mask_method_names)}"
    )

apply_mask = getattr(model, mask_method_names[args.pruning])

# zip(*iterative_steps) yields, per round, one target per regularized layer.
for step in zip(*iterative_steps):
    apply_mask(step)

    # Drop the accumulated EMA gradients so the next round's statistics come
    # from the fit below.
    model.clear_ema_gradient()
    model.fit(
        train_loader,
        val_loader,
        p_weight=class_weights,
        save_model_path=new_model_path,
        max_epochs=1000,
        save_every_n_epochs=1,
        patience=10,
    )


In [None]:
# Record the pruned and fine-tuned model's scores on the validation and test splits.
# assumes evaluate_classification returns (AUC, ACC, F1) in that order — TODO confirm
for split_name, split_loader in (("val", val_loader), ("test", test_loader)):
    metrics = evaluate_classification(model, split_loader)
    result_df.loc[len(result_df)] = {
        "Model": model.get_short_model_name(),
        "Dataset": args.dataset,
        "Seed": random_state,
        "Split": split_name,
        "Pruning": args.pruning,
        "Finetuned": True,
        "AUC": metrics[0],
        "ACC": metrics[1],
        "F1": metrics[2],
    }



In [None]:

# Write the collected metrics to a "results" subfolder. The file name comes
# from the model's path scheme with a "_results.csv" ending.
results_path = add_subfolder(
    model.get_model_path(
        base_path=args.save_load_path,
        dataset=args.dataset,
        pruning=args.pruning,
        ending="_results.csv",
    ),
    "results",
)
write_df_2_csv(results_path, result_df)

print("Done")
