In [None]:
# Magic functions
%load_ext autoreload
%autoreload 2
%matplotlib notebook

# Move up one folder to reach the repo root
%cd ..

In [None]:
# Paths, Imports & Configs
from stats import helper
import matplotlib.pyplot as plt
# --- Input data --------------------------------------------------------------
# Folder holding the dataset whose attributes.json is read in the loading cell.
# NOTE(review): data_name ("resnet", "5_inst") differs from experiment_name
# ("fixed", "10_inst") — confirm this is the dataset the experiment was run on.
root_data_path = "data"
data_name = "v3_resnet_1k_5_inst_1024_win_50_overlap"
data_path = f"{root_data_path}/{data_name}"

# --- Experiment to analyse ---------------------------------------------------
# Output folder layout: <root_output_path>/<experiment_name>/<experiment_date>
root_output_path = "output/train_film"
experiment_name = "v3_fixed_1k_10_inst_1024_win_50_overlap_BEAST"
experiment_date = "2019-10-30_01h26"
experiment_output_path = f"{root_output_path}/{experiment_name}/{experiment_date}"
# Handed to helper.load_experiment_predictions to select which epoch to load.
epoch_id = "best"

# --- Plot options ------------------------------------------------------------
# Forwarded as `norm_hist` to the histogram plotting helpers.
normalize_histograms = True

# TODO : Get some hyperparameters


In [None]:
# Load data from files
# Answer -> answer-family lookup, built by the helper from the dataset's attributes.json.
answer_to_family_map = helper.get_answer_to_family_map(
    f'{data_path}/attributes.json', to_lowercase=True, reduced_text=True
)
# Unique families, sorted so that their order (x-axis labels, per-family plot loops)
# is identical on every run; list(set(...)) alone gives an arbitrary order that can
# change between kernel restarts.
# NOTE(review): assumes all family names are mutually comparable (e.g. all strings).
answer_families = sorted(set(answer_to_family_map.values()))

# Predictions of the selected epoch for the training and validation sets.
train_processed_predictions = helper.load_experiment_predictions(
    experiment_output_path, epoch_id, set_type='train', reduced_text=True
)
val_processed_predictions = helper.load_experiment_predictions(
    experiment_output_path, epoch_id, set_type='val', reduced_text=True
)

# Sort correct & Incorrect predictions (Distinction between correct/incorrect question family when incorrect prediction)
train_processed_predictions_sorted = helper.sort_correct_incorrect_predictions(train_processed_predictions)
val_processed_predictions_sorted = helper.sort_correct_incorrect_predictions(val_processed_predictions)

# Split the training predictions into predicted answers and ground truths.
# Both results are indexed by 'all' below and iterated per family key in the
# confusion-matrix cells.
train_predictions, train_ground_truths = helper.separate_preds_ground_truth(
    train_processed_predictions, attribute='prediction_answer_family'
)

# Translate every answer to its family for the family-level confusion matrix.
# A KeyError here means an answer is missing from attributes.json.
train_predictions_families = [answer_to_family_map[p] for p in train_predictions['all']]
train_ground_truths_families = [answer_to_family_map[p] for p in train_ground_truths['all']]

## Confusion Matrices

In [None]:
# Family-level confusion matrix for the training set (normalization disabled).
fig, ax = helper.plot_confusion_matrix(
    train_predictions_families,
    train_ground_truths_families,
    title="Train confusion matrix by answer Families",
    normalize=False,
)

In [None]:
# Every figure is collected in `figs` so it can be adjusted afterwards
# (e.g. figs[0].tight_layout() may be needed after resizing a plot).
figs = []

# The matrix over all answers is drawn first so that it is always figs[0].
fig, ax = helper.plot_confusion_matrix(
    train_predictions['all'],
    train_ground_truths['all'],
    title="Train confusion matrix",
    normalize=False,
)
figs.append(fig)

# Then one matrix per remaining key; 'all' is skipped because it was plotted above.
for family in train_predictions.keys():
    if family != 'all':
        fig, ax = helper.plot_confusion_matrix(
            train_predictions[family],
            train_ground_truths[family],
            title=f"[{family.capitalize()}]Train confusion matrix",
            normalize=False,
        )
        figs.append(fig)


## Analyse Correct/Incorrect predictions per family

In [None]:
# Train vs. validation distribution per question family, with every known family
# passed as x labels.
helper.plot_distribution_per_question_family(
    train_processed_predictions_sorted,
    val_processed_predictions_sorted,
    norm_hist=normalize_histograms,
    all_x_labels=answer_families,
)

## Analyse confidence in predictions

In [None]:
# Prediction confidence for train and validation, without a question-family filter.
helper.plot_predictions_confidence(
    train_processed_predictions_sorted,
    val_processed_predictions_sorted,
    norm_hist=normalize_histograms,
)

In [None]:
# Same confidence plot, repeated once for each answer family.
for family in answer_families:
    helper.plot_predictions_confidence(
        train_processed_predictions_sorted,
        val_processed_predictions_sorted,
        question_family=family,
        norm_hist=normalize_histograms,
    )