In [None]:
# Magic functions -- Run Once
%load_ext autoreload
%autoreload 2
%matplotlib notebook

from IPython.core.display import display, HTML
html_str = "<style>.container { width:99% !important; }\n"
html_str += "div.cell.selected { border-left-width: 1px !important; }\n"
html_str += "div.output_scroll { resize: vertical !important }</style>"
display(HTML(html_str))

# Move up one folder to reach the repo root
%cd ..

In [None]:
# Paths, Imports & Configs
from stats import helper
import matplotlib.pyplot as plt
# ---- Plot options --------------------------------------------------------
normalize_confusion_matrices = False
normalize_histograms = False

# ---- Dataset -------------------------------------------------------------
root_data_path = "data"
data_name = "v3_fixed_5k_40_inst_1024_win_50_overlap"
data_path = f"{root_data_path}/{data_name}"

# ---- Experiment to analyse -----------------------------------------------
# Alternative run kept for quick switching:
#experiment_name = "v3_noReverbnoBackground_5k_40_inst_1024_win_50_overlap_BEAST"
root_output_path = "output/train_film"
experiment_name = "v3_fixed_5k_40_inst_1024_win_50_overlap_hpc-puget-necotis"
# Alternative date selector kept for quick switching:
#experiment_date = "latest"
experiment_date = "2019-11-07_01h43"
epoch_id = "best"

# Output folder layout: <root_output_path>/<experiment_name>/<experiment_date>
experiment_output_path = f"{root_output_path}/{experiment_name}/{experiment_date}"


In [None]:
# Load data from files
# Mapping built from the dataset's attributes.json; its values are collected below as the answer families.
answer_to_family_map = helper.get_answer_to_family_map(f'{data_path}/attributes.json', to_lowercase=True, reduced_text=True)
# Sorted so the family order is identical on every kernel restart: iterating a
# bare set of strings yields a different order from one interpreter run to the next.
# NOTE(review): assumes all family names are mutually comparable (e.g. all str) -- confirm.
answer_families = sorted(set(answer_to_family_map.values()))

# Predictions saved by the experiment for the selected epoch, one load per split.
train_processed_predictions = helper.load_experiment_predictions(experiment_output_path, epoch_id, set_type='train', reduced_text=True)
val_processed_predictions = helper.load_experiment_predictions(experiment_output_path, epoch_id, set_type='val', reduced_text=True)

# Sort correct & incorrect predictions (distinction between correct/incorrect question family when the prediction is incorrect)
train_processed_predictions_sorted = helper.sort_correct_incorrect_predictions(train_processed_predictions)
val_processed_predictions_sorted = helper.sort_correct_incorrect_predictions(val_processed_predictions)

# Each call returns a (predictions, ground_truths) pair for its split.
train_predictions, train_ground_truths = helper.separate_preds_ground_truth(train_processed_predictions, attribute="ground_truth_answer_family")
val_predictions, val_ground_truths = helper.separate_preds_ground_truth(val_processed_predictions, attribute="ground_truth_answer_family")

## Confusion Matrices

In [None]:
# Translate every entry of the 'all' lists through answer_to_family_map so the
# confusion matrices below are computed on the mapped (family) labels.
train_predictions_families = [answer_to_family_map[answer] for answer in train_predictions['all']]
train_ground_truths_families = [answer_to_family_map[answer] for answer in train_ground_truths['all']]
val_predictions_families = [answer_to_family_map[answer] for answer in val_predictions['all']]
val_ground_truths_families = [answer_to_family_map[answer] for answer in val_ground_truths['all']]

# Train split (return value intentionally not kept)
helper.plot_confusion_matrix(
    train_predictions_families,
    train_ground_truths_families,
    title="Train confusion matrix by answer Families",
    normalize=normalize_confusion_matrices,
)

# Validation split (figure and axes kept in `fig`, `ax`)
fig, ax = helper.plot_confusion_matrix(
    val_predictions_families,
    val_ground_truths_families,
    title="Val confusion matrix by answer Families",
    normalize=normalize_confusion_matrices,
)

In [None]:
# Confusion matrices over the 'all' entries of each split.
# After resizing a plot, calling big_fig_train.tight_layout() / big_fig_val.tight_layout() might be needed.
big_fig_train, ax = helper.plot_confusion_matrix(
    train_predictions['all'],
    train_ground_truths['all'],
    title="Train confusion matrix",
    normalize=normalize_confusion_matrices,
)
big_fig_val, ax = helper.plot_confusion_matrix(
    val_predictions['all'],
    val_ground_truths['all'],
    title="Val confusion matrix",
    normalize=normalize_confusion_matrices,
)

In [None]:
# One train confusion matrix per key of train_predictions.
for family in train_predictions:
    # The 'all' entry is skipped: it has already been plotted separately so that it comes first.
    if family != 'all':
        fig, ax = helper.plot_confusion_matrix(
            train_predictions[family],
            train_ground_truths[family],
            title=f"[{family.capitalize()}]Train confusion matrix",
            normalize=normalize_confusion_matrices,
        )


In [None]:
# One validation confusion matrix per key of val_predictions.
for family in val_predictions:
    # The 'all' entry is skipped: it has already been plotted separately so that it comes first.
    if family != 'all':
        fig, ax = helper.plot_confusion_matrix(
            val_predictions[family],
            val_ground_truths[family],
            title=f"[{family.capitalize()}]Val confusion matrix",
            normalize=normalize_confusion_matrices,
        )


## Analyse Correct/Incorrect predictions per family

In [None]:
# Distribution of the sorted (correct/incorrect) predictions per question family,
# train and validation passed together; answer_families supplies the x labels.
helper.plot_predictions_distribution_per_question_family(
    train_processed_predictions_sorted,
    val_processed_predictions_sorted,
    norm_hist=normalize_histograms,
    all_x_labels=answer_families,
)

## Analyse confidence in predictions

In [None]:
# Prediction confidence for train and validation, with no question-family filter passed.
helper.plot_predictions_confidence(
    train_processed_predictions_sorted,
    val_processed_predictions_sorted,
    norm_hist=normalize_histograms,
)

In [None]:
# Same confidence plot, repeated once for each entry of answer_families.
for family in answer_families:
    helper.plot_predictions_confidence(
        train_processed_predictions_sorted,
        val_processed_predictions_sorted,
        question_family=family,
        norm_hist=normalize_histograms,
    )