In [None]:
# Magic functions -- Run Once
%load_ext autoreload
%autoreload 2
%matplotlib notebook

from IPython.core.display import display, HTML
html_str = "<style>.container { width:99% !important; }\n"
html_str += "div.cell.selected { border-left-width: 1px !important; }\n"
html_str += "div.output_scroll { resize: vertical !important }</style>"
display(HTML(html_str))

# Move up one folder to reach the repo root
%cd ..

In [None]:
# Paths, Imports & Configs
import matplotlib.pyplot as plt

from utils.generic import get_answer_to_family_map, chain_load_experiment_stats, separate_stats_by_set
from utils.generic import chain_load_batch_metrics
from utils.notebook.result_analysis import load_experiment_predictions, sort_correct_incorrect_predictions
from utils.notebook.result_analysis import plot_confusion_matrix, plot_predictions_distribution_per_question_family
from utils.notebook.result_analysis import plot_predictions_confidence, plot_acc_loss_by_epoch, plot_predictions_confidence_gap
from utils.notebook.generic import separate_preds_ground_truth

# -- Root folders (relative to the repo root)
root_data_path = "data"
root_output_path = "output_synced/training"

# -- Plot options
normalize_histograms = False
normalize_confusion_matrices = False

# -- Experiment to analyse (commented entries are alternative experiments)
#experiment_name = "v3_noReverbnoBackground_5k_40_inst_1024_win_50_overlap_BEAST"
#experiment_name = "v3_fixed_5k_40_inst_1024_win_50_overlap_hpc-puget-necotis"
#experiment_name = "v3_fixed_2k_40_inst_1024_win_50_overlap_BEAST-pool-batch-64"
experiment_name = "CLEAR_50k_4_inst_1024_win_50_overlap_extractor_parallel_3_block_64_proj_40_epoch_67557_extractor"
experiment_date = "2020-05-14_20h35"
#experiment_date = "latest"
epoch_id = "best"

# -- Dataset the experiment was trained on
data_name = "CLEAR_50k_4_inst_1024_win_50_overlap"

# -- Derived paths : <root>/<name>[/<date>]
experiment_output_path = "/".join((root_output_path, experiment_name, experiment_date))
data_path = "/".join((root_data_path, data_name))


In [None]:
# Load data from files
import pandas as pd

answer_to_family_map = get_answer_to_family_map(f'{data_path}/attributes.json', to_lowercase=True, reduced_text=True)
# Sorted so the family order (and therefore the order of the per-family plots) is the same on every kernel restart
answer_families = sorted(set(answer_to_family_map.values()))

# Predictions of the selected epoch, for every set
processed_predictions = {
    set_type: load_experiment_predictions(experiment_output_path, epoch_id, set_type=set_type, reduced_text=True)
    for set_type in ('train', 'val', 'test')
}

# Explicit per-set names : the cells below rely on them (they were previously used without being defined)
train_processed_predictions = processed_predictions['train']
val_processed_predictions = processed_predictions['val']

# Per-epoch stats (split by set) & per-batch metrics
epoch_stats = chain_load_experiment_stats(experiment_output_path, cast_to_float=True)
train_epoch_stats, val_epoch_stats = separate_stats_by_set(epoch_stats, set_types=['train', 'val'])
batches_metrics = pd.DataFrame(chain_load_batch_metrics(experiment_output_path, continue_training=True))

# Sort correct & Incorrect predictions (Distinction between correct/incorrect question family when incorrect prediction)
train_processed_predictions_sorted = sort_correct_incorrect_predictions(train_processed_predictions)
val_processed_predictions_sorted = sort_correct_incorrect_predictions(val_processed_predictions)

# Predictions / ground truths, indexed by answer family (plus an 'all' entry used by the confusion matrix cells)
train_predictions, train_ground_truths = separate_preds_ground_truth(train_processed_predictions, attribute="ground_truth_answer_family")
val_predictions, val_ground_truths = separate_preds_ground_truth(val_processed_predictions, attribute="ground_truth_answer_family")

## Accuracy & Loss

In [None]:
# Accuracy & loss by epoch, using the per-epoch stats of the train and val sets
plot_acc_loss_by_epoch({'train': train_epoch_stats, 'val': val_epoch_stats})

In [None]:

def plot_acc_loss_lr_by_batch(batches_metrics, smoothing_window=50, set_types=('train', 'val'), show_fig=False, fig_ax=None):
    """Plot accuracy, loss and learning rate by batch on 3 stacked axes.

    Accuracy and loss are smoothed with a rolling mean; the learning rate is plotted as-is.

    Args:
        batches_metrics: DataFrame with the columns '<set_type>_acc', '<set_type>_loss' and
            '<set_type>_lr' for every entry of `set_types` (one row per batch).
        smoothing_window: Size of the rolling window used to smooth accuracy & loss.
        set_types: Names of the sets to plot.
        show_fig: When True, `plt.show()` is called once the figure is drawn.
        fig_ax: Optional `(fig, axs)` tuple to draw on (`axs` must contain at least 3 axes).
            A new 3x1 figure is created when omitted.

    Returns:
        The `(fig, axs)` tuple that was drawn on.
    """
    if fig_ax:
        fig, axs = fig_ax
    else:
        fig, axs = plt.subplots(3, 1)

    axs[0].set_title("Accuracy by batches")
    axs[1].set_title("Loss by batches")
    axs[2].set_title("Learning Rate by batches")

    for set_type in set_types:
        set_label = set_type.capitalize()
        axs[0].plot(batches_metrics[f'{set_type}_acc'].rolling(smoothing_window).mean(), label=f"{set_label} Accuracy")
        axs[1].plot(batches_metrics[f'{set_type}_loss'].rolling(smoothing_window).mean(), label=f"{set_label} Loss")
        axs[2].plot(batches_metrics[f'{set_type}_lr'], label=f"{set_label} LR")

    for ax in axs:
        ax.legend()

    if not fig_ax:
        # Only adjust the layout of figures created here -- keeps the stacked titles from overlapping the axes above
        fig.tight_layout()

    if show_fig:
        plt.show()

    return fig, axs
    

# Batch-level curves for both sets (trailing ';' keeps the cell output limited to the figure)
plot_acc_loss_lr_by_batch(
    batches_metrics,
    smoothing_window=500,
    set_types=['train', 'val'],
);

## Confusion Matrices

In [None]:
# Translate every predicted / ground truth answer into its answer family
(train_predictions_families,
 train_ground_truths_families,
 val_predictions_families,
 val_ground_truths_families) = (
    [answer_to_family_map[answer] for answer in answers]
    for answers in (train_predictions['all'], train_ground_truths['all'],
                    val_predictions['all'], val_ground_truths['all'])
)

# Confusion matrices at the answer family level
plot_confusion_matrix(
    train_predictions_families,
    train_ground_truths_families,
    title="Train confusion matrix by answer Families",
    normalize=normalize_confusion_matrices,
)
fig, ax = plot_confusion_matrix(
    val_predictions_families,
    val_ground_truths_families,
    title="Val confusion matrix by answer Families",
    normalize=normalize_confusion_matrices,
)

In [None]:
# Confusion matrices over all the answers, for the train and val sets
# Might need to call big_fig_train.tight_layout() / big_fig_val.tight_layout() after resizing the plot
big_fig_train, ax = plot_confusion_matrix(train_predictions['all'], train_ground_truths['all'], title="Train confusion matrix", normalize=normalize_confusion_matrices)
big_fig_val, ax = plot_confusion_matrix(val_predictions['all'], val_ground_truths['all'], title="Val confusion matrix", normalize=normalize_confusion_matrices)

In [None]:
# One train confusion matrix per answer family ('all' is skipped, it has already been plotted)
for family, family_predictions in train_predictions.items():
    if family != 'all':
        fig, ax = plot_confusion_matrix(
            family_predictions,
            train_ground_truths[family],
            title=f"[{family.capitalize()}]Train confusion matrix",
            normalize=normalize_confusion_matrices,
        )


In [None]:
# One val confusion matrix per answer family ('all' is skipped, it has already been plotted)
for family, family_predictions in val_predictions.items():
    if family != 'all':
        fig, ax = plot_confusion_matrix(
            family_predictions,
            val_ground_truths[family],
            title=f"[{family.capitalize()}]Val confusion matrix",
            normalize=normalize_confusion_matrices,
        )


In [None]:
from utils.file import read_json
def load_scenes(data_path, set_type):
    """Read the scenes of a set and add a 'total_duration' entry to each of them.

    The scenes are read from `<data_path>/scenes/CLEAR_<set_type>_scenes.json` (under the 'scenes' key).
    For every scene, 'total_duration' is the scene's 'silence_before' plus, for each of its objects,
    the object's 'duration' and 'silence_after'.

    Returns:
        The list of scenes, each one updated in place with its 'total_duration'.
    """
    scenes_filepath = f"{data_path}/scenes/CLEAR_{set_type}_scenes.json"
    scenes = read_json(scenes_filepath)['scenes']

    for scene in scenes:
        leading_silence = scene['silence_before']
        objects_duration = sum(obj['duration'] + obj['silence_after'] for obj in scene['objects'])
        scene['total_duration'] = leading_silence + objects_duration

    return scenes

# Scenes (with their total duration) for every set
train_scenes, val_scenes, test_scenes = (
    load_scenes(data_path, set_type) for set_type in ('train', 'val', 'test')
)

In [None]:
# Display the first train scene to inspect its fields (including the 'total_duration' added by load_scenes)
train_scenes[0]

In [None]:
# TODO : Plot accuracy based on duration -- We could simply create an histogram with durations separated into bins and average the value for a given bin
#                                        -- We could also count how many questions are incorrect based on the duration <- I think this is better
#                                        -- We could also refer to the answer confidence

def get_prediction_scene_durations(scenes, predictions, key='correct'):
    """Split the durations of the scenes referenced by `predictions` according to a prediction flag.

    Args:
        scenes: Sequence of scene dicts containing a 'total_duration' entry. It is indexed
            with each prediction's 'scene_id'.
        predictions: Iterable of prediction dicts containing 'scene_id' and `key`.
        key: Name of the prediction entry used to split the durations (truthy vs falsy).

    Returns:
        A `(key_true, key_false)` tuple : the scene durations of the predictions for which
        `prediction[key]` is truthy, and the durations of the other predictions.
    """
    key_true = []
    key_false = []

    for prediction in predictions:
        # NOTE(review): assumes the position of a scene in `scenes` matches its scene id -- confirm
        scene_duration = scenes[prediction['scene_id']]['total_duration']

        if prediction[key]:
            key_true.append(scene_duration)
        else:
            key_false.append(scene_duration)

    return key_true, key_false

# The predictions argument was missing : use the test set predictions to match `test_scenes`
# NOTE(review): assumes processed_predictions['test'] is an iterable of prediction dicts with 'scene_id' & 'correct' -- confirm
correct_pred_durations, incorrect_pred_durations = get_prediction_scene_durations(test_scenes, processed_predictions['test'])
        



In [None]:
import matplotlib.pyplot as plt

fig, ax = plt.subplots(1, 1)

# Both series are passed in a single call so they share the same bin edges and are drawn side by side
# (two separate hist() calls would use different bins and the second one would hide the first)
ax.hist(
    [correct_pred_durations, incorrect_pred_durations],
    bins=10,
    label=['Correct predictions', 'Incorrect predictions'],
)
ax.set_title("Scene duration of correct vs incorrect predictions")
ax.set_xlabel("Scene total duration")
ax.set_ylabel("Number of predictions")
ax.legend();

## Analyse Correct/Incorrect predictions per family

In [None]:
# Distribution of the sorted (correct/incorrect) train & val predictions per question family, with every answer family as x label
plot_predictions_distribution_per_question_family(train_processed_predictions_sorted, val_processed_predictions_sorted, norm_hist=normalize_histograms, all_x_labels=answer_families)

## Analyse confidence in predictions

In [None]:
# Confidence gap of the train & val predictions, all question families together
plot_predictions_confidence_gap(train_processed_predictions_sorted, val_processed_predictions_sorted, norm_hist=normalize_histograms)

In [None]:
# Confidence of the train & val predictions, all question families together
plot_predictions_confidence(train_processed_predictions_sorted, val_processed_predictions_sorted, norm_hist=normalize_histograms)

In [None]:
# Same confidence plot, restricted to one question family at a time
for family in answer_families:
    plot_predictions_confidence(train_processed_predictions_sorted, val_processed_predictions_sorted, question_family=family, norm_hist=normalize_histograms)

In [None]:
# NOTE(review): this is the exact same call as the first cell of the "Analyse confidence in predictions" section
#               -- probably a leftover, consider removing one of the two cells
plot_predictions_confidence_gap(train_processed_predictions_sorted, val_processed_predictions_sorted, norm_hist=normalize_histograms)

In [None]:
# Same confidence gap plot, restricted to one question family at a time
for family in answer_families:
    plot_predictions_confidence_gap(train_processed_predictions_sorted, val_processed_predictions_sorted, question_family=family, norm_hist=normalize_histograms)