# **Train and evaluate a time series classifier**

# 1) Preprocess the data

We need to split the data into chunks of a specified length (e.g. 30 s intervals), while making sure that no interval is interrupted by excluded bins.

In [None]:
# Import dependencies from sktime environment:

from __future__ import print_function
import time
from datetime import datetime

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import random

%matplotlib inline

In [None]:
# Read the reduced states table from disk
filename = 'States_ceiling_reduced.csv'
df = pd.read_csv(filename)

# Give the first column the name "Data_idx"; all other column names stay as they are
df.columns = ['Data_idx'] + list(df.columns[1:])
df

#

In [None]:
# How long [in seconds] does the interval have to be?
interval_size_s = 20

# Number of bins per second. This factor was hard-coded as 4 before
# (presumably 250 ms bins - TODO confirm against the binning of the recordings).
bins_per_second = 4
interval_size_bins = interval_size_s * bins_per_second

# Name of the column that indicates for each bin, whether the bins for the
# following 'interval_size_s'-seconds exist
clean_col = 'next_{}s_clean'.format(str(interval_size_s))

time_start = time.time()

# These computations have to be done on a per session level:
# for every bin, look up the 'Bin' value that is found 'interval_size_bins' rows
# further down within the same animal & session. The last 'interval_size_bins'
# bins of each session have no such row (NaN) and can therefore never be marked.
# Assumes that the rows of a session are stored in ascending 'Bin' order and that
# 'Bin' is unique within a session - TODO confirm.
bin_ahead = df.groupby(['Animal_ID', 'Session'])['Bin'].shift(-interval_size_bins)

# Does the difference between the two match interval_size_bins?
# If not, there were some bins missing inbetween (or the session ended).
# NOTE(review): this requires interval_size_bins + 1 consecutive bins, i.e. one more
# than an interval itself covers. Kept unchanged so that results stay comparable.
df[clean_col] = (bin_ahead - df['Bin']) == interval_size_bins

print('Done with all sessions\nTime elapsed: {} seconds\n---------------------------------'.format(time.time()-time_start))
            

In [None]:
interval_col = '{}s_interval_ID'.format(str(interval_size_s))
clean_col = 'next_{}s_clean'.format(str(interval_size_s))

# Work on row positions in plain numpy arrays instead of re-filtering the whole
# DataFrame for every single interval.
np_is_clean = df[clean_col].to_numpy(dtype=bool)
# -1 marks bins that are not part of any interval
np_interval_ids = np.full(df.shape[0], -1, dtype=np.int64)
interval_id_count = 0

for mouse in df['Animal_ID'].unique():
    for session in df.loc[df['Animal_ID'] == mouse, 'Session'].unique():
        session_mask = ((df['Animal_ID'] == mouse) & (df['Session'] == session)).to_numpy()

        # Walk through all potential start bins of this session in row order
        for position in np.flatnonzero(session_mask & np_is_clean):
            # Skip bins that are already included in an interval
            if np_interval_ids[position] != -1:
                continue

            # Starting from this bin, mark 'interval_size_bins' rows with one interval ID
            np_interval_ids[position : position + interval_size_bins] = interval_id_count
            interval_id_count = interval_id_count + 1

df[interval_col] = np_interval_ids

# Count the distinct interval IDs directly; subtracting one from the number of
# unique values is only correct as long as at least one bin is still marked -1.
interval_total = np.unique(np_interval_ids[np_interval_ids != -1]).size
print('Congratulations, you found {} intervals!'.format(interval_total))

## Some sanity checks:

#### Are there multiple mice in any interval? Corresponding IDs will show up in the list

In [None]:
# Number of distinct animals per interval. Bins outside of any interval (ID -1) are
# not an interval and are skipped, so that an empty list really means "all good".
interval_col = '{}s_interval_ID'.format(str(interval_size_s))
mice_per_interval = df.loc[df[interval_col] != -1].groupby(interval_col)['Animal_ID'].nunique(dropna=False)
mice_per_interval[mice_per_interval != 1].index.to_list()

#### Are there multiple sessions in any interval? Corresponding IDs will show up in the list

In [None]:
# Number of distinct sessions per interval. Bins outside of any interval (ID -1) are
# not an interval and are skipped, so that an empty list really means "all good".
interval_col = '{}s_interval_ID'.format(str(interval_size_s))
sessions_per_interval = df.loc[df[interval_col] != -1].groupby(interval_col)['Session'].nunique(dropna=False)
sessions_per_interval[sessions_per_interval != 1].index.to_list()

#### Are there any intervals that don't have the correct length? Corresponding IDs will show up in the list

In [None]:
# Number of bins per interval. Bins outside of any interval (ID -1) are not an
# interval and are skipped, so that an empty list really means "all good".
interval_col = '{}s_interval_ID'.format(str(interval_size_s))
bins_per_interval = df.loc[df[interval_col] != -1, interval_col].value_counts()
bins_per_interval[bins_per_interval != interval_size_bins].index.to_list()

## Alright, then let's finish the pre-processing with some final steps:

#### Add "early-mid-late" labels:

In [None]:
d_session_times = {}

# For each session type, get the maximum time and use that to define the borders between early-mid-late chunks:
for session in df['Session'].unique():
    max_session_time = df.loc[df['Session'] == session, 'Times'].max()
    d_session_times[session] = {'max_session_time': max_session_time,
                                'early-mid': round(max_session_time / 3, 0),
                                'mid-late': round(max_session_time / 3 * 2, 0)}

df['session_time_label'] = ''
# For each session type, assign the corresponding label to each bin.
# All masks are built from 'df' itself; the former references to 'df_test' pointed
# to a DataFrame that is not defined anywhere in this notebook.
for session in df['Session'].unique():
    is_session = df['Session'] == session
    early_mid_border = d_session_times[session]['early-mid']
    mid_late_border = d_session_times[session]['mid-late']

    # Mark as early:
    df.loc[is_session & (df['Times'] < early_mid_border), 'session_time_label'] = 'early'
    # Mark as mid:
    df.loc[is_session & (df['Times'] >= early_mid_border) & (df['Times'] < mid_late_border), 'session_time_label'] = 'mid'
    # Mark as late:
    df.loc[is_session & (df['Times'] >= mid_late_border), 'session_time_label'] = 'late'

### Visualize the distribution of all labels to check for a ~balanced distribution (also across mice):

In [None]:
# Collect one entry per (animal, session, time label) combination that occurs in the data
d_interval_counts = {'Animal_ID': [],
                     'Session': [],
                     'session_time_label': [],
                     'interval_count': []}

interval_col = '{}s_interval_ID'.format(str(interval_size_s))

for mouse in df['Animal_ID'].unique():
    df_mouse = df.loc[df['Animal_ID'] == mouse]
    for session in df_mouse['Session'].unique():
        df_mouse_session = df_mouse.loc[df_mouse['Session'] == session]
        for session_time_label in df_mouse_session['session_time_label'].unique():
            np_interval_ids = df_mouse_session.loc[df_mouse_session['session_time_label'] == session_time_label, interval_col].unique()
            # Number of distinct intervals; -1 only marks bins outside of any interval
            interval_count = int((np_interval_ids != -1).sum())

            d_interval_counts['Animal_ID'].append(mouse)
            d_interval_counts['Session'].append(session)
            d_interval_counts['session_time_label'].append(session_time_label)
            d_interval_counts['interval_count'].append(interval_count)

df_interval_counts = pd.DataFrame(data=d_interval_counts)

fig = plt.figure(figsize=(20,12), facecolor='white')
gs = fig.add_gridspec(2,2)

# One panel per session type, filled row by row on the 2x2 grid.
# All later panels share the y-axis of the first panel.
first_ax = None
for panel_idx, session_name in enumerate(['OF', 'EPM', 'CD1', 'CD2']):
    ax = fig.add_subplot(gs[panel_idx // 2, panel_idx % 2], sharey=first_ax)
    if first_ax is None:
        first_ax = ax

    df_session_counts = df_interval_counts.loc[df_interval_counts['Session'] == session_name]
    sns.stripplot(data=df_session_counts, x='session_time_label', y='interval_count', hue='Animal_ID', palette='Spectral', ax=ax, size=8, dodge=True)
    plt.title(session_name, fontsize=18)
    # Place the legend outside of the panel, to the right
    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))

plt.tight_layout()
plt.show()

# As alternative:
# sns.catplot(data=df_interval_counts, x='session_time_label', y='interval_count', col='Session', hue='Animal_ID', palette='Spectral')

### Add combo-classification-labels:

In [None]:
# Combined label of the form "<time label>_<session>"
df['true_classification_labels'] = df['session_time_label'].add('_').add(df['Session'])

# Show the resulting labels to check whether it worked:
df['true_classification_labels'].unique()

### Add train-test-split labels:

In [None]:
# Fraction of data that should be used for testing:
test_fraction = 0.1


df['train_test_split_labels'] = ''

for classification_label in df['true_classification_labels'].unique():
    l_interval_ids = [elem for elem in df.loc[df['true_classification_labels'] == classification_label, '{}s_interval_ID'.format(str(interval_size_s))].unique() if elem != -1]
    interval_count = len(l_interval_ids)
    l_test_ids = random.sample(l_interval_ids, int(test_fraction*interval_count) + 1)
    l_train_ids = [elem for elem in l_interval_ids if elem not in l_test_ids]
    
    df.loc[df['{}s_interval_ID'.format(str(interval_size_s))].isin(l_test_ids), 'train_test_split_labels'] = 'test'
    df.loc[df['{}s_interval_ID'.format(str(interval_size_s))].isin(l_train_ids), 'train_test_split_labels'] = 'train'

In [None]:
interval_col = '{}s_interval_ID'.format(str(interval_size_s))

total_bins = df.shape[0]
# Bins that belong to an interval (-1 marks bins outside of any interval)
total_usable_bins = (df[interval_col] != -1).sum()
train_bins = (df['train_test_split_labels'] == 'train').sum()
test_bins = (df['train_test_split_labels'] == 'test').sum()

if total_usable_bins == 0:
    # Nothing to report, and the percentages below would divide by zero
    print('No usable bins found - no bin is part of an interval.')
elif train_bins + test_bins == total_usable_bins:
    print('Overall, we can use {}% of all bins for the time-series classifier'.format(round(total_usable_bins / total_bins * 100, 2)))
    print('{}% of these usable bins (and {}% of all bins) will be used for training'.format(round(train_bins/total_usable_bins * 100, 2), round(train_bins / total_bins * 100, 2)))
    print('Consequently, the remaining {}% of the usable bins (and {}% of all bins) will be used for testing'.format(round(test_bins/total_usable_bins * 100, 2), round(test_bins / total_bins * 100, 2)))
else:
    # Make a failed sanity check visible instead of printing nothing at all
    print('Sanity check failed: {} train bins + {} test bins do not add up to {} usable bins'.format(train_bins, test_bins, total_usable_bins))

## All pre-processing is done. Let's save the data and continue with training of the classifiers! :-)

In [None]:
# Save the pre-processed table (including interval IDs, labels and the train/test split).
# The row index is written as well; section 2 reads this file back with index_col=0.
df.to_csv('States_ceiling_reduced_for_classification.csv')

# 2) Train & evaluate the classifiers

#### If training was already performed just load the dependencies and set `train_and_evaluate_the_classifiers` to `False` to load the data

In [None]:
# Import dependencies from sktime environment:

from __future__ import print_function
import time
from datetime import datetime

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import random
from statistics import stdev, mean
import pickle

from sklearn.linear_model import RidgeClassifierCV
from sklearn.pipeline import make_pipeline

from sktime.datasets import load_arrow_head  # univariate dataset
from sktime.datasets.base import load_basic_motions  # multivariate dataset
from sktime.transformations.panel.rocket import MiniRocket, MiniRocketMultivariate

%matplotlib inline

In [None]:
# Set to 'True' if training shall be performed from scratch & remember to also specify the number of runs.
# If left at 'False', the previously pickled classifier results are loaded instead of training again.
train_and_evaluate_the_classifiers = False

# If you want to train another round of classifiers, how many repetitions do you wish to compute?
total_runs = 5

In [None]:
interval_size_s = 20

# Pre-processed table from section 1 (the first CSV column holds the saved row index)
df = pd.read_csv('States_ceiling_reduced_for_classification.csv', index_col = 0)

# UMAP embeddings; only the key column and the embedding coordinates are needed
l_umap_cols = ['UMAP_cont_r1_1', 'UMAP_cont_r1_2', 'UMAP_cont_r2_1', 'UMAP_cont_r2_2', 'UMAP_cont_r3_1', 'UMAP_cont_r3_2']
df_umap_results = pd.read_csv('States_ceiling_reduced2_with_UMAP.csv', index_col = 0)
df_umap_results = df_umap_results[['Data_idx'] + l_umap_cols]

# NOTE(review): an outer join also keeps 'Data_idx' values that only exist in the UMAP
# file; such rows would carry NaN in all pre-processing columns. Confirm that both
# files cover the same 'Data_idx' values.
df = pd.merge(df, df_umap_results, on='Data_idx', how='outer')

# Exact complement of the 'if train_and_evaluate_the_classifiers:' check of the training
# cell, so that the results are always either trained or loaded.
if not train_and_evaluate_the_classifiers:
    # NOTE: unpickling can execute arbitrary code - only load result files you created yourself
    with open('classifier_results_{}s_intervals.p'.format(str(interval_size_s)), 'rb') as fp:
        d_classification_results = pickle.load(fp)

df

### Train and score the classifiers. This will only be executed, if `train_and_evaluate_the_classifiers` is `True`

In [None]:
if train_and_evaluate_the_classifiers:

    # List of tuples that specify the respective approaches that will be used to train classifiers
    # First element of the tuple is just a descriptive name
    # Second element of the tuple is a list of all dimensions that will be used as timeseries input for the classifiers
    # If the list of dimensions contains more than just one dimension, a multivariate MiniRocket will be used
    # In addition, there are also some approaches that include the string "_shuffled_"
    # This will be detected by an if-condition and cause random shuffling of the classification labels in the training dataset
    l_approaches = [('norm_HeartRate', ['norm_HeartRate']),
                     ('norm_HR_Med_Delta', ['norm_HR_Med_Delta']),
                     ('norm_HR_High_Amp', ['norm_HR_High_Amp']),
                     ('norm_Ceiling', ['norm_Ceiling']),
                     ('norm_DistanceToCeiling', ['norm_DistanceToCeiling']),
                     ('norm_Motion', ['norm_Motion']),
                     ('norm_AreaExplored', ['norm_AreaExplored']),
                     ('norm_Speed', ['norm_Speed']), 
                     ('norm_Temperature', ['norm_Temperature']), 
                     ('all_dimensions', ['norm_HeartRate', 'norm_HR_Med_Delta', 'norm_HR_High_Amp', 'norm_Ceiling', 'norm_DistanceToCeiling',
                                         'norm_Motion', 'norm_AreaExplored', 'norm_Speed',
                                         'norm_Temperature']),
                     ('UMAP1', ['UMAP_cont_r1_1', 'UMAP_cont_r1_2']),
                     ('UMAP2', ['UMAP_cont_r2_1', 'UMAP_cont_r2_2']), 
                     ('UMAP3', ['UMAP_cont_r3_1', 'UMAP_cont_r3_2']),
                     ('univariate_shuffled_1', ['norm_HeartRate']),
                     ('univariate_shuffled_2', ['norm_Ceiling']),
                     ('multivariate_shuffled_all_dimensions', ['norm_HeartRate', 'norm_HR_Med_Delta', 'norm_HR_High_Amp', 'norm_Ceiling', 'norm_DistanceToCeiling',
                                                               'norm_Motion', 'norm_AreaExplored', 'norm_Speed', 
                                                               'norm_Temperature']),
                     ('multivariate_shuffled_UMAP', ['UMAP_cont_r1_1', 'UMAP_cont_r1_2'])]

    interval_col = '{}s_interval_ID'.format(str(interval_size_s))
    l_metadata_cols = ['Animal_ID', 'Session', 'session_time_label', 'true_classification_label', interval_col]

    # Setup the dictionary that will be fed with all required data and results:
    d_classification_results = {'Results': {'Scores': {'Approach': [], 
                                                       'Run': [], 
                                                       'Score': [],
                                                       'Score_only_times': [],
                                                       'Score_only_sessions': []},
                                           'DataFrames': {'Approach': [],
                                                          'Run': [],
                                                          'DataFrame': []}}}

    # The bins of an interval are the same for every run and every approach, so the table
    # is split into its intervals only once (in order of first appearance) instead of
    # filtering the whole DataFrame again for each interval, approach and run.
    # Bins outside of any interval (ID -1) are left out.
    l_intervals = [(interval_id, df_interval)
                   for interval_id, df_interval in df.groupby(interval_col, sort=False)
                   if interval_id != -1]

    # For validation, all computations will be repeated 'total_runs' times:
    for run in range(1, total_runs+1):
        # Make individual dictionaries for each run
        d_classification_results[run] = {}
        for approach_name, l_dimensions in l_approaches:
            # Keys of the timeseries columns: dim_00, dim_01, ... (one per input dimension)
            l_dim_keys = ['dim_{}'.format(str(dim_idx).zfill(2)) for dim_idx in range(len(l_dimensions))]

            # Setup the individual training and test data dictionaries for this approach (number of dimensions varies)
            d_approach = {}
            for train_test_key in ['train', 'test']:
                d_approach[train_test_key] = {}
                for key in l_dim_keys + l_metadata_cols:
                    d_approach[train_test_key][key] = []
            d_classification_results[run][approach_name] = d_approach

            # Add the respective data (while split remains the same for all, the timeseries data varies of course)
            for interval_id, df_interval in l_intervals:
                # Was this timeseries assigned to the training or to the test data?
                train_test_split_label = df_interval['train_test_split_labels'].iloc[0]
                d_split = d_approach[train_test_split_label]

                # Get the respective timeseries data (copied, so that runs never share buffers)
                for dim_key, dimension in zip(l_dim_keys, l_dimensions):
                    d_split[dim_key].append(pd.Series(df_interval[dimension].values.copy()))

                # Metadata and the desired classification label are taken from the first bin of the interval
                d_split['Animal_ID'].append(df_interval['Animal_ID'].iloc[0])
                d_split['Session'].append(df_interval['Session'].iloc[0])
                d_split['session_time_label'].append(df_interval['session_time_label'].iloc[0])
                d_split['true_classification_label'].append(df_interval['true_classification_labels'].iloc[0])
                d_split[interval_col].append(interval_id)

            # Convert the collected data to the correct format that is required for the training & testing of the classifiers
            df_train_w_meta = pd.DataFrame(data=d_approach['train'])
            l_dims = [elem for elem in df_train_w_meta.columns if elem.startswith('dim_')]
            df_x_train = df_train_w_meta[l_dims]
            # Copy the labels: they may be shuffled in place below, which must not touch the DataFrame
            np_y_train = df_train_w_meta['true_classification_label'].values.copy()

            # If approach contains the string "shuffled", the corresponding classification labels of the training dataset will be shuffled
            if 'shuffled' in approach_name:
                random.shuffle(np_y_train)

            # Same data extraction for the test dataset (no shuffling of classification labels for the test data)
            df_test_w_meta = pd.DataFrame(data=d_approach['test'])
            df_x_test = df_test_w_meta[l_dims]
            np_y_test = df_test_w_meta['true_classification_label'].values

            # Use MiniRocketMultivariate if more than one dimension as input, the univariate version otherwise
            if len(l_dims) > 1:
                minirocket_type = 'multivariate'
                transformer = MiniRocketMultivariate()
            else:
                minirocket_type = 'univariate'
                transformer = MiniRocket()
            transformer.fit(df_x_train)
            df_x_train_transform = transformer.transform(df_x_train)
            df_x_test_transform = transformer.transform(df_x_test)

            # Setup and train the classifier:
            # NOTE(review): the 'normalize' argument was removed from scikit-learn's linear models
            # in version 1.2; this call presumably relies on an older pinned version - confirm.
            classifier = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), normalize=True, class_weight='balanced')
            classifier.fit(df_x_train_transform, np_y_train)

            # Predict the labels of the test set once and store the DataFrame in the dictionary.
            # Labels have the form "<time label>_<session>" and are split at the first underscore.
            np_predicted_labels = classifier.predict(df_x_test_transform)
            df_test_w_meta['predicted_labels'] = np_predicted_labels
            df_test_w_meta['predicted_time'] = [elem[:elem.index('_')] for elem in np_predicted_labels]
            df_test_w_meta['predicted_session'] = [elem[elem.index('_') + 1:] for elem in np_predicted_labels]
            d_classification_results['Results']['DataFrames']['Approach'].append(approach_name)
            d_classification_results['Results']['DataFrames']['Run'].append(run)
            d_classification_results['Results']['DataFrames']['DataFrame'].append(df_test_w_meta)

            # Score the classifier:

            # 1 - Entire classification label (time + session):
            score = classifier.score(df_x_test_transform, np_y_test)
            print('The {}-classifier reached a score of: {} & {} MiniRocket was used'.format(approach_name, round(score,4), minirocket_type))

            # 2 - Only times (fraction of matches; also works if not a single prediction matches):
            time_score = df_test_w_meta['session_time_label'].eq(df_test_w_meta['predicted_time']).mean()

            # 3 - Only session:
            session_score = df_test_w_meta['Session'].eq(df_test_w_meta['predicted_session']).mean()

            # Also add all information to the 'Results' part of the main dictionary, so that it can directly be transformed into a DataFrame once all computations are done:
            d_classification_results['Results']['Scores']['Approach'].append(approach_name)
            d_classification_results['Results']['Scores']['Run'].append(run)
            d_classification_results['Results']['Scores']['Score'].append(score)
            d_classification_results['Results']['Scores']['Score_only_times'].append(time_score)
            d_classification_results['Results']['Scores']['Score_only_sessions'].append(session_score)

## This is the evaluation section of the classifiers

In [None]:
df_classifier_results = pd.DataFrame(data=d_classification_results['Results']['Scores'])

fig = plt.figure(figsize=(25,10), facecolor='white')
gs = fig.add_gridspec(1, 3)

# One panel per score type, from left to right: (score column, panel title)
l_score_panels = [('Score_only_times', 'Score on time labels only'),
                  ('Score_only_sessions', 'Score on session labels only'),
                  ('Score', 'Score on entire labels')]

for panel_idx, (score_col, panel_title) in enumerate(l_score_panels):
    # The new subplot becomes the current axes that the following calls draw on
    fig.add_subplot(gs[0, panel_idx])
    sns.stripplot(data=df_classifier_results, x='Approach', y=score_col)
    plt.xticks(rotation=90)
    plt.title(panel_title, fontsize=18)

plt.tight_layout()
plt.savefig('Score_distributions.png', dpi=300)
plt.show()

In [None]:
# Order of the labels on both axes of the confusion matrices
l_label_order = ['early_OF', 'mid_OF', 'late_OF',
                'early_EPM', 'mid_EPM', 'late_EPM', 
                'early_CD1', 'mid_CD1', 'late_CD1', 
                'early_CD2', 'mid_CD2', 'late_CD2']

# One row of plots per entry. 'shuffled' is special: it collects the results of all
# approaches whose name contains the string "shuffled".
l_approaches = ['norm_HeartRate', 'norm_Ceiling', 'all_dimensions', 'UMAP3', 'shuffled']

rows = len(l_approaches)
# Maximum number of confusion matrices that are shown per approach
columns = 3 #len(set(d_classification_results['Results']['DataFrames']['Run']))

fig = plt.figure(figsize=(20, 6*rows), facecolor='white')
gs = fig.add_gridspec(rows, columns)
plt.subplots_adjust(hspace=0.5)

l_result_approaches = d_classification_results['Results']['DataFrames']['Approach']
l_result_frames = d_classification_results['Results']['DataFrames']['DataFrame']

for row, approach in enumerate(l_approaches):
    is_shuffled_row = approach == 'shuffled'
    if is_shuffled_row:
        l_indices = [index for index, element in enumerate(l_result_approaches) if approach in element]
    else:
        l_indices = [index for index, element in enumerate(l_result_approaches) if element == approach]

    column = 0
    for index in l_indices:
        if column >= columns:
            break

        df_temp = l_result_frames[index]
        df_confusion = pd.crosstab(df_temp['true_classification_label'], df_temp['predicted_labels'])

        # Sort the confusion matrix (rows AND columns, so that the diagonal holds the correct predictions).
        # Selecting the columns raises a KeyError if a label was never predicted.
        try:
            df_confusion = df_confusion.reindex(index=l_label_order)[l_label_order]
        except KeyError:
            if is_shuffled_row:
                # Shuffled classifiers may not predict every label: continue with the
                # next shuffled predictions until enough matrices are plotted
                continue
            raise

        # Plot the data:
        fig.add_subplot(gs[row, column])
        sns.heatmap(df_confusion, annot=True)
        plt.title('{} - Run: {}'.format(l_result_approaches[index], column+1))

        # Increase column count
        column = column + 1

plt.tight_layout()
plt.savefig('Confusion_matrices.png', dpi=300)
plt.show()

In [None]:
if train_and_evaluate_the_classifiers:
    l_approaches_main = [('norm_HeartRate', ['norm_HeartRate']),
                         ('norm_HR_Med_Delta', ['norm_HR_Med_Delta']),
                         ('norm_HR_High_Amp', ['norm_HR_High_Amp']),
                         ('norm_Ceiling', ['norm_Ceiling']),
                         ('norm_DistanceToCeiling', ['norm_DistanceToCeiling']),
                         ('norm_Motion', ['norm_Motion']),
                         ('norm_AreaExplored', ['norm_AreaExplored']),
                         ('norm_Speed', ['norm_Speed']), 
                         ('norm_Temperature', ['norm_Temperature']), 
                         ('all_dimensions', ['norm_HeartRate', 'norm_HR_Med_Delta', 'norm_HR_High_Amp', 'norm_Ceiling', 'norm_DistanceToCeiling',
                                             'norm_Motion', 'norm_AreaExplored', 'norm_Speed',
                                             'norm_Temperature']),
                         ('UMAP1', ['UMAP_cont_r1_1', 'UMAP_cont_r1_2']),
                         ('UMAP2', ['UMAP_cont_r2_1', 'UMAP_cont_r2_2']), 
                         ('UMAP3', ['UMAP_cont_r3_1', 'UMAP_cont_r3_2']),
                         ('univariate_shuffled_1', ['norm_HeartRate']),
                         ('univariate_shuffled_2', ['norm_Ceiling']),
                         ('multivariate_shuffled_all_dimensions', ['norm_HeartRate', 'norm_HR_Med_Delta', 'norm_HR_High_Amp', 'norm_Ceiling', 'norm_DistanceToCeiling',
                                                                   'norm_Motion', 'norm_AreaExplored', 'norm_Speed', 
                                                                   'norm_Temperature']),
                         ('multivariate_shuffled_UMAP', ['UMAP_cont_r1_1', 'UMAP_cont_r1_2'])]

    l_approaches = [elem[0] for elem in l_approaches_main]


    d_classification_results['Results']['Errors'] = {'Summary': {}, 
                                                     'Individual': {}}

    #l_approaches = set(d_classification_results['Results']['DataFrames']['Approach'])

    for approach in l_approaches:
        # Create a temporary dictionary that will be added to the main results dict if we have some results to compute:
        d_temp = {'expl_true': [],
                 'expl_within': [],
                 'expl_between': [],
                 'cond_true': [],
                 'cond_within': [],
                 'cond_between': [],
                 'total_true': [],
                 'total_within': [],
                 'total_between': []
                 }    

        # Identify the indices of all runs of that approach:
        l_indices = [index for index, element in enumerate(d_classification_results['Results']['DataFrames']['Approach']) if approach == element]

        # Label groups used throughout the error quantification: the first six
        # entries of l_label_order are treated as 'expl', the remaining ones as 'cond'.
        l_expl_labels = l_label_order[:6]
        l_cond_labels = l_label_order[6:]

        # Runs are numbered from 1 in the order they are stored (only used in messages).
        for run, index in enumerate(l_indices, start=1):
            try:
                df_temp = d_classification_results['Results']['DataFrames']['DataFrame'][index].copy()
                # Rows = true labels, columns = predicted labels
                df_confusion = pd.crosstab(df_temp['true_classification_label'], df_temp['predicted_labels'])

                # Sort the confusion matrix:
                df_confusion = df_confusion.reindex(index=l_label_order)
                df_confusion = df_confusion[l_label_order]

                # Correct predictions sit on the diagonal of the confusion matrix.
                # How many expl are correctly predicted as expl?
                correct_expl = sum(df_confusion.loc[label, label] for label in l_expl_labels)
                # How many cond are correctly predicted as cond?
                correct_cond = sum(df_confusion.loc[label, label] for label in l_cond_labels)
                # How many total correct predictions?
                total_correct = sum(df_confusion.loc[label, label] for label in l_label_order)

                # How many within errors (wrong label, but correct group)?
                # For expl:
                expl_within = df_confusion.loc[l_expl_labels, l_expl_labels].sum().sum() - correct_expl
                # For cond:
                cond_within = df_confusion.loc[l_cond_labels, l_cond_labels].sum().sum() - correct_cond
                # total:
                total_within = expl_within + cond_within

                # How many between errors (wrong group)?
                # Predicted as expl but actually cond
                expl_between = df_confusion.loc[l_cond_labels, l_expl_labels].sum().sum()
                # Predicted as cond but actually expl
                cond_between = df_confusion.loc[l_expl_labels, l_cond_labels].sum().sum()
                # Total between errors:
                total_between = expl_between + cond_between

                # Sanity check: total errors + total correct must match test set label count:
                total_errors = total_within + total_between
                if total_correct + total_errors != df_confusion.sum().sum():
                    print('Something went wrong for the error quantification of {} - run: {}'.format(approach, run))
                    break

                # Normalization bases: number of predictions per predicted group (column sums)
                total_preds_expl = df_confusion[l_expl_labels].sum().sum()
                total_preds_cond = df_confusion[l_cond_labels].sum().sum()
                total_preds = total_preds_expl + total_preds_cond

                # Convert everything to percentage. All values are computed first and
                # appended afterwards, so that a failure cannot leave the lists in the
                # temporary dictionary with unequal lengths.
                d_run = {
                    'expl_true': correct_expl / total_preds_expl * 100,
                    'expl_within': expl_within / total_preds_expl * 100,
                    'expl_between': expl_between / total_preds_expl * 100,
                    'cond_true': correct_cond / total_preds_cond * 100,
                    'cond_within': cond_within / total_preds_cond * 100,
                    'cond_between': cond_between / total_preds_cond * 100,
                    'total_true': total_correct / total_preds * 100,
                    'total_within': total_within / total_preds * 100,
                    'total_between': total_between / total_preds * 100,
                }
                for key, value in d_run.items():
                    d_temp[key].append(value)

            except Exception as error:
                # Best effort: a run that cannot be quantified is skipped, but reported.
                # Presumably this happens when a label of l_label_order never occurs among
                # the predictions, so the column selection fails - TODO confirm.
                # 'Exception' (instead of a bare except) keeps the loop interruptible.
                print('Skipping the error quantification of {} - run: {} ({}: {})'.format(approach, run, type(error).__name__, error))
                continue

        # Only approaches with more than one quantified run are stored and summarized
        # (presumably because stdev needs at least two values - TODO confirm).
        if len(d_temp['expl_true']) > 1:
            d_errors = d_classification_results['Results']['Errors']
            d_summary = d_errors['Summary']

            # Keep the per-run percentages of this approach:
            d_errors['Individual'][approach] = d_temp

            # On first use, set up one empty list per summary column:
            if not d_summary:
                d_summary['Approach'] = []
                for metric in d_temp:
                    d_summary['mean_{}'.format(metric)] = []
                    d_summary['mean_{}_stdev'.format(metric)] = []

            # Add one entry per column for this approach: its name, then mean and stdev of every metric
            d_summary['Approach'].append(approach)
            for metric, l_values in d_temp.items():
                d_summary['mean_{}'.format(metric)].append(mean(l_values))
                d_summary['mean_{}_stdev'.format(metric)].append(stdev(l_values))


        #print('Done with {}'.format(approach))

In [None]:
# Stacked bar chart per prediction type: for every approach, the mean percentage of
# between errors (bottom), within errors (middle) and correct predictions (top),
# each with its standard deviation as error bar.
l_pred_types = ['expl', 'cond', 'total']
l_titles = ['Only predictions with label "explorative session"',
            'Only predictions with label "conditioning session"',
            'All predictions']

fig = plt.figure(figsize=(8, 25), facecolor='white')
gs = fig.add_gridspec(3,1)

# Extra vertical space between the panels for the vertical tick labels
plt.subplots_adjust(hspace=0.9)

# These do not depend on the prediction type, so they are looked up once:
d_summary = d_classification_results['Results']['Errors']['Summary']
l_labels = d_summary['Approach']

# Bar width and label locations
width = 0.6
x = np.arange(len(l_labels))

for row, (pred_type, title) in enumerate(zip(l_pred_types, l_titles)):
    l_means_correct = d_summary['mean_{}_true'.format(pred_type)]
    l_means_correct_stdev = d_summary['mean_{}_true_stdev'.format(pred_type)]

    l_means_within = d_summary['mean_{}_within'.format(pred_type)]
    l_means_within_stdev = d_summary['mean_{}_within_stdev'.format(pred_type)]

    l_means_between = d_summary['mean_{}_between'.format(pred_type)]
    l_means_between_stdev = d_summary['mean_{}_between_stdev'.format(pred_type)]

    # Baseline for the topmost ('correct') segment of every stacked bar
    l_sum_means_within_and_between = [x_between + y_within for (x_between, y_within) in zip(l_means_between, l_means_within)]

    # One panel per prediction type, stacked vertically
    ax = fig.add_subplot(gs[row, 0])

    ax.bar(x, l_means_between, width, yerr=l_means_between_stdev, label='between error', color='red', edgecolor='black')
    ax.bar(x, l_means_within, width, yerr=l_means_within_stdev, label='within error', color='orange', edgecolor='black', bottom=l_means_between)
    ax.bar(x, l_means_correct, width, yerr=l_means_correct_stdev, label='correct', color='green', edgecolor='black', bottom=l_sum_means_within_and_between)

    # The legend is placed outside the axes, to the right of the panel
    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
    ax.set_xticks(x)
    ax.set_xticklabels(l_labels, rotation='vertical')
    ax.set_ylabel('Percentage of predictions')
    ax.set_title(title)

# bbox_inches='tight' enlarges the saved area to include the legends outside the axes;
# without it they are clipped at the figure border in the PNG.
fig.savefig('Stacked_error_types.png', dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# Pickle the results dictionary, but only when train_and_evaluate_the_classifiers is set.
# The interval size is part of the file name.
if train_and_evaluate_the_classifiers:
    results_filename = 'classifier_results_{}s_intervals.p'.format(str(interval_size_s))
    with open(results_filename, 'wb') as results_file:
        pickle.dump(d_classification_results, results_file, protocol=pickle.HIGHEST_PROTOCOL)