###  Imports

In [1]:
import pandas as pd
import numpy as np
from ast import literal_eval
from IPython.display import HTML, display

# Let pandas show up to 500 rows before truncating a displayed frame
pd.options.display.max_rows = 500

In [2]:
# Shared settings used by the cells below.
# Column order of the aggregated statistics frames:
column_order = [
    'dataset used', 'classifier', 'feature used',
    'min', 'max', 'median', 'mean',
]
# Feature columns read from the regular (non-tfv1) result files:
feature_list = [
    'phase', 'breathing', 'heartbeat', 'combined br hb',
    'undercomplete', 'sparse', 'deep',
]
# Path prefix under which the result CSV files are read and written:
results_path = 'results/'

In [3]:
def make_an_aggregated_dataframe(dataset_used, feature_used, accs_dataframe):
    """Summarise the accuracies of every classifier for one dataset/feature pair.

    Parameters
    ----------
    dataset_used : str
        Label stored in the 'dataset used' column of every output row.
    feature_used : str
        Label stored in the 'feature used' column of every output row.
    accs_dataframe : pandas object indexed by classifier
        ``accs_dataframe.loc[classifier]`` must yield an array-like of
        accuracies for that classifier.

    Returns
    -------
    pandas.DataFrame
        One row per entry of ``accs_dataframe.index`` holding the min, max,
        median and mean of its accuracies, with the columns arranged as in
        the module-level ``column_order``. An empty input yields an empty
        frame that still carries those columns.
    """
    # Collect plain records and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and re-copied the whole frame on every call.
    records = []
    for classifier in accs_dataframe.index:
        accs_array = np.array(accs_dataframe.loc[classifier])
        records.append({
            'dataset used': dataset_used,
            'feature used': feature_used,
            'classifier': classifier,
            'min': np.min(accs_array),
            'max': np.max(accs_array),
            'median': np.median(accs_array),
            'mean': np.mean(accs_array),
        })

    # Passing `columns` both orders the columns and keeps them when there are no records
    return pd.DataFrame(records, columns=column_order)

In [4]:
def custom_print(sorted_df):
    """Render an aggregated statistics frame as an HTML table string.

    The rows are processed in cycles of eight: the first four rows of a
    cycle are labelled "NO" and the last four "YES" in an extra
    "Autoencoder" column. A second extra column, "Best mean value", holds
    the highest 'mean' among the four rows of each group. Both extra cells
    span the rows of their group.

    Parameters
    ----------
    sorted_df : pandas.DataFrame
        Frame to render; it must contain a 'mean' column and its rows must
        already be ordered so that every group of four belongs together.
        String cells are copied as-is, all other cells are formatted with
        four decimals.

    Returns
    -------
    str
        The HTML markup of the table.
    """
    group_size = 4                  # rows sharing one Autoencoder / best-mean cell
    cycle_length = 2 * group_size   # one "NO" group followed by one "YES" group
    total_rows = len(sorted_df)

    # --- Open the table tag and write the header row
    parts = ['<table border="1" class="dataframe" style="border: 1px solid #ccc">']
    parts.append('<tr style="text-align: right; border-bottom: 1px solid black;">')
    for col in list(sorted_df.columns) + ['Autoencoder', 'Best mean value']:
        parts.append(f'<th>{col}</th>')
    parts.append('</tr>')

    # --- Copy the data rows
    for row_number, (_, row) in enumerate(sorted_df.iterrows()):
        position = row_number % cycle_length
        starts_group = position in (0, group_size)

        # A darker border opens a cycle, a lighter one opens its second group
        if position == 0:
            parts.append('<tr style="border-top: 1px solid black;">')
        elif position == group_size:
            parts.append('<tr style="border-top: 1px solid #aaa;">')
        else:
            parts.append('<tr>')

        for data in row:
            if isinstance(data, str):
                parts.append(f'<td>{data}</td>')
            else:
                parts.append(f'<td>{data:.4f}</td>')

        if starts_group:
            # Only look at rows that exist, so a trailing incomplete group
            # neither raises IndexError nor over-spans the table.
            rows_in_group = min(group_size, total_rows - row_number)
            group_means = sorted_df['mean'].iloc[row_number:row_number + rows_in_group]
            best_mean = group_means.iloc[0]
            for candidate in group_means.iloc[1:]:
                if candidate > best_mean:
                    best_mean = candidate

            label = 'NO' if position == 0 else 'YES'
            parts.append(f'<td rowspan="{rows_in_group}" style="text-align: center;"> {label} </td>')
            parts.append(f'<td rowspan="{rows_in_group}" style="text-align: center;">{best_mean:.4f}</td>')

        parts.append('</tr>')

    # --- Close the table tag
    parts.append('</table>')

    return ''.join(parts)

### Busy vs Relaxed

Prepare to read the results for the "all" dataset:

In [5]:
# Empty frame that the following cells fill with aggregated statistics
BvR_df = pd.DataFrame()

# Pieces from which the result file names are assembled
BvR_prefix, dataset_used_midpoint = "BvR/BvR", "-all"
tfv1_file_ending, file_format_ending = "-tfv1", ".csv"

In [6]:
# Read the results file of the "all" dataset; its first column becomes the index
df_all = pd.read_csv(results_path + BvR_prefix + dataset_used_midpoint + file_format_ending, index_col=[0])

# The accuracies are stored as strings, so parse each cell with literal_eval
# before aggregating the feature's column.
feature_frames = []
for feature_used in feature_list:
    temp_df = df_all[feature_used].apply(literal_eval)
    temp_df = make_an_aggregated_dataframe('all', feature_used, temp_df)
    feature_frames.append(temp_df)

# DataFrame.append was removed in pandas 2.0; concatenate all pieces in one go
BvR_df = pd.concat([BvR_df, *feature_frames], ignore_index=True)

In [7]:
# Same procedure for the 'contractive' column, which is read from the "-tfv1" results file
df_all_tfv1 = pd.read_csv(results_path + BvR_prefix + dataset_used_midpoint + tfv1_file_ending + file_format_ending, index_col=[0])

temp_df = df_all_tfv1['contractive'].apply(literal_eval)
temp_df = make_an_aggregated_dataframe('all', 'contractive', temp_df)
# DataFrame.append was removed in pandas 2.0; pd.concat is the replacement
BvR_df = pd.concat([BvR_df, temp_df], ignore_index=True)

Do the same for the NC dataset:

In [8]:
# File-name pieces for the NC dataset
BvR_prefix, dataset_used_midpoint = "BvR/BvR", "-NC"
tfv1_file_ending, file_format_ending = "-tfv1", ".csv"

In [9]:
# Read the results file of the NC dataset; its first column becomes the index
df_NC = pd.read_csv(results_path + BvR_prefix + dataset_used_midpoint + file_format_ending, index_col=[0])

# Parse the stringified accuracies of every feature column and aggregate them
feature_frames = []
for feature_used in feature_list:
    temp_df = df_NC[feature_used].apply(literal_eval)
    temp_df = make_an_aggregated_dataframe('NC', feature_used, temp_df)
    feature_frames.append(temp_df)

# DataFrame.append was removed in pandas 2.0; concatenate all pieces in one go
BvR_df = pd.concat([BvR_df, *feature_frames], ignore_index=True)

In [10]:
# The 'contractive' column of the NC dataset is read from the "-tfv1" results file
df_NC_tfv1 = pd.read_csv(results_path + BvR_prefix + dataset_used_midpoint + tfv1_file_ending + file_format_ending, index_col=[0])
temp_df = df_NC_tfv1['contractive'].apply(literal_eval)
temp_df = make_an_aggregated_dataframe('NC', 'contractive', temp_df)
# DataFrame.append was removed in pandas 2.0; pd.concat is the replacement
BvR_df = pd.concat([BvR_df, temp_df], ignore_index=True)

Display the data in a table:

In [11]:
# Order the rows by dataset first and classifier second (both ascending)
BvR_df = BvR_df.sort_values(by=['dataset used', 'classifier'], ascending=True)

# Render the sorted frame as an HTML table
display(
    HTML(custom_print(BvR_df))
)

dataset used,classifier,feature used,min,max,median,mean,Autoencoder,Best mean value
NC,LSTM,phase,0.25,1.0,0.5,0.6422,NO,0.701
NC,LSTM,breathing,0.5,1.0,0.5,0.6961,NO,0.701
NC,LSTM,heartbeat,0.0,1.0,0.5,0.4853,NO,0.701
NC,LSTM,combined br hb,0.5,1.0,0.6667,0.701,NO,0.701
NC,LSTM,undercomplete,0.25,1.0,0.5,0.5441,YES,0.5441
NC,LSTM,sparse,0.0,1.0,0.5,0.5441,YES,0.5441
NC,LSTM,deep,0.0,1.0,0.5,0.5,YES,0.5441
NC,LSTM,contractive,0.1667,1.0,0.5,0.5343,YES,0.5441
NC,SVC,phase,0.5,1.0,0.6667,0.6618,NO,0.7696
NC,SVC,breathing,0.5,1.0,0.8333,0.7696,NO,0.7696


Save the whole Busy vs Relaxed dataframe to a file:

In [12]:
# Write the aggregated frame to "BvR.csv" under the results path, without the index column
BvR_df.to_csv(f"{results_path}BvR.csv", mode='w', index=False)