# Active Learning using Uncertainty Quantification: Data Analysis

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline

In [None]:
# Set Parameters
N_RUNS = 10       # number of runs averaged over; run ids are expected to be 0..N_RUNS-1
INIT_SIZE = 40    # presumably the size of the initial labelled set -- TODO confirm
ACQ_SIZE = 40     # presumably the number of samples acquired per step -- TODO confirm
ACQ_MAX = 2000    # presumably the labelled-set size at which acquisition stops -- TODO confirm

# Number of steps per run. np.round returns a numpy float, which range() and
# list indexing reject, so convert to a plain int once here.
N_STEPS = int(np.round((ACQ_MAX - INIT_SIZE) / ACQ_SIZE))

# Read the recorded results for each method from CSV.
# NOTE: the files are loaded as-is; no subsetting to a common size is done here.
MCD_data = pd.read_csv(filepath_or_buffer='../data/dataMCD.csv')
ENS_data = pd.read_csv(filepath_or_buffer='../data/dataENS.csv')
# TODO add dropconnect + random

## Average loss over time

In [None]:
def avg_over_runs(data, col_name, n_runs=None, n_steps=None):
    """
    Average one column position-wise over several runs.

    For every run id in ``range(n_runs)`` the rows with ``data['run'] == id``
    are selected, the first ``n_steps`` values of ``col_name`` are taken in row
    order, and values at the same position are averaged across the runs.

    :param data: dataframe with a 'run' column and the column to average
    :param col_name: the column which contains the data to average
    :param n_runs: number of runs to average over (defaults to the global N_RUNS)
    :param n_steps: number of leading values used per run (defaults to the
        global N_STEPS); integral floats such as 49.0 are accepted
    :return: single-column dataframe with one row per step
    :raises IndexError: if a run has fewer than ``n_steps`` rows
    """
    if n_runs is None:
        n_runs = N_RUNS
    if n_steps is None:
        n_steps = N_STEPS

    # The step count may arrive as a numpy float (e.g. from np.round), which
    # range() and slicing reject; normalise both counts to plain ints.
    n_runs = int(n_runs)
    n_steps = int(n_steps)

    totals = np.zeros(n_steps, dtype=float)

    for run in range(n_runs):
        values = data.loc[data['run'] == run, col_name].to_numpy()

        if len(values) < n_steps:
            raise IndexError(
                f"run {run} has {len(values)} values in column {col_name!r}, "
                f"expected at least {n_steps}"
            )

        totals += values[:n_steps]

    return pd.DataFrame([total / n_runs for total in totals.tolist()])

In [None]:
# Run-averaged loss per step for each method (MCDropout and ENS)
avg_loss_MCD = avg_over_runs(data=MCD_data, col_name='loss')
avg_loss_ENS = avg_over_runs(data=ENS_data, col_name='loss')
# TODO add dropconnect + random

In [None]:
# Plot the rolling mean of the run-averaged loss per step, with a 95% confidence band.
# ref: https://stackoverflow.com/questions/70351809/python-calculate-confidence-interval-for-rolling-average-of-dataframe-entries

WS = 10    # rolling window size, in steps
Z = 1.96   # z-score of a two-sided 95% interval under a normal approximation

avg_loss_MCD = pd.DataFrame(avg_loss_MCD)
avg_loss_ENS = pd.DataFrame(avg_loss_ENS)
#TODO add dropconnect + random

# Band half-width is Z * rolling std / sqrt(window). It has to be built from
# the rolling std (a 1-D array), not from the loss values themselves: the
# latter is not an interval width and, being a DataFrame, does not broadcast
# against the 1-D rolling mean.
movMeanMCD = avg_loss_MCD.rolling(window=WS, center=True, min_periods=1).mean().values.ravel()
movStdMCD = avg_loss_MCD.rolling(window=WS, center=True, min_periods=1).std().values.ravel()
confIntMCD = Z * movStdMCD / np.sqrt(WS)

movMeanENS = avg_loss_ENS.rolling(window=WS, center=True, min_periods=1).mean().values.ravel()
movStdENS = avg_loss_ENS.rolling(window=WS, center=True, min_periods=1).std().values.ravel()
confIntENS = Z * movStdENS / np.sqrt(WS)

#TODO add dropconnect + random

# x positions are taken from the data length rather than N_STEPS, so this cell
# does not depend on N_STEPS being an int. Both curves are expected to have
# the same number of steps.
x_range = list(range(len(movMeanMCD)))

plt.figure(figsize=(12,5))
plt.plot(movMeanMCD, label='MCDropout', color='y')
plt.fill_between(x_range, movMeanMCD - confIntMCD, movMeanMCD + confIntMCD,
                 color='y', alpha=.1, label='MCD 95%')
plt.plot(movMeanENS, label='ENS', color='r')
plt.fill_between(x_range, movMeanENS - confIntENS, movMeanENS + confIntENS,
                 color='r', alpha=.1, label='ENS 95%')
#TODO add dropconnect + random

plt.title('Average Loss over time')
plt.ylabel('Average Loss')
plt.xlabel('step')
plt.legend()
plt.show()

In [None]:
# Report the mean and standard deviation of the run-averaged loss per method.
# Each line uses that method's own data and label; np.asarray turns the
# DataFrame into a plain array so np.mean / np.std return scalars.
print("Mean and Std. Deviation of Average Loss over Time:")
for label, avg_loss in (('MCD', avg_loss_MCD), ('ENS', avg_loss_ENS)):
    losses = np.asarray(avg_loss)
    print(f'{label} {np.mean(losses)} ({np.std(losses)})')
# TODO add dropconnect + random

# TO DO:
- Add average accuracy over time.

- Decide whether to keep the rolling-average window or smooth the curves differently.