# All imports #

In [1]:
from __future__ import print_function

import sys, os

import tensorflow.keras
import pandas as pd
import numpy as np
import sklearn
import keras
import csv

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split

from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout

from keras_uncertainty.models import MCDropoutClassifier, MCDropoutRegressor
from keras_uncertainty.utils import numpy_regression_nll


  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
Using TensorFlow backend.


In [2]:
def load_joint_space_csv_chunks(file_path):
    data_frame = pd.read_csv(file_path, skiprows=1, header=None)
    del data_frame[18]
    return data_frame

def load_task_space_csv_chunks(file_path):
    return pd.read_csv(file_path, skiprows=1, header=None)


In [3]:
##please select the appropriate folder, willl use os.path.join() for completed script
TRAIN_FOLDER = '/home/dfki.uni-bremen.de/bmanickavasakan/newdataset_rh5_leg/leg_5steps/'
TEST_FOLDER = '/home/dfki.uni-bremen.de/bmanickavasakan/newdataset_rh5_leg/leg_5steps/test_4steps'

X_TRAIN_FILE = os.path.join(TRAIN_FOLDER, 'leg_forwardkinematics_x.csv')
Q_TRAIN_FILE = os.path.join(TRAIN_FOLDER, 'leg_sysstate_q.csv')
x_train = load_task_space_csv_chunks(X_TRAIN_FILE)
q_train = load_joint_space_csv_chunks(Q_TRAIN_FILE)

X_TEST_FILE = os.path.join(TEST_FOLDER, 'leg_forwardkinematics_x.csv')
Q_TEST_FILE = os.path.join(TEST_FOLDER, 'leg_sysstate_q.csv')
x_test = load_task_space_csv_chunks(X_TEST_FILE)
q_test = load_joint_space_csv_chunks(Q_TEST_FILE)

In [4]:
print(x_test.shape, q_test.shape, x_train.shape, q_train.shape)

(4096, 9) (4096, 18) (15625, 9) (15625, 18)


In [5]:

x_train_df = pd.DataFrame(x_train)
q_train_df = pd.DataFrame(q_train)
x_test_df = pd.DataFrame(x_test)
q_test_df = pd.DataFrame(q_test)


from sklearn.ensemble import IsolationForest


clf = IsolationForest(n_estimators=100, max_samples='auto', max_features=1, bootstrap=False, n_jobs= -1, random_state=42, verbose=0)
clf.fit(q_train_df)

pred = clf.predict(q_train_df)
q_train_df['anamoly'] = pred
print(q_train_df['anamoly'].value_counts())

InDistribution_Q_Train = q_train_df[q_train_df['anamoly'] == 1]
OutDistribution_Q_Train =   q_train_df[q_train_df['anamoly'] == -1]
InDistribution_X_Train =    x_train_df[q_train_df['anamoly'] == 1]
OutDistribution_X_Train =   x_train_df[q_train_df['anamoly'] == -1]

clf_test = IsolationForest(n_estimators=100, max_samples='auto', max_features=1, bootstrap=False, n_jobs= -1, random_state=42, verbose=0)
clf_test.fit(q_test_df)
pred_test = clf.predict(q_test_df)
q_test_df['anamoly'] = pred_test

InDistribution_Q_Test = q_test_df[q_test_df['anamoly'] == 1]
OutDistribution_Q_Test =q_test_df[q_test_df['anamoly'] == -1]
InDistribution_X_Test = x_test_df[q_test_df['anamoly'] == 1]
OutDistribution_X_Test =x_test_df[q_test_df['anamoly'] == -1]

x_train_1 = InDistribution_X_Train
q_train_1 = InDistribution_Q_Train.drop(['anamoly'], axis=1)
x_test_1 = InDistribution_X_Test
q_test_1 = InDistribution_Q_Test.drop(['anamoly'], axis=1)

OOD_x_train = OutDistribution_X_Train
OOD_q_train = OutDistribution_Q_Train.drop(['anamoly'], axis=1)
OOD_x_test = OutDistribution_X_Test
OOD_q_test = OutDistribution_Q_Test.drop(['anamoly'], axis=1)

q_train_df = q_train_df.drop(['anamoly'], axis=1)
q_test_df = q_test_df.drop(['anamoly'], axis=1)





 1    14062
-1     1563
Name: anamoly, dtype: int64




# Building the model with MC Droput #

In [None]:
'''

Standard deviation based data splitting

We consider the Q features and use the feature with the 

highest SD for dividing the dataset
'''
'''
stats_q_train = pd.DataFrame()
stats_q_train["Mean"] = q_train.mean()
stats_q_train["Var"] = q_train.var()
stats_q_train["STD"] = q_train.std()
stats_q_train["OneSigmaMax"] = stats_q_train["Mean"] + stats_q_train["STD"]
stats_q_train["OneSigmaMin"] = stats_q_train["Mean"] - stats_q_train["STD"]
stats_q_train.T

max_std = stats_q_train["STD"].max()
colomn_max_std = stats_q_train["STD"].idxmax()

maximum = stats_q_train.loc[colomn_max_std, "Mean"] + (1.5 * max_std)
minimum = stats_q_train.loc[colomn_max_std, "Mean"] - (1.5 * max_std)
print(maximum, minimum)

InDistribution_Q_Train = q_train[q_train[colomn_max_std].le(maximum) & q_train[colomn_max_std].ge(minimum)]
OutDistribution_Q_Train = q_train[q_train[colomn_max_std].ge(maximum) | q_train[colomn_max_std].le(minimum)]
InDistribution_X_Train = x_train[q_train[colomn_max_std].le(maximum) & q_train[colomn_max_std].ge(minimum)]
OutDistribution_X_Train = x_train[q_train[colomn_max_std].ge(maximum) | q_train[colomn_max_std].le(minimum)]

InDistribution_Q_Test = q_test[q_test[colomn_max_std].le(maximum) & q_test[colomn_max_std].ge(minimum)]
OutDistribution_Q_Test = q_test[q_test[colomn_max_std].ge(maximum) | q_test[colomn_max_std].le(minimum)]
InDistribution_X_Test = x_test[q_test[colomn_max_std].le(maximum) & q_test[colomn_max_std].ge(minimum)]
OutDistribution_X_Test = x_test[q_test[colomn_max_std].ge(maximum) | q_test[colomn_max_std].le(minimum)]

x_train_1 = InDistribution_X_Train
q_train_1 = InDistribution_Q_Train
x_test_1 = InDistribution_X_Test
q_test_1 = InDistribution_Q_Test

OOD_x_train = OutDistribution_X_Train
OOD_q_train = OutDistribution_Q_Train
OOD_x_test = OutDistribution_X_Test
OOD_q_test = OutDistribution_Q_Test

print("//////////////////////")
print(x_train_1.shape, OOD_x_train.shape, x_test_1.shape, OOD_x_test.shape)
'''

In [6]:

x_scaler = MinMaxScaler()
q_scaler = MinMaxScaler()

#complete test set
x_train = x_scaler.fit_transform(x_train)
q_train = q_scaler.fit_transform(q_train_df)

x_test = x_scaler.transform(x_test)
q_test = q_scaler.transform(q_test_df)

#In order training set
x_train_1 = x_scaler.transform(x_train_1)
q_train_1 = q_scaler.transform(q_train_1)


#split testing data
IOD_x_test = x_scaler.transform(x_test_1)
IOD_q_test = q_scaler.transform(q_test_1)

OOD_x_test = x_scaler.transform(OOD_x_test)
OOD_q_test = q_scaler.transform(OOD_q_test)

In [7]:
import tensorflow as tf
model_iso = tf.keras.models.load_model("MC_DROPOUT_OOD_SD_MODEL.h5")

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Use tf.cast instead.


In [8]:
q_pred = model_iso.predict(IOD_x_test, verbose=0)
q_unnorm = q_scaler.inverse_transform(IOD_q_test)
q_pred_unnorm = q_scaler.inverse_transform(q_pred)

global_mae = mean_absolute_error(IOD_q_test, q_pred)
mae_1 = mean_absolute_error(q_unnorm, q_pred_unnorm)

print("Testing MAE: {:.5f}".format(global_mae))
print("Testing MAEX: {:.5f}".format(mae_1))


# Compute MAE for each output independently.
for i in range(IOD_q_test.shape[1]):
    norm_mae_i = mean_absolute_error(IOD_q_test[:, i], q_pred[:, i])
    mae_i = mean_absolute_error(q_unnorm[:, i], q_pred_unnorm[:, i])
    print("Q feature {} has unnorm MAE: {:.4f} (Range {:.4f} to {:.4f}) normalized MAE: {:.4f}".format(i, mae_i, q_scaler.data_min_[i], q_scaler.data_max_[i], norm_mae_i))


Testing MAE: 0.08714
Testing MAEX: 0.06552
Q feature 0 has unnorm MAE: 0.3589 (Range -0.7330 to 0.4398) normalized MAE: 0.3060
Q feature 1 has unnorm MAE: 0.2989 (Range -0.3840 to 0.5792) normalized MAE: 0.3103
Q feature 2 has unnorm MAE: 0.0477 (Range -0.8200 to -0.0680) normalized MAE: 0.0634
Q feature 3 has unnorm MAE: 0.0475 (Range -0.8381 to -0.0792) normalized MAE: 0.0625
Q feature 4 has unnorm MAE: 0.0053 (Range 0.0070 to 0.0907) normalized MAE: 0.0634
Q feature 5 has unnorm MAE: 0.0664 (Range 0.0000 to 1.0000) normalized MAE: 0.0664
Q feature 6 has unnorm MAE: 0.0030 (Range -0.0822 to 0.0000) normalized MAE: 0.0366
Q feature 7 has unnorm MAE: 0.0056 (Range 0.0000 to 0.0782) normalized MAE: 0.0711
Q feature 8 has unnorm MAE: 0.0766 (Range -0.7850 to 0.4710) normalized MAE: 0.0610
Q feature 9 has unnorm MAE: 0.0721 (Range -0.7850 to 0.4710) normalized MAE: 0.0574
Q feature 10 has unnorm MAE: 0.0702 (Range -0.5905 to 0.8678) normalized MAE: 0.0482
Q feature 11 has unnorm MAE: 0.07

# MC Dropout Regressor, O/P : mean, Std #

In [9]:
def test_mcdropout_regressor(x_test_values, q_test_values, model, data_scaler):   
    mc_model = MCDropoutRegressor(model)
    inp = x_test_values  
    
    mean, std = mc_model.predict(inp, num_samples = 10)
    
    q_pred_unnormalised = data_scaler.inverse_transform(mean)
    
    q_sd_unnromalised = data_scaler.inverse_transform(std)
    
    global_mae = mean_absolute_error(q_test_values, mean)

    print("Testing MAE: {:.5f}".format(global_mae))

    return q_pred_unnormalised, q_sd_unnromalised
  


In [10]:
mean_1, std_1 = test_mcdropout_regressor(x_test, q_test, model_iso, q_scaler)
q_test_unorm = q_scaler.inverse_transform(q_test)
print("NLL: {:.5f}".format(numpy_regression_nll(q_test_unorm, mean_1, std_1**2)))
sd_test_1 = pd.DataFrame(std_1)
print(sd_test_1[0].min())

Testing MAE: 0.30565
NLL: 13.85573
-0.7275876


In [10]:
mean_test_IOD, sd_test_IOD = test_mcdropout_regressor(IOD_x_test, IOD_q_test, model_iso, q_scaler)
q_test_unorm = q_scaler.inverse_transform(IOD_q_test)
print("NLL: {:.5f}".format(numpy_regression_nll(q_test_unorm, mean_test_IOD, sd_test_IOD**2)))

Testing MAE: 0.27893
NLL: 9.53655


In [11]:
mean_test_OOD, sd_test_OOD = test_mcdropout_regressor(OOD_x_test, OOD_q_test, model_iso, q_scaler)
q_test_unorm = q_scaler.inverse_transform(OOD_q_test)
print("NLL: {:.5f}".format(numpy_regression_nll(q_test_unorm, mean_test_OOD, sd_test_OOD**2)))

Testing MAE: 0.35221
NLL: 4.83142


In [12]:
sd_test_IOD_df = pd.DataFrame(sd_test_IOD)
sd_test_OOD_df = pd.DataFrame(sd_test_OOD)
new_scores = np.concatenate([sd_test_IOD_df[0], sd_test_OOD_df[0]], axis=0)
new_labels = np.concatenate([np.zeros_like(sd_test_IOD_df[0]), np.ones_like(sd_test_OOD_df[0])], axis=0)
histogram_df = pd.DataFrame(new_scores, new_labels)
print(new_scores.max())
print(new_scores.min())
#histogram_df.hist(column=0)

#sd_test_IOD_df.hist(column=0)
#sd_test_OOD_df.hist(column=0)
import matplotlib.pyplot as plt
fig, ax = plt.subplots()

a_heights, a_bins = np.histogram(sd_test_IOD_df[0])
b_heights, b_bins = np.histogram(sd_test_OOD_df[0], bins=a_bins)

width = (a_bins[1] - a_bins[0])/3

ax.bar(a_bins[:-1], a_heights, width = width, facecolor='blue',label="IOD SD")
ax.bar(b_bins[:-1]+width, b_heights, width = width, facecolor='red', label="OOD SD")
ax.legend()


-0.6650415
-0.7270739


<matplotlib.legend.Legend at 0x7f49f43ab828>

In [None]:
from sklearn.metrics import roc_curve, roc_auc_score

norm_scores = new_scores - min(new_scores) / (max(new_scores) - min(new_scores))

auc = roc_auc_score(new_labels, new_scores)
fpr, tpr, threshs = roc_curve(new_labels, norm_scores, drop_intermediate=True)
print(auc)

plt.title('Receiver Operating Characteristic')
plt.plot(fpr, tpr)
plt.show()
