In [1]:
import pandas as pd
from sklearn.utils import shuffle
import numpy as np
import xgboost as xgb

xgb.set_config(verbosity=0)
import joblib
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error

In [2]:
def _load_regressor(model_path):
    """Return a silent XGBRegressor whose booster is restored from ``model_path``."""
    regressor = xgb.XGBRegressor(verbosity=0)
    regressor.load_model(model_path)
    return regressor


# Pre-trained feature -> emotion regressors, one per output axis (P, A, D).
model_p = _load_regressor("../../emotion_classifier/model_training/models/bandai_l2p_model.json")
model_a = _load_regressor("../../emotion_classifier/model_training/models/bandai_l2a_model.json")
model_d = _load_regressor("../../emotion_classifier/model_training/models/bandai_l2d_model.json")

# Scaler applied to the features before they are passed to the three regressors.
# joblib.load unpickles the file, so only load scaler files from a trusted source.
scaler = joblib.load('../../emotion_classifier/model_training/datasets/scalers/standardizers/S_BANDAI_5frame.pkl')

In [3]:
# Load the BANDAI 5-frame table (feature columns plus EMOTION_P/A/D) and preview it.
dataset = pd.read_csv('datasets/BANDAI_5frame.csv')
dataset.head()

Unnamed: 0,max_hand_distance,avg_l_hand_hip_distance,avg_r_hand_hip_distance,max_stride_length,avg_l_hand_chest_distance,avg_r_hand_chest_distance,avg_l_elbow_hip_distance,avg_r_elbow_hip_distance,avg_chest_pelvis_distance,avg_neck_chest_distance,...,r_foot_speed,neck_speed,l_hand_acceleration_magnitude,r_hand_acceleration_magnitude,l_foot_acceleration_magnitude,r_foot_acceleration_magnitude,neck_acceleration_magnitude,EMOTION_P,EMOTION_A,EMOTION_D
0,0.433596,0.130036,0.335475,0.538619,0.184473,0.310089,0.210679,0.093915,0.236151,0.223894,...,-0.137483,-0.139467,0.09292,0.196129,0.221155,0.137483,0.139467,0.05,-0.05,0.0
1,0.348051,0.145184,0.256784,0.455501,0.197954,0.284215,0.172405,0.104876,0.236151,0.223894,...,-0.072441,-0.095084,0.048935,0.032804,0.070625,0.072096,0.045111,0.05,-0.05,0.0
2,0.320294,0.206306,0.180224,0.38019,0.234406,0.259905,0.133006,0.127385,0.236151,0.223894,...,-0.039016,-0.093423,0.030819,0.043059,0.028182,0.036532,0.014232,0.05,-0.05,0.0
3,0.400389,0.2873,0.136974,0.319861,0.279208,0.23945,0.112632,0.156856,0.236151,0.223894,...,-0.036825,-0.101489,0.044534,0.054382,0.015679,0.012813,0.021506,0.05,-0.05,0.0
4,0.465923,0.356129,0.136171,0.309995,0.313229,0.224432,0.11295,0.18668,0.236151,0.223894,...,-0.054338,-0.117573,0.04467,0.036618,0.020074,0.022012,0.024961,0.05,-0.05,0.0


In [4]:
# 80/20 split: draw the training rows with a fixed seed; every remaining row is test data.
train_dataset = dataset.sample(frac=0.8, random_state=42)
test_dataset = dataset.drop(train_dataset.index)

print("No Training Samples:",train_dataset.shape[0])
print("No Test Samples:",test_dataset.shape[0])

# Seed the shuffles too: unseeded, the row order (and therefore every positional
# index computed further down) changed on each Restart & Run All.
train_dataset = shuffle(train_dataset, random_state=42)
test_dataset = shuffle(test_dataset, random_state=42)

No Training Samples: 62841
No Test Samples: 15710


In [5]:
# Move the three emotion columns out of each feature frame (pop mutates the frame)
# and keep an untouched copy of those dataset labels under the *_OG names.
emotion_columns = ['EMOTION_P', 'EMOTION_A', 'EMOTION_D']

train_targets = [train_dataset.pop(column) for column in emotion_columns]
train_emotions = pd.concat(train_targets, axis=1)
train_emotions_OG = train_emotions.copy()

test_targets = [test_dataset.pop(column) for column in emotion_columns]
test_emotions = pd.concat(test_targets, axis=1)
test_emotions_OG = test_emotions.copy()

In [6]:
# Predict P/A/D for every training row with the pre-trained regressors.
scaled_train = scaler.transform(train_dataset.copy())

train_emotions_p = model_p.predict(scaled_train)
train_emotions_a = model_a.predict(scaled_train)
train_emotions_d = model_d.predict(scaled_train)

# Build the frame column-wise instead of appending one Python list per row.
# float64 keeps the later error thresholds evaluated in double precision.
# The frame gets a fresh 0..n-1 index; row i corresponds to train_dataset.iloc[i].
train_emotions = pd.DataFrame(
    {
        "EMOTION_P": train_emotions_p,
        "EMOTION_A": train_emotions_a,
        "EMOTION_D": train_emotions_d,
    },
    dtype="float64",
)

train_emotions.head()

Unnamed: 0,EMOTION_P,EMOTION_A,EMOTION_D
0,0.100351,0.595609,0.385265
1,0.050395,-0.051552,0.000853
2,0.599911,0.399616,0.100228
3,0.299848,0.401179,0.596684
4,0.300454,0.40218,0.604512


In [7]:
# Dataset labels for the same rows, to compare by eye with the predictions above.
train_emotions_OG.head()

Unnamed: 0,EMOTION_P,EMOTION_A,EMOTION_D
10591,0.1,0.6,0.4
47213,0.05,-0.05,0.0
37812,0.6,0.4,0.1
10153,0.3,0.4,0.6
61275,0.3,0.4,0.6


In [8]:
# Flag training rows whose predicted P/A/D is too far from the dataset label.
# A row is rejected when any single axis is off by more than 0.2, or when the
# mean absolute error over the three axes exceeds 0.11.
#
# The comparison is purely positional (row i of the predictions vs. row i of the
# labels). The previous loop iterated over index *labels* but read with .iloc,
# which only works while the index is exactly 0..n-1, and it called sklearn's
# MAE twice per row; the vectorised form gives the same positions in one pass.
assert len(train_emotions) == len(train_emotions_OG), "predictions and labels are out of sync"

abs_errors = np.abs(train_emotions_OG.to_numpy() - train_emotions.to_numpy())
axis_too_far = (abs_errors > 0.2).any(axis=1)
mean_too_far = abs_errors.mean(axis=1) > 0.11

# Positional row numbers (plain ints); the following cells translate them to labels.
index_for_removal = np.flatnonzero(axis_too_far | mean_too_far).tolist()


In [9]:
# Translate the flagged positions into row labels, then discard those label rows.
rejected_labels = train_emotions_OG.index[index_for_removal]
train_emotions_OG.drop(index=rejected_labels, inplace=True)
print(train_emotions_OG.shape)
train_emotions_OG.head()

(62841, 3)


Unnamed: 0,EMOTION_P,EMOTION_A,EMOTION_D
10591,0.1,0.6,0.4
47213,0.05,-0.05,0.0
37812,0.6,0.4,0.1
10153,0.3,0.4,0.6
61275,0.3,0.4,0.6


In [10]:
# Same positions, applied to the predicted P/A/D frame so it stays row-aligned.
rejected_labels = train_emotions.index[index_for_removal]
train_emotions.drop(index=rejected_labels, inplace=True)
print(train_emotions.shape)
train_emotions.head()

(62841, 3)


Unnamed: 0,EMOTION_P,EMOTION_A,EMOTION_D
0,0.100351,0.595609,0.385265
1,0.050395,-0.051552,0.000853
2,0.599911,0.399616,0.100228
3,0.299848,0.401179,0.596684
4,0.300454,0.40218,0.604512


In [11]:
# Same positions, applied to the feature frame so it stays row-aligned with the emotions.
rejected_labels = train_dataset.index[index_for_removal]
train_dataset.drop(index=rejected_labels, inplace=True)
print(train_dataset.shape)
train_dataset.head()

(62841, 25)


Unnamed: 0,max_hand_distance,avg_l_hand_hip_distance,avg_r_hand_hip_distance,max_stride_length,avg_l_hand_chest_distance,avg_r_hand_chest_distance,avg_l_elbow_hip_distance,avg_r_elbow_hip_distance,avg_chest_pelvis_distance,avg_neck_chest_distance,...,l_hand_speed,r_hand_speed,l_foot_speed,r_foot_speed,neck_speed,l_hand_acceleration_magnitude,r_hand_acceleration_magnitude,l_foot_acceleration_magnitude,r_foot_acceleration_magnitude,neck_acceleration_magnitude
10591,0.439561,0.180391,0.534318,0.675479,0.066496,0.425524,0.21091,0.17997,0.236151,0.223894,...,-0.106314,-0.131269,-0.156307,-0.133396,-0.114509,0.01365,0.028496,0.045201,0.018164,0.013817
47213,0.68304,0.382658,0.298693,0.491928,0.412911,0.398399,0.287946,0.244008,0.236151,0.223894,...,-0.058538,-0.048443,-0.024274,-0.096213,-0.058124,0.007072,0.013289,0.006958,0.009877,0.007182
37812,0.753033,0.382073,0.370285,0.339147,0.450852,0.44918,0.28114,0.278856,0.236151,0.223894,...,-0.141222,-0.035153,-0.045211,-0.121631,-0.071847,0.025381,0.022949,0.013413,0.014412,0.00902
10153,0.732429,0.363322,0.393088,0.848963,0.368705,0.423669,0.353957,0.355119,0.236151,0.223894,...,-0.170373,-0.219881,-0.202737,-0.200698,-0.189514,0.026732,0.005746,0.023078,0.052011,0.017836
61275,0.300576,0.0532,0.239504,0.311795,0.214367,0.292451,0.121907,0.071534,0.236151,0.223894,...,-0.064929,-0.02598,-0.023145,-0.053879,-0.04387,0.00391,0.007181,0.007288,0.006551,0.005339


In [12]:
# Predict P/A/D for every test row with the pre-trained regressors.
scaled_test = scaler.transform(test_dataset.copy())

test_emotions_p = model_p.predict(scaled_test)
test_emotions_a = model_a.predict(scaled_test)
test_emotions_d = model_d.predict(scaled_test)

# Build the frame column-wise instead of appending one Python list per row.
# float64 keeps the later error thresholds evaluated in double precision.
# The frame gets a fresh 0..n-1 index; row i corresponds to test_dataset.iloc[i].
test_emotions = pd.DataFrame(
    {
        "EMOTION_P": test_emotions_p,
        "EMOTION_A": test_emotions_a,
        "EMOTION_D": test_emotions_d,
    },
    dtype="float64",
)

test_emotions.head()

Unnamed: 0,EMOTION_P,EMOTION_A,EMOTION_D
0,0.040936,-0.11615,-0.037744
1,0.126196,-0.43485,-0.158045
2,0.089277,-0.276512,-0.09301
3,-0.079697,-0.481704,-0.103777
4,0.049968,-0.049992,-3e-05


In [13]:
# Dataset labels for the same test rows, to compare by eye with the predictions above.
test_emotions_OG.head()

Unnamed: 0,EMOTION_P,EMOTION_A,EMOTION_D
44289,0.05,-0.05,0.0
57391,0.1,-0.75,-0.25
49416,0.1,-0.75,-0.25
43459,-0.1,-0.6,-0.15
32006,0.05,-0.05,0.0


In [14]:
# Flag test rows whose predicted P/A/D is too far from the dataset label.
# A row is rejected when any single axis is off by more than 0.2, or when the
# mean absolute error over the three axes exceeds 0.11.
#
# The comparison is purely positional (row i of the predictions vs. row i of the
# labels). The previous loop iterated over index *labels* but read with .iloc,
# which only works while the index is exactly 0..n-1, and it called sklearn's
# MAE twice per row; the vectorised form gives the same positions in one pass.
assert len(test_emotions) == len(test_emotions_OG), "predictions and labels are out of sync"

abs_errors = np.abs(test_emotions_OG.to_numpy() - test_emotions.to_numpy())
axis_too_far = (abs_errors > 0.2).any(axis=1)
mean_too_far = abs_errors.mean(axis=1) > 0.11

# Positional row numbers (plain ints); the following cells translate them to labels.
index_for_removal = np.flatnonzero(axis_too_far | mean_too_far).tolist()


In [15]:
# Translate the flagged positions into row labels, then discard those label rows.
rejected_labels = test_emotions_OG.index[index_for_removal]
test_emotions_OG.drop(index=rejected_labels, inplace=True)
print(test_emotions_OG.shape)
test_emotions_OG.head()

(14134, 3)


Unnamed: 0,EMOTION_P,EMOTION_A,EMOTION_D
44289,0.05,-0.05,0.0
43459,-0.1,-0.6,-0.15
32006,0.05,-0.05,0.0
73924,0.05,-0.05,0.0
36059,0.05,-0.05,0.0


In [16]:
# Same positions, applied to the predicted P/A/D frame so it stays row-aligned.
rejected_labels = test_emotions.index[index_for_removal]
test_emotions.drop(index=rejected_labels, inplace=True)
print(test_emotions.shape)
test_emotions.head()

(14134, 3)


Unnamed: 0,EMOTION_P,EMOTION_A,EMOTION_D
0,0.040936,-0.11615,-0.037744
3,-0.079697,-0.481704,-0.103777
4,0.049968,-0.049992,-3e-05
5,0.050073,-0.050033,-3e-05
6,0.063777,-0.000388,0.003911


In [17]:
# Same positions, applied to the feature frame so it stays row-aligned with the emotions.
rejected_labels = test_dataset.index[index_for_removal]
test_dataset.drop(index=rejected_labels, inplace=True)
print(test_dataset.shape)
test_dataset.head()

(14134, 25)


Unnamed: 0,max_hand_distance,avg_l_hand_hip_distance,avg_r_hand_hip_distance,max_stride_length,avg_l_hand_chest_distance,avg_r_hand_chest_distance,avg_l_elbow_hip_distance,avg_r_elbow_hip_distance,avg_chest_pelvis_distance,avg_neck_chest_distance,...,l_hand_speed,r_hand_speed,l_foot_speed,r_foot_speed,neck_speed,l_hand_acceleration_magnitude,r_hand_acceleration_magnitude,l_foot_acceleration_magnitude,r_foot_acceleration_magnitude,neck_acceleration_magnitude
44289,0.653859,0.396167,0.265457,0.539838,0.418668,0.384897,0.292844,0.230188,0.236151,0.223894,...,-0.057079,-0.098987,-0.068511,-0.053368,-0.074612,0.00257,0.008275,0.01807,0.004747,0.004447
43459,0.529244,0.51198,0.18908,0.52559,0.403263,0.220454,0.155347,0.216025,0.236151,0.223894,...,-0.096327,-0.080076,-0.119895,-0.107922,-0.092677,0.022273,0.018326,0.002661,0.043354,0.008195
32006,0.592794,0.373263,0.4291,0.234795,0.381293,0.387227,0.299701,0.274195,0.236151,0.223894,...,-0.0,-0.0,-0.0,-0.0,-0.0,0.0,0.0,0.0,0.0,0.0
73924,0.369502,0.072189,0.357265,0.540016,0.279785,0.373409,0.117348,0.151797,0.236151,0.223894,...,-0.0,-0.0,-0.0,-0.0,-0.0,0.0,0.0,0.0,0.0,0.0
36059,0.409009,0.393653,0.249033,0.65043,0.324132,0.226599,0.091478,0.143226,0.236151,0.223894,...,-0.183041,-0.114653,-0.135363,-0.240813,-0.151699,0.012574,0.024731,0.041914,0.008435,0.014017



# AutoEncoder


In [18]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf

from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, losses
from tensorflow.keras.datasets import fashion_mnist
from tensorflow.keras.models import Model

In [19]:
# The bare string below is not executed as code: it is a kept record of an earlier
# configuration (ReLU on the bottleneck layer, epochs = 128). The live model that
# follows uses a linear bottleneck instead.
"""
self.encoder = tf.keras.Sequential([
            layers.Dense(20, activation='relu'),
            layers.Dense(15, activation='linear'),
            layers.Dense(10, activation='relu'),
            layers.Dense(latent_dim, activation='relu'),
        ])
        
        self.decoder = tf.keras.Sequential([
            layers.Dense(10, activation='relu'),
            layers.Dense(15, activation='linear'),
            layers.Dense(20, activation='relu'),
            layers.Dense(25, activation='linear'),
        ])
epochs = 128
"""

# Width of the autoencoder bottleneck; the later cells fit one regressor per latent feature.
latent_dim = 5

class Autoencoder(Model):
    """Fully-connected autoencoder with a 20-15-10 encoder and a mirrored decoder.

    Args:
        latent_dim: Number of units in the bottleneck (encoder output).
        n_features: Number of units in the final decoder layer, i.e. the width of
            the reconstructed vector. Defaults to 25, the value that used to be
            hard-coded, so existing ``Autoencoder(latent_dim)`` calls are unchanged.

    Attributes:
        encoder: ``tf.keras.Sequential`` mapping an input row to ``latent_dim`` values.
        decoder: ``tf.keras.Sequential`` mapping ``latent_dim`` values to ``n_features`` values.
    """

    def __init__(self, latent_dim, n_features=25):
        super().__init__()
        self.latent_dim = latent_dim
        self.n_features = n_features

        # Linear activation on the bottleneck, so latent values are not clipped at zero.
        self.encoder = tf.keras.Sequential([
            layers.Dense(20, activation='relu'),
            layers.Dense(15, activation='linear'),
            layers.Dense(10, activation='relu'),
            layers.Dense(latent_dim, activation='linear'),
        ])

        # Mirror of the encoder; the linear output layer leaves the reconstruction unbounded.
        self.decoder = tf.keras.Sequential([
            layers.Dense(10, activation='relu'),
            layers.Dense(15, activation='linear'),
            layers.Dense(20, activation='relu'),
            layers.Dense(n_features, activation='linear'),
        ])

    def call(self, x):
        """Encode ``x`` to the latent space and return its reconstruction."""
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded

# Untrained model instance with a `latent_dim`-wide bottleneck.
autoencoder = Autoencoder(latent_dim)

## Train

In [20]:
# Train with Adam on the mean-squared reconstruction error.
autoencoder.compile(optimizer='adam', loss=losses.MeanSquaredError())

In [None]:
# Reconstruction task: the input rows are their own targets. The filtered test split
# is used as validation data, and verbose=2 prints one summary line per epoch.
autoencoder.fit(
    x=train_dataset,
    y=train_dataset,
    validation_data=(test_dataset, test_dataset),
    epochs=2048,
    shuffle=True,
    verbose=2,
)

Epoch 1/2048
1964/1964 - 3s - loss: 0.0032 - val_loss: 0.0015 - 3s/epoch - 2ms/step
Epoch 2/2048
1964/1964 - 2s - loss: 0.0011 - val_loss: 9.3308e-04 - 2s/epoch - 1ms/step
Epoch 3/2048
1964/1964 - 2s - loss: 9.3492e-04 - val_loss: 8.8971e-04 - 2s/epoch - 1ms/step
Epoch 4/2048
1964/1964 - 3s - loss: 8.9488e-04 - val_loss: 8.3512e-04 - 3s/epoch - 1ms/step
Epoch 5/2048
1964/1964 - 3s - loss: 8.5193e-04 - val_loss: 8.1274e-04 - 3s/epoch - 1ms/step
Epoch 6/2048
1964/1964 - 3s - loss: 8.2851e-04 - val_loss: 7.7718e-04 - 3s/epoch - 1ms/step
Epoch 7/2048
1964/1964 - 3s - loss: 8.1600e-04 - val_loss: 7.7935e-04 - 3s/epoch - 1ms/step
Epoch 8/2048
1964/1964 - 3s - loss: 8.0546e-04 - val_loss: 7.6143e-04 - 3s/epoch - 1ms/step
Epoch 9/2048
1964/1964 - 3s - loss: 7.9933e-04 - val_loss: 7.5989e-04 - 3s/epoch - 1ms/step
Epoch 10/2048
1964/1964 - 3s - loss: 7.9246e-04 - val_loss: 7.6354e-04 - 3s/epoch - 1ms/step
Epoch 11/2048
1964/1964 - 3s - loss: 7.8359e-04 - val_loss: 7.4402e-04 - 3s/epoch - 1ms/ste

Epoch 90/2048
1964/1964 - 3s - loss: 4.0583e-04 - val_loss: 3.8429e-04 - 3s/epoch - 1ms/step
Epoch 91/2048
1964/1964 - 2s - loss: 4.0572e-04 - val_loss: 3.9589e-04 - 2s/epoch - 1ms/step
Epoch 92/2048
1964/1964 - 3s - loss: 4.0472e-04 - val_loss: 3.9599e-04 - 3s/epoch - 1ms/step
Epoch 93/2048
1964/1964 - 3s - loss: 4.0511e-04 - val_loss: 3.8083e-04 - 3s/epoch - 1ms/step
Epoch 94/2048
1964/1964 - 3s - loss: 4.0516e-04 - val_loss: 3.8620e-04 - 3s/epoch - 1ms/step
Epoch 95/2048
1964/1964 - 3s - loss: 4.0443e-04 - val_loss: 3.8328e-04 - 3s/epoch - 1ms/step
Epoch 96/2048
1964/1964 - 3s - loss: 4.0477e-04 - val_loss: 3.8394e-04 - 3s/epoch - 1ms/step
Epoch 97/2048
1964/1964 - 2s - loss: 4.0445e-04 - val_loss: 3.8702e-04 - 2s/epoch - 1ms/step
Epoch 98/2048
1964/1964 - 2s - loss: 4.0286e-04 - val_loss: 3.9021e-04 - 2s/epoch - 1ms/step
Epoch 99/2048
1964/1964 - 2s - loss: 4.0392e-04 - val_loss: 3.9737e-04 - 2s/epoch - 1ms/step
Epoch 100/2048
1964/1964 - 3s - loss: 4.0342e-04 - val_loss: 3.8025e-0

Epoch 178/2048
1964/1964 - 3s - loss: 3.8079e-04 - val_loss: 3.6525e-04 - 3s/epoch - 1ms/step
Epoch 179/2048
1964/1964 - 3s - loss: 3.8125e-04 - val_loss: 3.6354e-04 - 3s/epoch - 1ms/step
Epoch 180/2048
1964/1964 - 2s - loss: 3.8032e-04 - val_loss: 3.7051e-04 - 2s/epoch - 1ms/step
Epoch 181/2048
1964/1964 - 2s - loss: 3.8059e-04 - val_loss: 3.5828e-04 - 2s/epoch - 1ms/step
Epoch 182/2048
1964/1964 - 2s - loss: 3.8065e-04 - val_loss: 3.6636e-04 - 2s/epoch - 1ms/step
Epoch 183/2048
1964/1964 - 2s - loss: 3.7923e-04 - val_loss: 3.6536e-04 - 2s/epoch - 1ms/step
Epoch 184/2048
1964/1964 - 2s - loss: 3.8074e-04 - val_loss: 3.5532e-04 - 2s/epoch - 1ms/step
Epoch 185/2048
1964/1964 - 3s - loss: 3.8021e-04 - val_loss: 3.6468e-04 - 3s/epoch - 1ms/step
Epoch 186/2048
1964/1964 - 3s - loss: 3.8035e-04 - val_loss: 3.5713e-04 - 3s/epoch - 1ms/step
Epoch 187/2048
1964/1964 - 3s - loss: 3.7993e-04 - val_loss: 3.5897e-04 - 3s/epoch - 1ms/step
Epoch 188/2048
1964/1964 - 3s - loss: 3.8015e-04 - val_loss:

1964/1964 - 3s - loss: 3.7342e-04 - val_loss: 3.5173e-04 - 3s/epoch - 1ms/step
Epoch 266/2048
1964/1964 - 2s - loss: 3.7296e-04 - val_loss: 3.4985e-04 - 2s/epoch - 1ms/step
Epoch 267/2048
1964/1964 - 3s - loss: 3.7324e-04 - val_loss: 3.4769e-04 - 3s/epoch - 1ms/step
Epoch 268/2048
1964/1964 - 3s - loss: 3.7304e-04 - val_loss: 3.4709e-04 - 3s/epoch - 1ms/step
Epoch 269/2048
1964/1964 - 3s - loss: 3.7305e-04 - val_loss: 3.6127e-04 - 3s/epoch - 1ms/step
Epoch 270/2048
1964/1964 - 3s - loss: 3.7249e-04 - val_loss: 3.5354e-04 - 3s/epoch - 1ms/step
Epoch 271/2048
1964/1964 - 3s - loss: 3.7303e-04 - val_loss: 3.5445e-04 - 3s/epoch - 1ms/step
Epoch 272/2048
1964/1964 - 3s - loss: 3.7275e-04 - val_loss: 3.5572e-04 - 3s/epoch - 1ms/step
Epoch 273/2048
1964/1964 - 3s - loss: 3.7277e-04 - val_loss: 3.5419e-04 - 3s/epoch - 1ms/step
Epoch 274/2048
1964/1964 - 3s - loss: 3.7272e-04 - val_loss: 3.4915e-04 - 3s/epoch - 1ms/step
Epoch 275/2048
1964/1964 - 3s - loss: 3.7265e-04 - val_loss: 3.6237e-04 - 3

Epoch 353/2048
1964/1964 - 3s - loss: 3.6934e-04 - val_loss: 3.4673e-04 - 3s/epoch - 1ms/step
Epoch 354/2048
1964/1964 - 3s - loss: 3.6937e-04 - val_loss: 3.4821e-04 - 3s/epoch - 1ms/step
Epoch 355/2048
1964/1964 - 3s - loss: 3.6943e-04 - val_loss: 3.5849e-04 - 3s/epoch - 1ms/step
Epoch 356/2048
1964/1964 - 3s - loss: 3.6820e-04 - val_loss: 3.4337e-04 - 3s/epoch - 1ms/step
Epoch 357/2048
1964/1964 - 3s - loss: 3.6853e-04 - val_loss: 3.4656e-04 - 3s/epoch - 1ms/step
Epoch 358/2048
1964/1964 - 3s - loss: 3.6927e-04 - val_loss: 3.5033e-04 - 3s/epoch - 1ms/step
Epoch 359/2048
1964/1964 - 3s - loss: 3.6829e-04 - val_loss: 3.5863e-04 - 3s/epoch - 1ms/step
Epoch 360/2048
1964/1964 - 2s - loss: 3.6844e-04 - val_loss: 3.5614e-04 - 2s/epoch - 1ms/step
Epoch 361/2048
1964/1964 - 3s - loss: 3.6841e-04 - val_loss: 3.5515e-04 - 3s/epoch - 1ms/step
Epoch 362/2048
1964/1964 - 3s - loss: 3.6800e-04 - val_loss: 3.5064e-04 - 3s/epoch - 1ms/step
Epoch 363/2048
1964/1964 - 3s - loss: 3.6892e-04 - val_loss:

In [None]:
# Position (not label) of the test row used for the round-trip check below.
index = 1

# Double brackets keep the row two-dimensional: shape (1, n_features).
sample = test_dataset.iloc[[index]].to_numpy()
print(sample)

# Dataset label of that same row.
actual_coords = test_emotions_OG.iloc[index].to_numpy()

In [None]:
# Encode the single sample into its latent representation and show it.
generated = autoencoder.encoder.predict(sample)
print(generated)

In [None]:
# Decode the latent vector back to feature space and show the reconstruction.
regen = autoencoder.decoder.predict(generated)
print(regen)

In [None]:
# Run the three pre-trained regressors on the original sample and on its
# reconstruction, to see how much the round trip shifts the predicted P/A/D.
pad_models = (model_p, model_a, model_d)

scaled_sample = scaler.transform(sample)
real_coordinates = tuple(pad_model.predict(scaled_sample) for pad_model in pad_models)

scaled_regen = scaler.transform(regen)
generated_coordinates = tuple(pad_model.predict(scaled_regen) for pad_model in pad_models)

# Each tuple holds three length-1 arrays; .T[0] turns that into one [P, A, D] row.
print('Real: %s' % np.asarray(actual_coords))
print('Predicted: %s' % np.asarray(real_coordinates).T[0])
print('Generated: %s' % np.asarray(generated_coordinates).T[0])

## Save Model

In [None]:
# Persist the trained autoencoder under the given path.
autoencoder.save("models/bandai/5frame/ae/autoencoder_5")


# PAD - Latent Space 

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from scikeras.wrappers import KerasRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold

import warnings
warnings.filterwarnings('ignore')

In [None]:
# Latent codes of the feature rows; these become the regression targets further down.
encoder = autoencoder.encoder
generated_train = encoder.predict(train_dataset)
generated_test = encoder.predict(test_dataset)

# Regression inputs: the P/A/D values predicted by the pre-trained models
# (swap in the *_OG frames below to use the dataset labels instead).
generated_train_x, generated_test_x = train_emotions.copy(), test_emotions.copy()
#generated_train_x = train_emotions_OG.copy()
#generated_test_x = test_emotions_OG.copy()

In [None]:
# Flag training rows whose *reconstructed* features no longer map to (roughly) the
# same P/A/D as the original features did. A row is rejected when any single axis
# differs by more than 0.25, or when the mean absolute difference exceeds 0.15.
#
# Everything is done in one batch: the previous per-row loop ran the scaler and
# three regressors once per row, printed a progress line per row, and carried a
# dead `scaler.transform(sample)` that depended on a variable left over from an
# earlier cell.
generated_decoded_train = autoencoder.decoder.predict(generated_train)
assert len(generated_decoded_train) == len(train_emotions), "latent codes and emotions are out of sync"

scaled_decoded = scaler.transform(generated_decoded_train)
decoded_pad = np.column_stack([
    model_p.predict(scaled_decoded),
    model_a.predict(scaled_decoded),
    model_d.predict(scaled_decoded),
])

# Positional comparison: row i of the decoded predictions vs. row i of train_emotions.
abs_errors = np.abs(train_emotions.to_numpy() - decoded_pad)
axis_too_far = (abs_errors > 0.25).any(axis=1)
mean_too_far = abs_errors.mean(axis=1) > 0.15

# Positional row numbers (plain ints); the following cells translate them to labels.
index_for_removal = np.flatnonzero(axis_too_far | mean_too_far).tolist()

print(len(index_for_removal))


In [None]:
# Remove the rows flagged by the reconstruction check from the feature frame.
rejected_labels = train_dataset.index[index_for_removal]
train_dataset.drop(index=rejected_labels, inplace=True)
print(train_dataset.shape)
train_dataset.head()

In [None]:
# Remove the same positions from the predicted P/A/D frame.
rejected_labels = train_emotions.index[index_for_removal]
train_emotions.drop(index=rejected_labels, inplace=True)
print(train_emotions.shape)
train_emotions.head()

In [None]:
# Remove the same positions from the dataset-label frame.
rejected_labels = train_emotions_OG.index[index_for_removal]
train_emotions_OG.drop(index=rejected_labels, inplace=True)
print(train_emotions_OG.shape)
train_emotions_OG.head()

In [None]:
# Flag test rows whose *reconstructed* features no longer map to (roughly) the
# same P/A/D as the original features did. A row is rejected when any single axis
# differs by more than 0.25, or when the mean absolute difference exceeds 0.15.
#
# Everything is done in one batch: the previous per-row loop ran the scaler and
# three regressors once per row, printed a progress line per row, and carried a
# dead `scaler.transform(sample)` that depended on a variable left over from an
# earlier cell.
generated_decoded_test = autoencoder.decoder.predict(generated_test)
assert len(generated_decoded_test) == len(test_emotions), "latent codes and emotions are out of sync"

scaled_decoded = scaler.transform(generated_decoded_test)
decoded_pad = np.column_stack([
    model_p.predict(scaled_decoded),
    model_a.predict(scaled_decoded),
    model_d.predict(scaled_decoded),
])

# Positional comparison: row i of the decoded predictions vs. row i of test_emotions.
abs_errors = np.abs(test_emotions.to_numpy() - decoded_pad)
axis_too_far = (abs_errors > 0.25).any(axis=1)
mean_too_far = abs_errors.mean(axis=1) > 0.15

# Positional row numbers (plain ints); the following cells translate them to labels.
index_for_removal = np.flatnonzero(axis_too_far | mean_too_far).tolist()

print(len(index_for_removal))


In [None]:
# Remove the rows flagged by the reconstruction check from the feature frame.
rejected_labels = test_dataset.index[index_for_removal]
test_dataset.drop(index=rejected_labels, inplace=True)
print(test_dataset.shape)
test_dataset.head()

In [None]:
# Remove the same positions from the predicted P/A/D frame.
rejected_labels = test_emotions.index[index_for_removal]
test_emotions.drop(index=rejected_labels, inplace=True)
print(test_emotions.shape)
test_emotions.head()

In [None]:
# Remove the same positions from the dataset-label frame.
rejected_labels = test_emotions_OG.index[index_for_removal]
test_emotions_OG.drop(index=rejected_labels, inplace=True)
print(test_emotions_OG.shape)
test_emotions_OG.head()

In [None]:
# Re-encode after the filtering above so the latent targets match the rows that are left.
encoder = autoencoder.encoder
generated_train = encoder.predict(train_dataset)
generated_test = encoder.predict(test_dataset)

# Regression inputs: the (filtered) predicted P/A/D values
# (swap in the *_OG frames below to use the dataset labels instead).
generated_train_x, generated_test_x = train_emotions.copy(), test_emotions.copy()
#generated_train_x = train_emotions_OG.copy()
#generated_test_x = test_emotions_OG.copy()

## XGB

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.lines as mlines

import tensorflow as tf

import sklearn
from sklearn.model_selection import cross_val_score, KFold, StratifiedKFold

import xgboost as xgb

xgb.set_config(verbosity=0)

from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import normalize

from sklearn.utils import shuffle
import math
import joblib
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error

from datetime import datetime
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
import time

In [None]:
# One 1-D target vector per latent feature (columns 0..4 of the encoder output),
# for the test and the training split respectively.
(generated_test_1, generated_test_2, generated_test_3,
 generated_test_4, generated_test_5) = (generated_test[:, col] for col in range(5))

(generated_train_1, generated_train_2, generated_train_3,
 generated_train_4, generated_train_5) = (generated_train[:, col] for col in range(5))

In [None]:
xgbr = xgb.XGBRegressor(verbosity=1) 

# Candidate values; the regressors below use fixed settings drawn from these ranges.
params = {
        'eta': [0.01, 0.05, 0.1],
        'min_child_weight': [1, 5, 11, 21],
        'max_depth': [3, 6, 10, 15],
        'gamma': [0, 0.001, 0.01],
        'subsample': [0.75, 1],
        'colsample_bytree': [0.75, 1],
        'lambda': [1, 1.25],
        'alpha': [0.0, 0.25]
        }

# Settings shared by all five PAD -> latent-feature regressors.
shared_settings = dict(
    n_estimators=1500,
    gamma=0.0,
    colsample_bytree=0.75,
    objective="reg:squarederror",
    tree_method='gpu_hist',
)

# Latent features 2, 4 and 5 use the same shallow, lightly regularised configuration.
shallow_settings = dict(
    learning_rate=0.01, max_depth=3, min_child_weight=1,
    reg_alpha=0.0, reg_lambda=1.25, subsample=0.75,
)

# NOTE(review): only model_1 pins gpu_id=1; the others use the default device. Confirm this is intended.
model_1 = xgb.XGBRegressor(
    learning_rate=0.01, max_depth=3, min_child_weight=11,
    reg_alpha=0.25, reg_lambda=1.0, subsample=0.75,
    gpu_id=1, **shared_settings
)

model_2 = xgb.XGBRegressor(**shallow_settings, **shared_settings)

# Latent feature 3 gets deeper trees, a higher learning rate and no row subsampling.
model_3 = xgb.XGBRegressor(
    learning_rate=0.05, max_depth=10, min_child_weight=5,
    reg_alpha=0.25, reg_lambda=1.0, subsample=1.0,
    **shared_settings
)

model_4 = xgb.XGBRegressor(**shallow_settings, **shared_settings)

model_5 = xgb.XGBRegressor(**shallow_settings, **shared_settings)

In [None]:
# Fit each regressor on the PAD inputs against its own latent-feature column.
training_pairs = (
    (model_1, generated_train_1),
    (model_2, generated_train_2),
    (model_3, generated_train_3),
    (model_4, generated_train_4),
    (model_5, generated_train_5),
)
for regressor, latent_target in training_pairs:
    regressor.fit(generated_train_x, latent_target)

# fit() returns the estimator itself, so this shows the same cell output as before.
model_5

In [None]:
# Score every latent-feature regressor on the held-out PAD inputs.
evaluation_jobs = (
    ("Latent Feature 1", model_1, generated_test_1),
    ("\nLatent Feature 2", model_2, generated_test_2),
    ("\nLatent Feature 3", model_3, generated_test_3),
    ("\nLatent Feature 4", model_4, generated_test_4),
    ("\nLatent Feature 5", model_5, generated_test_5),
)

test_predictions = []
for heading, regressor, latent_true in evaluation_jobs:
    latent_pred = regressor.predict(generated_test_x)
    test_predictions.append(latent_pred)

    mse = mean_squared_error(latent_true, latent_pred)
    mae = mean_absolute_error(latent_true, latent_pred)
    print(heading)
    print("MSE: %.2f" % mse)
    print("MAE: %.2f" % mae)

# Keep the per-feature predictions available under their usual names.
pred_y_1, pred_y_2, pred_y_3, pred_y_4, pred_y_5 = test_predictions

In [None]:
# Position of the test row used for the end-to-end PAD -> latent -> features check.
index = 0

# Double brackets keep the row two-dimensional: shape (1, 3).
row = generated_test_x.iloc[[index]].to_numpy()
#row = np.asarray([[0.0, 0.0, 0.0]])

# One predicted latent value per regressor, each returned as a length-1 array.
y_1, y_2, y_3, y_4, y_5 = (
    regressor.predict(row)
    for regressor in (model_1, model_2, model_3, model_4, model_5)
)

In [None]:
# Stack the five scalar predictions into a single (1, 5) latent vector for the decoder.
latent_values = [prediction[0] for prediction in (y_1, y_2, y_3, y_4, y_5)]
sample = np.asarray([latent_values])

# The PAD input the latent vector was predicted from.
og_coords = generated_test_x.iloc[index]
print(og_coords)
print(sample)

In [None]:
# Decode the predicted latent vector into a feature row and show it.
regen = autoencoder.decoder.predict(sample)
print(regen)

In [None]:
# Feed the generated features back through the pre-trained regressors: the result
# should land close to the PAD values the generation started from.
scaled_regen = scaler.transform(regen)

generated_coordinates = tuple(
    pad_model.predict(scaled_regen) for pad_model in (model_p, model_a, model_d)
)

# The tuple holds three length-1 arrays; .T[0] turns that into one [P, A, D] row.
print('Real: %s' % np.asarray(og_coords))
print('Generated: %s' % np.asarray(generated_coordinates).T[0])

## Save Models

In [None]:
# Write each latent-feature regressor to its own JSON file.
save_targets = (
    (model_1, "models/bandai/5frame/ae/bandai_pad2l1_model.json"),
    (model_2, "models/bandai/5frame/ae/bandai_pad2l2_model.json"),
    (model_3, "models/bandai/5frame/ae/bandai_pad2l3_model.json"),
    (model_4, "models/bandai/5frame/ae/bandai_pad2l4_model.json"),
    (model_5, "models/bandai/5frame/ae/bandai_pad2l5_model.json"),
)
for regressor, destination in save_targets:
    regressor.save_model(destination)

## RandomizedSearchCV

In [None]:
# A parameter grid for XGBoost
# https://www.analyticsvidhya.com/blog/2016/03/complete-guide-parameter-tuning-xgboost-with-codes-python/
# Search space sampled by the RandomizedSearchCV cells below. 'eta', 'lambda' and
# 'alpha' are XGBoost's native names for learning_rate, reg_lambda and reg_alpha.
params = {
        'eta': [0.01, 0.05, 0.1],
        'min_child_weight': [1, 5, 11, 21],
        'max_depth': [3, 6, 10, 15],
        'gamma': [0, 0.001],
        'subsample': [0.75, 1],
        'colsample_bytree': [0.75, 1],
        'lambda': [1, 1.25],
        'alpha': [0.0, 0.25]
        }

# Number of parameter combinations each search samples from the grid above.
n_iter = 150

In [None]:
# Five identically configured base estimators, one per latent feature; the
# hyper-parameters from `params` are supplied by the randomized search.
# These replace the hand-tuned model_1..model_5 defined earlier.
model_1, model_2, model_3, model_4, model_5 = (
    xgb.XGBRegressor(
        n_estimators=5000,
        objective="reg:squarederror",
        tree_method='gpu_hist', gpu_id=1
    )
    for _ in range(5)
)

In [None]:
# Randomized hyper-parameter search for latent feature 1.
# Both the fold assignment and the candidate sampling are seeded, so a re-run
# evaluates the same folds and the same n_iter parameter combinations.
kfold = KFold(n_splits=2, shuffle=True, random_state=42)

random_search_1 = RandomizedSearchCV(model_1, param_distributions=params,
                               cv=kfold, scoring='neg_mean_squared_error',
                               n_iter=n_iter, random_state=42)

start = time.time()
random_search_1.fit(generated_train_x, generated_train_1)


print("RandomizedSearchCV took %.2f seconds for %d candidates"
      " parameter settings." % ((time.time() - start), n_iter))

In [None]:
# Randomized hyper-parameter search for latent feature 2.
# Both the fold assignment and the candidate sampling are seeded, so a re-run
# evaluates the same folds and the same n_iter parameter combinations.
kfold = KFold(n_splits=2, shuffle=True, random_state=42)

random_search_2 = RandomizedSearchCV(model_2, param_distributions=params,
                               cv=kfold, scoring='neg_mean_squared_error',
                               n_iter=n_iter, random_state=42)

start = time.time()
random_search_2.fit(generated_train_x, generated_train_2)


print("RandomizedSearchCV took %.2f seconds for %d candidates"
      " parameter settings." % ((time.time() - start), n_iter))

In [None]:
# Randomized hyper-parameter search for latent feature 3.
# Both the fold assignment and the candidate sampling are seeded, so a re-run
# evaluates the same folds and the same n_iter parameter combinations.
kfold = KFold(n_splits=2, shuffle=True, random_state=42)

random_search_3 = RandomizedSearchCV(model_3, param_distributions=params,
                               cv=kfold, scoring='neg_mean_squared_error',
                               n_iter=n_iter, random_state=42)

start = time.time()
random_search_3.fit(generated_train_x, generated_train_3)


print("RandomizedSearchCV took %.2f seconds for %d candidates"
      " parameter settings." % ((time.time() - start), n_iter))

In [None]:
# Randomized hyper-parameter search for latent feature 4.
# Both the fold assignment and the candidate sampling are seeded, so a re-run
# evaluates the same folds and the same n_iter parameter combinations.
kfold = KFold(n_splits=2, shuffle=True, random_state=42)

random_search_4 = RandomizedSearchCV(model_4, param_distributions=params,
                               cv=kfold, scoring='neg_mean_squared_error',
                               n_iter=n_iter, random_state=42)

start = time.time()
random_search_4.fit(generated_train_x, generated_train_4)


print("RandomizedSearchCV took %.2f seconds for %d candidates"
      " parameter settings." % ((time.time() - start), n_iter))

In [None]:
# Randomized hyper-parameter search for latent feature 5.
# Both the fold assignment and the candidate sampling are seeded, so a re-run
# evaluates the same folds and the same n_iter parameter combinations.
kfold = KFold(n_splits=2, shuffle=True, random_state=42)

random_search_5 = RandomizedSearchCV(model_5, param_distributions=params,
                               cv=kfold, scoring='neg_mean_squared_error',
                               n_iter=n_iter, random_state=42)

start = time.time()
random_search_5.fit(generated_train_x, generated_train_5)


print("RandomizedSearchCV took %.2f seconds for %d candidates"
      " parameter settings." % ((time.time() - start), n_iter))

In [None]:
# Best estimator found by the search for latent feature 1, and its full parameter set.
best_regressor_1 = random_search_1.best_estimator_

print(best_regressor_1.get_params())

In [None]:
# Best estimator found by the search for latent feature 2, and its full parameter set.
best_regressor_2 = random_search_2.best_estimator_

print(best_regressor_2.get_params())

In [None]:
# Best estimator found by the search for latent feature 3, and its full parameter set.
best_regressor_3 = random_search_3.best_estimator_

print(best_regressor_3.get_params())

In [None]:
# Best estimator found by the search for latent feature 4, and its full parameter set.
best_regressor_4 = random_search_4.best_estimator_

print(best_regressor_4.get_params())

In [None]:
# Best estimator found by the search for latent feature 5, and its full parameter set.
best_regressor_5 = random_search_5.best_estimator_

print(best_regressor_5.get_params())

In [None]:
# Score every tuned latent-feature regressor on the held-out PAD inputs.
evaluation_jobs = (
    ("Latent Feature 1", best_regressor_1, generated_test_1),
    ("\nLatent Feature 2", best_regressor_2, generated_test_2),
    ("\nLatent Feature 3", best_regressor_3, generated_test_3),
    ("\nLatent Feature 4", best_regressor_4, generated_test_4),
    ("\nLatent Feature 5", best_regressor_5, generated_test_5),
)

test_predictions = []
for heading, regressor, latent_true in evaluation_jobs:
    latent_pred = regressor.predict(generated_test_x)
    test_predictions.append(latent_pred)

    mse = mean_squared_error(latent_true, latent_pred)
    mae = mean_absolute_error(latent_true, latent_pred)
    print(heading)
    print("MSE: %.2f" % mse)
    print("MAE: %.2f" % mae)

# Keep the per-feature predictions available under their usual names.
pred_y_1, pred_y_2, pred_y_3, pred_y_4, pred_y_5 = test_predictions

In [None]:
# Position of the test row used for the end-to-end check with the tuned regressors.
index = 0

# Double brackets keep the row two-dimensional: shape (1, 3).
row = generated_test_x.iloc[[index]].to_numpy()

# One predicted latent value per tuned regressor, each returned as a length-1 array.
y_1, y_2, y_3, y_4, y_5 = (
    regressor.predict(row)
    for regressor in (
        best_regressor_1, best_regressor_2, best_regressor_3,
        best_regressor_4, best_regressor_5,
    )
)

In [None]:
# Stack the five scalar predictions into a single (1, 5) latent vector for the decoder.
latent_values = [prediction[0] for prediction in (y_1, y_2, y_3, y_4, y_5)]
sample = np.asarray([latent_values])

# The PAD input the latent vector was predicted from.
og_coords = generated_test_x.iloc[index]
print(og_coords)
print(sample)

In [None]:
# Decode the latent vector predicted by the tuned regressors and show the feature row.
regen = autoencoder.decoder.predict(sample)
print(regen)

In [None]:
# Feed the generated features back through the pre-trained regressors: the result
# should land close to the PAD values the generation started from.
scaled_regen = scaler.transform(regen)

generated_coordinates = tuple(
    pad_model.predict(scaled_regen) for pad_model in (model_p, model_a, model_d)
)

# The tuple holds three length-1 arrays; .T[0] turns that into one [P, A, D] row.
print('Real: %s' % np.asarray(og_coords))
print('Generated: %s' % np.asarray(generated_coordinates).T[0])

In [None]:
# Write each tuned regressor to its own JSON file.
# NOTE(review): these are the same paths the "Save Models" cell writes, so running
# both cells leaves only the tuned models on disk. Confirm that is intended.
save_targets = (
    (best_regressor_1, "models/bandai/5frame/ae/bandai_pad2l1_model.json"),
    (best_regressor_2, "models/bandai/5frame/ae/bandai_pad2l2_model.json"),
    (best_regressor_3, "models/bandai/5frame/ae/bandai_pad2l3_model.json"),
    (best_regressor_4, "models/bandai/5frame/ae/bandai_pad2l4_model.json"),
    (best_regressor_5, "models/bandai/5frame/ae/bandai_pad2l5_model.json"),
)
for regressor, destination in save_targets:
    regressor.save_model(destination)