In [892]:
from tensorflow import keras
from keras import layers
from keras.datasets import mnist
import numpy as np
import matplotlib.pyplot as plt
from keras import backend as K
import pandas as pd
from sklearn.utils import shuffle
import tensorflow as tf
import xgboost as xgb
import joblib

# Current Best - 13 (0.156)

# AutoEncoder

## Encoder

In [893]:
original_dim = 27
intermediate_dim = 13
latent_dim = 1

inputs = keras.Input(shape=(original_dim,))
x = layers.Dense(intermediate_dim, activation='linear')(inputs)
#x = layers.Dense(intermediate_dim, activation='linear')(inputs)

h = layers.Dense(intermediate_dim, activation='relu')(x)
z_mean = layers.Dense(latent_dim)(h)
z_log_sigma = layers.Dense(latent_dim)(h)


### Sampling Layer

In [894]:
class Sampling(layers.Layer):
    """Uses (z_mean, z_log_var) to sample z, the vector encoding a digit."""

    def call(self, inputs):
        z_mean, z_log_sigma = inputs
        
        batch = tf.shape(z_mean)[0]
        
        dim = tf.shape(z_mean)[1]
        
        epsilon = tf.keras.backend.random_normal(shape=(batch, dim))
        return z_mean + tf.exp(0.5 * z_log_sigma) * epsilon

z = Sampling()([z_mean, z_log_sigma])

## Decoder & VAE

In [895]:
# Create encoder
encoder = keras.Model(inputs, [z_mean, z_log_sigma, z], name='encoder')

# Create decoder
latent_inputs = keras.Input(shape=(latent_dim,), name='z_sampling')
x = layers.Dense(intermediate_dim, activation='relu')(latent_inputs)
#x = layers.Dense(intermediate_dim, activation='linear')(x)

outputs = layers.Dense(original_dim, activation='linear')(x)
decoder = keras.Model(latent_inputs, outputs, name='decoder')

# instantiate VAE model
outputs = decoder(encoder(inputs)[2])
vae = keras.Model(inputs, outputs, name='vae_mlp')

## Loss

In [896]:
#mse = keras.losses.MeanSquaredError()
#reconstruction_loss = mse(inputs, outputs)
#reconstruction_loss *= 27
reconstruction_loss_factor = 2500
reconstruction_loss = tf.keras.backend.mean(tf.keras.backend.square(inputs-outputs))

kl_loss = 1 + z_log_sigma - K.square(z_mean) - K.exp(z_log_sigma)
kl_loss = K.sum(kl_loss, axis=-1)
kl_loss *= -0.5

vae_loss = K.mean((reconstruction_loss_factor*reconstruction_loss) + kl_loss)
vae.add_loss(vae_loss)

## Data

In [897]:
dataset = pd.read_csv('datasets/Fs_B_O_DANCE_WALK_KIN_0.5sec.csv')

train_dataset = dataset.sample(frac=0.85, random_state=42)
test_dataset = dataset.drop(train_dataset.index)

print("No Training Samples:",train_dataset.shape[0])
print("No Test Samples:",test_dataset.shape[0])

train_dataset = shuffle(train_dataset)
test_dataset = shuffle(test_dataset)

train_emotions = pd.concat([train_dataset.pop(x) for x in ['EMOTION_P', 'EMOTION_A', 'EMOTION_D']], axis=1)
test_emotions = pd.concat([test_dataset.pop(x) for x in ['EMOTION_P', 'EMOTION_A', 'EMOTION_D']], axis=1)


train_dataset = np.asarray(train_dataset)
test_dataset = np.asarray(test_dataset)

x_train = train_dataset.reshape((len(train_dataset), np.prod(train_dataset.shape[1:])))
x_test = test_dataset.reshape((len(test_dataset), np.prod(test_dataset.shape[1:])))

print(len(x_train[0]))

No Training Samples: 34156
No Test Samples: 6028
27


## Train

In [898]:
optimizer=keras.optimizers.Adam(learning_rate=0.001)
vae.compile(optimizer=optimizer)

In [899]:
vae.fit(x_train, x_train,
        epochs=128,
        batch_size=16,
       )

Epoch 1/128
Epoch 2/128
Epoch 3/128
Epoch 4/128
Epoch 5/128
Epoch 6/128
Epoch 7/128
Epoch 8/128
Epoch 9/128
Epoch 10/128
Epoch 11/128
Epoch 12/128
Epoch 13/128
Epoch 14/128
Epoch 15/128
Epoch 16/128
Epoch 17/128
Epoch 18/128
Epoch 19/128
Epoch 20/128
Epoch 21/128
Epoch 22/128
Epoch 23/128
Epoch 24/128
Epoch 25/128
Epoch 26/128
Epoch 27/128
Epoch 28/128
Epoch 29/128
Epoch 30/128
Epoch 31/128
Epoch 32/128
Epoch 33/128
Epoch 34/128
Epoch 35/128
Epoch 36/128
Epoch 37/128
Epoch 38/128
Epoch 39/128
Epoch 40/128
Epoch 41/128
Epoch 42/128
Epoch 43/128
Epoch 44/128
Epoch 45/128
Epoch 46/128
Epoch 47/128
Epoch 48/128
Epoch 49/128
Epoch 50/128
Epoch 51/128
Epoch 52/128
Epoch 53/128
Epoch 54/128
Epoch 55/128
Epoch 56/128
Epoch 57/128
Epoch 58/128
Epoch 59/128
Epoch 60/128
Epoch 61/128
Epoch 62/128
Epoch 63/128
Epoch 64/128
Epoch 65/128
Epoch 66/128
Epoch 67/128
Epoch 68/128
Epoch 69/128
Epoch 70/128
Epoch 71/128
Epoch 72/128
Epoch 73/128
Epoch 74/128
Epoch 75/128
Epoch 76/128
Epoch 77/128
Epoch 78

Epoch 96/128
Epoch 97/128
Epoch 98/128
Epoch 99/128
Epoch 100/128
Epoch 101/128
Epoch 102/128
Epoch 103/128
Epoch 104/128
Epoch 105/128
Epoch 106/128
Epoch 107/128
Epoch 108/128
Epoch 109/128
Epoch 110/128
Epoch 111/128
Epoch 112/128
Epoch 113/128
Epoch 114/128
Epoch 115/128
Epoch 116/128
Epoch 117/128
Epoch 118/128
Epoch 119/128
Epoch 120/128
Epoch 121/128
Epoch 122/128
Epoch 123/128
Epoch 124/128
Epoch 125/128
Epoch 126/128
Epoch 127/128
Epoch 128/128


<keras.callbacks.History at 0x7f9038082ed0>

## Test

### Sample Comparison & Emotion Classification

In [900]:
sample = np.asarray(x_test[0])
sample = sample.reshape(1,-1)
print(sample)

[[ 0.4466348   0.14710729  0.3739801   0.25799571  0.39682912  0.4152373
   0.26684728  0.3484388   0.286151    0.27755528  0.0101821   0.06165774
  -0.12004568  0.98675356  0.22833495  0.12259644  0.08724231  0.1242854
   0.86252416  0.45885172  0.28940966  0.27274926  0.44517976  2.21142586
   1.66203023  0.66801273  0.2265753 ]]


In [901]:
mean, var, generated = encoder.predict(sample)
print(generated)

[[0.5033684]]


In [902]:
regen = decoder.predict(generated)
print(regen)

[[ 5.5484861e-01  3.5084468e-01  3.6406499e-01  3.7590203e-01
   4.1386586e-01  4.2803577e-01  3.3263391e-01  3.5062253e-01
   2.8623506e-01  2.7862325e-01  6.7920238e-04 -9.2407927e-02
  -8.8538274e-02  9.5004171e-01  3.1129864e-01  1.2893844e-01
   1.4315701e-01  4.7333556e-01  4.8753867e-01  3.0825317e-01
   3.1995279e-01  3.4611589e-01  1.2733268e+00  1.2840443e+00
   7.9430330e-01  8.1321383e-01  7.9671419e-01]]


In [903]:
model_p = xgb.XGBRegressor(verbosity=0)
model_p.load_model("../../emotion_classifier/model_training/models/l2p_dance_model.json")

model_a = xgb.XGBRegressor(verbosity=0)
model_a.load_model("../../emotion_classifier/model_training/models/l2a_dance_model.json")

model_d = xgb.XGBRegressor(verbosity=0)
model_d.load_model("../../emotion_classifier/model_training/models/l2d_dance_model.json")

scaler = joblib.load('../../emotion_classifier/model_training/datasets/scalers/standardizers/Fs_B_S_DANCE_WALK_KIN_0.5sec.pkl') 

In [904]:
scaled_sample = scaler.transform(sample)

real_coordinates = (
    model_p.predict(scaled_sample),
    model_a.predict(scaled_sample),
    model_d.predict(scaled_sample)
)

scaled_regen = scaler.transform(regen)

generated_coordinates = (
    model_p.predict(scaled_regen),
    model_a.predict(scaled_regen),
    model_d.predict(scaled_regen)
)


print('Real: %s' % np.asarray(real_coordinates))
print('Predicted: %s' % np.asarray(generated_coordinates))

Real: [[-0.22140872]
 [-0.21172825]
 [-0.24292253]]
Predicted: [[ 0.01884224]
 [-0.21144058]
 [ 0.17568679]]


  "X does not have valid feature names, but"
  "X does not have valid feature names, but"


### MAE & MSE

In [905]:
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error

In [906]:
mean, var, generated = encoder.predict(x_test)
regen = decoder.predict(generated)

print(regen)

[[0.55787915 0.35199797 0.36516345 ... 0.809126   0.82829225 0.812191  ]
 [0.5661112  0.35513067 0.3681472  ... 0.8493901  0.8692511  0.85423195]
 [0.5012837  0.33046055 0.3446502  ... 0.53231066 0.54670066 0.5231598 ]
 ...
 [0.4742248  0.3201633  0.3348426  ... 0.3999623  0.41206875 0.38497084]
 [0.89977664 0.46078876 0.47904128 ... 1.7156973  1.8581711  1.5840826 ]
 [0.6486139  0.36632133 0.37001365 ... 2.332574   2.525494   1.6856515 ]]


In [907]:
mae_errors = mean_absolute_error(x_test, regen, multioutput='raw_values')
mse_errors = mean_squared_error(x_test, regen, multioutput='raw_values')

features = ["max_hand_distance",
          "avg_l_hand_hip_distance",
          "avg_r_hand_hip_distance",
          "max_stride_length",
          "avg_l_hand_chest_distance",
          "avg_r_hand_chest_distance",
          "avg_l_elbow_hip_distance",
          "avg_r_elbow_hip_distance",
          "avg_chest_pelvis_distance",
          "avg_neck_chest_distance",
          "avg_neck_rotation_w", "avg_neck_rotation_x", "avg_neck_rotation_y", "avg_neck_rotation_z",
          "avg_total_body_volume",
          "avg_triangle_area_hands_neck",
          "avg_triangle_area_feet_hips",
          
          "l_hand_speed",
          "r_hand_speed",
          "l_foot_speed",
          "r_foot_speed",
          "neck_speed",
          
          "l_hand_acceleration_magnitude",
          "r_hand_acceleration_magnitude",
          "l_foot_acceleration_magnitude",
          "r_foot_acceleration_magnitude",
          "neck_acceleration_magnitude",
         ]

print("Overall MAE: " + str(mean_absolute_error(x_test, regen)))

print()
for i in range(len(errors)):
    print("==" + features[i] + "==")
    print("MSE: %.5f" % mse_errors[i])
    print("MAE: %.5f" % mae_errors[i])
    print("Example [Regen-Real]: " + str(regen[i][i]) + " - " + str(x_test[i][i]))
    print()

Overall MAE: 0.15501373445649122

==max_hand_distance==
MSE: 0.05516
MAE: 0.18323
Example [Regen-Real]: 0.55787915 - 0.446634799880184

==avg_l_hand_hip_distance==
MSE: 0.01851
MAE: 0.10729
Example [Regen-Real]: 0.35513067 - 0.441235588949159

==avg_r_hand_hip_distance==
MSE: 0.02302
MAE: 0.12063
Example [Regen-Real]: 0.3446502 - 0.3975119209907523

==max_stride_length==
MSE: 0.01941
MAE: 0.10759
Example [Regen-Real]: 0.40246898 - 0.695378154366109

==avg_l_hand_chest_distance==
MSE: 0.01120
MAE: 0.08009
Example [Regen-Real]: 0.49335253 - 0.6968724931504217

==avg_r_hand_chest_distance==
MSE: 0.01132
MAE: 0.07951
Example [Regen-Real]: 0.41673237 - 0.3997284225140244

==avg_l_elbow_hip_distance==
MSE: 0.00510
MAE: 0.05250
Example [Regen-Real]: 0.3657751 - 0.3816283799440004

==avg_r_elbow_hip_distance==
MSE: 0.00556
MAE: 0.05462
Example [Regen-Real]: 0.34057185 - 0.3553271114075399

==avg_chest_pelvis_distance==
MSE: 0.00000
MAE: 0.00014
Example [Regen-Real]: 0.28622597 - 0.286150999984

# PAD to Latent Space

In [938]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.lines as mlines

import tensorflow as tf

import sklearn
from sklearn.model_selection import cross_val_score, KFold, StratifiedKFold

import xgboost as xgb

xgb.set_config(verbosity=0)

from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import normalize

from sklearn.utils import shuffle
import math
import joblib

from datetime import datetime
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
import time


## Create Dataset

In [917]:
reg_train_X = train_emotions
reg_test_X = test_emotions

reg_train_X

Unnamed: 0,EMOTION_P,EMOTION_A,EMOTION_D
8678,0.50,0.7,0.4
29008,0.60,0.5,0.2
3261,0.70,0.2,0.2
926,0.60,0.5,0.2
31804,0.60,0.5,0.2
...,...,...,...
1901,-0.50,0.6,0.9
21963,0.70,0.2,0.2
12966,-0.60,-0.3,-0.3
6060,-0.50,0.6,0.9


In [918]:
reg_test_X

Unnamed: 0,EMOTION_P,EMOTION_A,EMOTION_D
9200,0.10,-0.70,-0.2
19118,-0.35,0.70,-0.8
35367,-0.40,0.25,-0.1
30508,-0.50,0.60,0.9
15792,0.10,-0.70,-0.2
...,...,...,...
21904,0.70,0.20,0.2
20308,-0.35,0.70,-0.8
19799,-0.85,-0.10,-0.8
19584,0.60,-0.55,0.1


In [919]:
mean, var, generated = encoder.predict(x_train)
reg_train_Y = generated

mean, var, generated = encoder.predict(x_test)
reg_test_Y = generated

print(len(reg_train_Y))
print(len(reg_test_Y))

34156
6028


## Train Regressor

In [920]:
model = xgb.XGBRegressor(
                    n_estimators=1500, learning_rate=0.05, max_depth=10, min_child_weight=5, 
                    reg_alpha=0.1, reg_lambda=1, gamma=0.0,
                    subsample=0.75, colsample_bytree=0.75, objective="reg:squarederror",
                    tree_method='gpu_hist'
                )

In [921]:
model.fit(reg_train_X, reg_train_Y)

XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=0.75,
             enable_categorical=False, gamma=0.0, gpu_id=0,
             importance_type=None, interaction_constraints='',
             learning_rate=0.05, max_delta_step=0, max_depth=10,
             min_child_weight=5, missing=nan, monotone_constraints='()',
             n_estimators=1500, n_jobs=12, num_parallel_tree=1,
             predictor='auto', random_state=0, reg_alpha=0.1, reg_lambda=1,
             scale_pos_weight=1, subsample=0.75, tree_method='gpu_hist',
             validate_parameters=1, verbosity=None)

## Test Model

In [922]:
score = model.score(reg_train_X, reg_train_Y)
print("Training score: ", score)

Training score:  0.17879544902442368


In [923]:
pred_y = model.predict(reg_test_X)
mse = mean_squared_error(reg_test_Y, pred_y)
mae = mean_absolute_error(reg_test_Y, pred_y)

print("MSE: %.2f" % mse)
print("MAE: %.2f" % mae)

MSE: 0.98
MAE: 0.78


In [934]:
index = 20

real_value = np.asarray([reg_test_Y[index]])

row=reg_test_X.iloc[index]
row = np.asarray([row])

predicted = model.predict(row)

print('Real: %s' % real_value[0])
print('Predicted: %s' % predicted)

Real: [0.07238135]
Predicted: [0.39500234]


## RandomSearchCV

In [939]:
# A parameter grid for XGBoost
# https://www.analyticsvidhya.com/blog/2016/03/complete-guide-parameter-tuning-xgboost-with-codes-python/
params = {
        'eta': [0.01, 0.05, 0.1],
        'min_child_weight': [1, 5, 11, 21],
        'max_depth': [3, 6, 10, 15],
        'gamma': [0, 0.001, 0.01],
        'subsample': [0.75, 1],
        'colsample_bytree': [0.75, 1],
        'lambda': [1, 1.25],
        'alpha': [0.0, 0.25]
        }

n_iter = 300

In [940]:
model = xgb.XGBRegressor(
                    n_estimators=1500,
                    objective="reg:squarederror",
                    tree_method='gpu_hist'
                )


In [941]:
# Pleasure
# run randomized search
kfold = KFold(n_splits=5, shuffle=True)

random_search = RandomizedSearchCV(model, param_distributions=params,
                               cv=kfold, scoring='neg_mean_squared_error', n_iter = n_iter)

start = time.time()
random_search.fit(reg_train_X, reg_train_Y)


print("GridSearchCV took %.2f seconds"
      " parameter settings." % ((time.time() - start)))

GridSearchCV took 2863.87 seconds parameter settings.


### Results

In [942]:
best_regressor = random_search.best_estimator_

print(best_regressor.get_params())

{'objective': 'reg:squarederror', 'base_score': 0.5, 'booster': 'gbtree', 'colsample_bylevel': 1, 'colsample_bynode': 1, 'colsample_bytree': 1, 'enable_categorical': False, 'gamma': 0.001, 'gpu_id': 0, 'importance_type': None, 'interaction_constraints': '', 'learning_rate': 0.00999999978, 'max_delta_step': 0, 'max_depth': 3, 'min_child_weight': 5, 'missing': nan, 'monotone_constraints': '()', 'n_estimators': 1500, 'n_jobs': 12, 'num_parallel_tree': 1, 'predictor': 'auto', 'random_state': 0, 'reg_alpha': 0.25, 'reg_lambda': 1, 'scale_pos_weight': 1, 'subsample': 0.75, 'tree_method': 'gpu_hist', 'validate_parameters': 1, 'verbosity': None, 'lambda': 1, 'eta': 0.01, 'alpha': 0.25}


In [943]:
pred_y = best_regressor.predict(reg_test_X)
mse = mean_squared_error(reg_test_Y, pred_y)
mae = mean_absolute_error(reg_test_Y, pred_y)

print("MSE: %.2f" % mse)
print("MAE: %.2f" % mae)

MSE: 0.98
MAE: 0.78
