# XGBRegression (P,A,D coordinates to LMA Features)

In [9]:
"""
Using the identified emotions of each LMA feature set rather than the normal ones
"""

'\nUsing the identified emotions of each LMA feature set rather than the normal ones\n'

## Imports

In [10]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.lines as mlines

import tensorflow as tf

import sklearn
from sklearn.model_selection import cross_val_score, KFold, StratifiedKFold

import xgboost as xgb

xgb.set_config(verbosity=0)

from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import normalize

from sklearn.utils import shuffle
import math
import joblib
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error

In [11]:
# Pre-trained regressors that map (scaled) LMA features to one emotion
# coordinate each; they are used below to predict EMOTION_P / _A / _D.
model_p, model_a, model_d = (xgb.XGBRegressor(verbosity=0) for _ in range(3))

model_p.load_model("../../emotion_classifier/model_training/models/bandai_l2p_model.json")
model_a.load_model("../../emotion_classifier/model_training/models/bandai_l2a_model.json")
model_d.load_model("../../emotion_classifier/model_training/models/bandai_l2d_model.json")

# Pre-fitted scaler applied to the LMA features before they reach the regressors above.
scaler = joblib.load('../../emotion_classifier/model_training/datasets/scalers/standardizers/S_BANDAI_5frame.pkl')

## Load Data

In [12]:
# Load the BANDAI 5-frame table: LMA feature columns followed by the
# EMOTION_P / EMOTION_A / EMOTION_D target columns (see the preview below).
dataset = pd.read_csv('datasets/BANDAI_5frame.csv')
dataset.head()

Unnamed: 0,max_hand_distance,avg_l_hand_hip_distance,avg_r_hand_hip_distance,max_stride_length,avg_l_hand_chest_distance,avg_r_hand_chest_distance,avg_l_elbow_hip_distance,avg_r_elbow_hip_distance,avg_chest_pelvis_distance,avg_neck_chest_distance,...,r_foot_speed,neck_speed,l_hand_acceleration_magnitude,r_hand_acceleration_magnitude,l_foot_acceleration_magnitude,r_foot_acceleration_magnitude,neck_acceleration_magnitude,EMOTION_P,EMOTION_A,EMOTION_D
0,0.433596,0.130036,0.335475,0.538619,0.184473,0.310089,0.210679,0.093915,0.236151,0.223894,...,-0.137483,-0.139467,0.09292,0.196129,0.221155,0.137483,0.139467,0.05,-0.05,0.0
1,0.348051,0.145184,0.256784,0.455501,0.197954,0.284215,0.172405,0.104876,0.236151,0.223894,...,-0.072441,-0.095084,0.048935,0.032804,0.070625,0.072096,0.045111,0.05,-0.05,0.0
2,0.320294,0.206306,0.180224,0.38019,0.234406,0.259905,0.133006,0.127385,0.236151,0.223894,...,-0.039016,-0.093423,0.030819,0.043059,0.028182,0.036532,0.014232,0.05,-0.05,0.0
3,0.400389,0.2873,0.136974,0.319861,0.279208,0.23945,0.112632,0.156856,0.236151,0.223894,...,-0.036825,-0.101489,0.044534,0.054382,0.015679,0.012813,0.021506,0.05,-0.05,0.0
4,0.465923,0.356129,0.136171,0.309995,0.313229,0.224432,0.11295,0.18668,0.236151,0.223894,...,-0.054338,-0.117573,0.04467,0.036618,0.020074,0.022012,0.024961,0.05,-0.05,0.0


### Split into Test and Train data

In [13]:
# 90/10 train/test split; the fixed seed keeps the partition reproducible.
train_dataset = dataset.sample(frac=0.9, random_state=42)
test_dataset = dataset.drop(train_dataset.index)

print("No Training Samples:",train_dataset.shape[0])
print("No Test Samples:",test_dataset.shape[0])

# Seed the shuffles too: every later row-wise step (emotion prediction, the
# error filter, the .head() previews) depends on this row order, so an unseeded
# shuffle makes the notebook's outputs differ on every run.
train_dataset = shuffle(train_dataset, random_state=42)
test_dataset = shuffle(test_dataset, random_state=42)

No Training Samples: 70696
No Test Samples: 7855


### Split Features from Targets

In [14]:
# Move the three emotion columns out of the feature frames. The *_OG copies keep
# the annotated values, because train_emotions / test_emotions are overwritten
# with model predictions further down.
train_emotions = pd.concat(
    [
        train_dataset.pop('EMOTION_P'),
        train_dataset.pop('EMOTION_A'),
        train_dataset.pop('EMOTION_D'),
    ],
    axis=1,
)
train_emotions_OG = train_emotions.copy()

test_emotions = pd.concat(
    [
        test_dataset.pop('EMOTION_P'),
        test_dataset.pop('EMOTION_A'),
        test_dataset.pop('EMOTION_D'),
    ],
    axis=1,
)
test_emotions_OG = test_emotions.copy()

In [15]:
# Apply the pre-fitted scaler to both splits; it is only used to transform here, never re-fit.
train_dataset_scaled = scaler.transform(train_dataset)
test_dataset_scaled = scaler.transform(test_dataset)

In [16]:
# Replace the annotated emotions with the emotions the pre-trained regressors
# recognise in each LMA feature row (the annotations stay in train_emotions_OG).
train_emotions_p = model_p.predict(train_dataset_scaled)
train_emotions_a = model_a.predict(train_dataset_scaled)
train_emotions_d = model_d.predict(train_dataset_scaled)

# Build the frame in one shot instead of one .iloc assignment per row
# (~70k rows). Index and column labels are kept; the predictions are cast to
# float64, the dtype of the CSV-loaded label columns the old loop wrote into.
train_emotions = pd.DataFrame(
    np.column_stack([train_emotions_p, train_emotions_a, train_emotions_d]).astype(np.float64),
    index=train_emotions.index,
    columns=train_emotions.columns,
)

train_emotions.head()

Unnamed: 0,EMOTION_P,EMOTION_A,EMOTION_D
7351,0.099718,-0.698827,-0.197279
62084,0.099873,0.605899,0.398894
49555,0.099778,0.593024,0.397468
16892,0.099649,-0.742525,-0.244961
58705,0.099884,-0.75412,-0.24718


In [17]:
train_emotions_OG.head()

Unnamed: 0,EMOTION_P,EMOTION_A,EMOTION_D
7351,0.1,-0.7,-0.2
62084,0.1,0.6,0.4
49555,0.1,0.6,0.4
16892,0.1,-0.75,-0.25
58705,0.1,-0.75,-0.25


In [18]:
# Flag LMA feature rows whose predicted emotion is too far from the annotated one.
# A row is flagged when any single coordinate is off by more than 0.2, or when
# the mean absolute error over the three coordinates exceeds 0.11.
# Done with array operations instead of one sklearn metric call per row; the
# result is the same ascending list of positional row indices.
abs_errors = np.abs(train_emotions.to_numpy() - train_emotions_OG.to_numpy())
too_far = (abs_errors > 0.2).any(axis=1) | (abs_errors.mean(axis=1) > 0.11)

# Positions, not index labels: the drop cells translate them through
# `<frame>.index[index_for_removal]`.
index_for_removal = np.flatnonzero(too_far).tolist()


In [19]:
# Drop the flagged rows (positions are translated to index labels first).
# Not idempotent: re-running resolves the same positions against the already
# filtered frame and removes further rows.
train_emotions_OG = train_emotions_OG.drop(index=train_emotions_OG.index[index_for_removal])
print(train_emotions_OG.shape)
train_emotions_OG.head()

(69896, 3)


Unnamed: 0,EMOTION_P,EMOTION_A,EMOTION_D
7351,0.1,-0.7,-0.2
62084,0.1,0.6,0.4
49555,0.1,0.6,0.4
16892,0.1,-0.75,-0.25
58705,0.1,-0.75,-0.25


In [20]:
# Same flagged positions, applied to the predicted emotions.
train_emotions = train_emotions.drop(index=train_emotions.index[index_for_removal])
print(train_emotions.shape)
train_emotions.head()

(69896, 3)


Unnamed: 0,EMOTION_P,EMOTION_A,EMOTION_D
7351,0.099718,-0.698827,-0.197279
62084,0.099873,0.605899,0.398894
49555,0.099778,0.593024,0.397468
16892,0.099649,-0.742525,-0.244961
58705,0.099884,-0.75412,-0.24718


In [21]:
# Same flagged positions, applied to the LMA feature rows so all three frames stay aligned.
train_dataset = train_dataset.drop(index=train_dataset.index[index_for_removal])
print(train_dataset.shape)
train_dataset.head()

(69896, 25)


Unnamed: 0,max_hand_distance,avg_l_hand_hip_distance,avg_r_hand_hip_distance,max_stride_length,avg_l_hand_chest_distance,avg_r_hand_chest_distance,avg_l_elbow_hip_distance,avg_r_elbow_hip_distance,avg_chest_pelvis_distance,avg_neck_chest_distance,...,l_hand_speed,r_hand_speed,l_foot_speed,r_foot_speed,neck_speed,l_hand_acceleration_magnitude,r_hand_acceleration_magnitude,l_foot_acceleration_magnitude,r_foot_acceleration_magnitude,neck_acceleration_magnitude
7351,0.604368,0.302985,0.296439,0.544693,0.404336,0.376776,0.263776,0.262477,0.236151,0.223894,...,-0.097041,-0.060321,-0.058915,-0.071626,-0.068577,0.012729,0.006924,0.013594,0.028912,0.005692
62084,0.556456,0.477493,0.340444,0.248365,0.282625,0.325726,0.276317,0.388607,0.236151,0.223894,...,-0.073528,-0.067397,-0.032656,-0.118374,-0.085121,0.023909,0.016929,0.001895,0.020161,0.01226
49555,0.684605,0.29476,0.658896,0.352561,0.232431,0.49092,0.325048,0.28749,0.236151,0.223894,...,-0.065003,-0.054594,-0.01913,-0.07608,-0.062154,0.025953,0.024376,0.011333,0.0158,0.011349
16892,0.244374,0.191518,0.113416,0.325269,0.314431,0.296409,0.123345,0.133327,0.236151,0.223894,...,-0.027501,-0.096647,-0.05169,-0.012144,-0.046664,0.012652,0.011494,0.008686,0.010626,0.003875
58705,0.483201,0.09567,0.37438,0.454357,0.26726,0.367344,0.105676,0.139039,0.236151,0.223894,...,-0.052596,-0.079725,-0.111117,-0.060064,-0.066178,0.01909,0.007868,0.021528,0.034775,0.008149


In [22]:
# Replace the annotated test emotions with the emotions the pre-trained
# regressors recognise in each LMA feature row (annotations stay in test_emotions_OG).
test_emotions_p = model_p.predict(test_dataset_scaled)
test_emotions_a = model_a.predict(test_dataset_scaled)
test_emotions_d = model_d.predict(test_dataset_scaled)

# Build the frame in one shot instead of one .iloc assignment per row.
# Index and column labels are kept; the predictions are cast to float64, the
# dtype of the CSV-loaded label columns the old loop wrote into.
test_emotions = pd.DataFrame(
    np.column_stack([test_emotions_p, test_emotions_a, test_emotions_d]).astype(np.float64),
    index=test_emotions.index,
    columns=test_emotions.columns,
)

test_emotions.head()

Unnamed: 0,EMOTION_P,EMOTION_A,EMOTION_D
51638,0.099969,0.611916,0.435192
73793,0.044224,-0.133925,-0.025402
29425,-0.09634,-0.591326,-0.154869
16663,0.060335,0.00162,0.029821
55398,0.1162,0.606721,0.397655


In [23]:
test_emotions_OG.head()

Unnamed: 0,EMOTION_P,EMOTION_A,EMOTION_D
51638,0.1,0.6,0.4
73793,0.05,-0.05,0.0
29425,-0.1,-0.6,-0.15
16663,0.05,-0.05,0.0
55398,0.1,0.6,0.4


In [24]:
# Flag LMA feature rows whose predicted emotion is too far from the annotated one.
# A row is flagged when any single coordinate is off by more than 0.2, or when
# the mean absolute error over the three coordinates exceeds 0.11.
# Done with array operations instead of one sklearn metric call per row; the
# result is the same ascending list of positional row indices.
abs_errors = np.abs(test_emotions.to_numpy() - test_emotions_OG.to_numpy())
too_far = (abs_errors > 0.2).any(axis=1) | (abs_errors.mean(axis=1) > 0.11)

# Positions, not index labels: the drop cells translate them through
# `<frame>.index[index_for_removal]`.
index_for_removal = np.flatnonzero(too_far).tolist()


In [25]:
# Drop the flagged rows (positions are translated to index labels first).
# Not idempotent: re-running resolves the same positions against the already
# filtered frame and removes further rows.
test_emotions_OG = test_emotions_OG.drop(index=test_emotions_OG.index[index_for_removal])
print(test_emotions_OG.shape)
test_emotions_OG.head()

(7079, 3)


Unnamed: 0,EMOTION_P,EMOTION_A,EMOTION_D
51638,0.1,0.6,0.4
73793,0.05,-0.05,0.0
29425,-0.1,-0.6,-0.15
16663,0.05,-0.05,0.0
55398,0.1,0.6,0.4


In [26]:
# Same flagged positions, applied to the predicted emotions.
test_emotions = test_emotions.drop(index=test_emotions.index[index_for_removal])
print(test_emotions.shape)
test_emotions.head()

(7079, 3)


Unnamed: 0,EMOTION_P,EMOTION_A,EMOTION_D
51638,0.099969,0.611916,0.435192
73793,0.044224,-0.133925,-0.025402
29425,-0.09634,-0.591326,-0.154869
16663,0.060335,0.00162,0.029821
55398,0.1162,0.606721,0.397655


In [27]:
# Same flagged positions, applied to the LMA feature rows so all three frames stay aligned.
test_dataset = test_dataset.drop(index=test_dataset.index[index_for_removal])
print(test_dataset.shape)
test_dataset.head()

(7079, 25)


Unnamed: 0,max_hand_distance,avg_l_hand_hip_distance,avg_r_hand_hip_distance,max_stride_length,avg_l_hand_chest_distance,avg_r_hand_chest_distance,avg_l_elbow_hip_distance,avg_r_elbow_hip_distance,avg_chest_pelvis_distance,avg_neck_chest_distance,...,l_hand_speed,r_hand_speed,l_foot_speed,r_foot_speed,neck_speed,l_hand_acceleration_magnitude,r_hand_acceleration_magnitude,l_foot_acceleration_magnitude,r_foot_acceleration_magnitude,neck_acceleration_magnitude
51638,0.533268,0.220907,0.552661,0.553833,0.161226,0.420344,0.280216,0.194029,0.236151,0.223894,...,-0.083756,-0.07852,-0.043251,-0.111144,-0.084214,0.018551,0.02654,0.045843,0.022752,0.01115
73793,0.279711,0.09756,0.247884,0.45388,0.289107,0.330829,0.125646,0.135034,0.236151,0.223894,...,-0.03592,-0.090961,-0.092068,-0.044842,-0.060879,0.015079,0.010023,0.020358,0.021572,0.009062
29425,0.576195,0.318318,0.261015,0.342603,0.389182,0.377755,0.284904,0.247827,0.236151,0.223894,...,-0.035266,-0.05901,-0.077995,-0.031408,-0.046864,0.006352,0.003616,0.005287,0.007004,0.004272
16663,0.671994,0.304476,0.373107,0.610709,0.4083,0.412786,0.254441,0.270185,0.236151,0.223894,...,-0.118114,-0.07299,-0.065892,-0.115795,-0.08492,0.006605,0.012515,0.005481,0.019563,0.008173
55398,0.582712,0.378021,0.238468,0.465389,0.311953,0.292135,0.070844,0.210999,0.236151,0.223894,...,-0.234069,-0.267711,-0.270491,-0.185152,-0.238769,0.019624,0.018936,0.054162,0.027153,0.013823


In [28]:
train_y_sets = []
test_y_sets = []
# Lists of single-column DataFrames, one entry per LMA feature column (filled in the next cell)

In [29]:
# Move every feature column out of the frames, each as its own single-column
# DataFrame, so one regressor can be trained per LMA feature. The frames are
# left without columns afterwards.
for feature_name in train_dataset.columns:
    train_y_sets.append(train_dataset.pop(feature_name).to_frame())

for feature_name in test_dataset.columns:
    test_y_sets.append(test_dataset.pop(feature_name).to_frame())

In [30]:
# List the target feature names in the order their models will be built.
for target_frame in train_y_sets:
    print(target_frame.columns[0])

max_hand_distance
avg_l_hand_hip_distance
avg_r_hand_hip_distance
max_stride_length
avg_l_hand_chest_distance
avg_r_hand_chest_distance
avg_l_elbow_hip_distance
avg_r_elbow_hip_distance
avg_chest_pelvis_distance
avg_neck_chest_distance
avg_total_body_volume
avg_lower_body_volume
avg_upper_body_volume
avg_triangle_area_hands_neck
avg_triangle_area_feet_hips
l_hand_speed
r_hand_speed
l_foot_speed
r_foot_speed
neck_speed
l_hand_acceleration_magnitude
r_hand_acceleration_magnitude
l_foot_acceleration_magnitude
r_foot_acceleration_magnitude
neck_acceleration_magnitude


## Define Model

In [None]:
xgbr = xgb.XGBRegressor(verbosity=1)

# Candidate hyperparameter values; nothing in this cell consumes them.
params = {
    'eta': [0.01, 0.05, 0.1],
    'min_child_weight': [1, 5, 11, 21],
    'max_depth': [3, 6, 10, 15],
    'gamma': [0, 0.001, 0.01],
    'subsample': [0.75, 1],
    'colsample_bytree': [0.75, 1],
    'lambda': [1, 1.25],
    'alpha': [0.0, 0.25],
}

# One identically configured regressor per LMA feature target.
models = [
    xgb.XGBRegressor(
        n_estimators=1500,
        learning_rate=0.05,
        max_depth=6,
        min_child_weight=11,
        reg_alpha=0.25,
        reg_lambda=1.25,
        gamma=0.01,
        subsample=0.75,
        colsample_bytree=0.75,
        objective="reg:squarederror",
        tree_method='gpu_hist',
    )
    for _ in train_y_sets
]

## Train Model

In [None]:
# Fit each regressor on the predicted emotion coordinates, with its own LMA feature as target.
for position, regressor in enumerate(models):
    target = train_y_sets[position]
    print(target.columns[0])
    regressor.fit(train_emotions, target)

## Test Model

### Training Score

In [None]:
# Report each model's .score() on the data it was trained on.
for position, regressor in enumerate(models):
    target = train_y_sets[position]
    score = regressor.score(train_emotions, target)
    print(target.columns[0])
    print("Training score: ", score)

    print()

### Test Set MAE & MSE

In [None]:
# Per-feature error of the generated LMA features on the held-out emotions.
for i in range(len(models)):
    pred_y = models[i].predict(test_emotions)
    mse = mean_squared_error(test_y_sets[i], pred_y)
    mae = mean_absolute_error(test_y_sets[i], pred_y)
    print(test_y_sets[i].columns[0])
    print("MSE: %.2f" % mse)
    print("MAE: %.2f" % mae)
    # .iloc[0, 0] instead of .iloc[0][0]: the second [0] was a positional lookup
    # on a label-indexed Series, which pandas has deprecated.
    print("Example: ", test_y_sets[i].iloc[0, 0])
    print()

In [None]:
# Predict every LMA feature from the test emotions: one prediction array per model.
generated_features = [regressor.predict(test_emotions) for regressor in models]

# Regroup the per-feature arrays into one row per test sample.
rows = [list(sample_values) for sample_values in zip(*generated_features)]

print(len(rows))

generated = pd.DataFrame(
    rows,
    columns=[
        "max_hand_distance",
        "avg_l_hand_hip_distance",
        "avg_r_hand_hip_distance",
        "max_stride_length",
        "avg_l_hand_chest_distance",
        "avg_r_hand_chest_distance",
        "avg_l_elbow_hip_distance",
        "avg_r_elbow_hip_distance",
        "avg_chest_pelvis_distance",
        "avg_neck_chest_distance",

        "avg_total_body_volume",
        "avg_lower_body_volume",
        "avg_upper_body_volume",

        "avg_triangle_area_hands_neck",
        "avg_triangle_area_feet_hips",

        "l_hand_speed",
        "r_hand_speed",
        "l_foot_speed",
        "r_foot_speed",
        "neck_speed",

        "l_hand_acceleration_magnitude",
        "r_hand_acceleration_magnitude",
        "l_foot_acceleration_magnitude",
        "r_foot_acceleration_magnitude",
        "neck_acceleration_magnitude",
    ],
)

generated.head()

In [None]:
# Round trip: run the generated LMA features back through the emotion regressors.
generated_scaled = scaler.transform(generated)

gen_emotions_p = model_p.predict(generated_scaled)
gen_emotions_a = model_a.predict(generated_scaled)
gen_emotions_d = model_d.predict(generated_scaled)

# One [P, A, D] row per generated sample.
rows = [
    [pleasure, arousal, dominance]
    for pleasure, arousal, dominance in zip(gen_emotions_p, gen_emotions_a, gen_emotions_d)
]

gen_emotions = pd.DataFrame(rows, columns=["EMOTION_P", "EMOTION_A", "EMOTION_D"])

gen_emotions.head()

In [None]:
import random

# Compare the emotions recognised in the generated LMA features (gen_emotions)
# with the emotions those features were generated from (test_emotions).
mae_errors = mean_absolute_error(test_emotions, gen_emotions, multioutput='raw_values')
mse_errors = mean_squared_error(test_emotions, gen_emotions, multioutput='raw_values')

features = ["PLEASURE", "AROUSAL", "DOMINANCE"
         ]

print("Overall MAE: " + str(mean_absolute_error(test_emotions, gen_emotions)))

print()
for i in range(len(mse_errors)):
    print("==" + features[i] + "==")
    print("MSE: %.5f" % mse_errors[i])
    print("MAE: %.5f" % mae_errors[i])
    print()

# Show 30 randomly chosen rows side by side.
for i in range(30):
    # randrange excludes the upper bound. random.randint(0, len(...)) includes
    # it, so it could return len(test_emotions) and make .iloc raise IndexError.
    row = random.randrange(len(test_emotions))

    print("Real: " + str([test_emotions_OG.iloc[row,0], test_emotions_OG.iloc[row,1], test_emotions_OG.iloc[row,2]]))
    print("Predicted: " + str([test_emotions.iloc[row,0], test_emotions.iloc[row,1], test_emotions.iloc[row,2]]))
    print("Generated: " + str([gen_emotions.iloc[row,0], gen_emotions.iloc[row,1], gen_emotions.iloc[row,2]]))
    print()

### Prediction Examples

In [None]:
# Re-create the three LMA-features -> emotion regressors and the feature scaler
# from disk (same files as the loading cell near the top of the notebook).
model_p, model_a, model_d = (xgb.XGBRegressor(verbosity=0) for _ in range(3))

model_p.load_model("../../emotion_classifier/model_training/models/bandai_l2p_model.json")
model_a.load_model("../../emotion_classifier/model_training/models/bandai_l2a_model.json")
model_d.load_model("../../emotion_classifier/model_training/models/bandai_l2d_model.json")

scaler = joblib.load('../../emotion_classifier/model_training/datasets/scalers/standardizers/S_BANDAI_5frame.pkl')

In [None]:
index = 4

# The regressors take emotion coordinates as input. This cell used to read them
# from `test_X`, a name the notebook never defines (NameError on a fresh
# kernel); `test_emotions` is the frame the models are evaluated on above.
real_coordinates = np.asarray([test_emotions.iloc[index]])

# One predicted value per LMA feature for the selected emotion.
generated_features = []
for i in range(len(models)):
    generated_features.append(models[i].predict(real_coordinates)[0])
    #print(test_y_sets[i].columns[0], " - ", generated_features[i])

generated_features = pd.DataFrame([generated_features], columns=["max_hand_distance",
          "avg_l_hand_hip_distance",
          "avg_r_hand_hip_distance",
          "max_stride_length",
          "avg_l_hand_chest_distance",
          "avg_r_hand_chest_distance",
          "avg_l_elbow_hip_distance",
          "avg_r_elbow_hip_distance",
          "avg_chest_pelvis_distance",
          "avg_neck_chest_distance",

          "avg_total_body_volume",
          "avg_lower_body_volume",
          "avg_upper_body_volume",

          "avg_triangle_area_hands_neck",
          "avg_triangle_area_feet_hips",

          "l_hand_speed",
          "r_hand_speed",
          "l_foot_speed",
          "r_foot_speed",
          "neck_speed",

          "l_hand_acceleration_magnitude",
          "r_hand_acceleration_magnitude",
          "l_foot_acceleration_magnitude",
          "r_foot_acceleration_magnitude",
          "neck_acceleration_magnitude",
    ])

# Feed the generated features back through the emotion regressors.
generated_features = scaler.transform(generated_features)

y_p = model_p.predict(generated_features)
y_a = model_a.predict(generated_features)
y_d = model_d.predict(generated_features)

print('Real: %s' % np.asarray(real_coordinates))
print('Predicted: %s' % [y_p[0], y_a[0], y_d[0]])

## Save Models

In [None]:
# save to JSON


## Load Models

# Hyperparameter Grid Search with XGBoost

In [31]:
from datetime import datetime
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

import tensorflow as tf

import sklearn
from sklearn.model_selection import cross_val_score, KFold
from sklearn.metrics import mean_absolute_error


import xgboost as xgb

xgb.set_config(verbosity=2)

from sklearn.metrics import mean_squared_error
from sklearn.utils import shuffle
import joblib

## Load Data

In [32]:
# Reload the same CSV from scratch: the earlier section popped the columns out
# of its working frames, so the search section starts from a fresh copy.
dataset = pd.read_csv('datasets/BANDAI_5frame.csv')
dataset.head()

Unnamed: 0,max_hand_distance,avg_l_hand_hip_distance,avg_r_hand_hip_distance,max_stride_length,avg_l_hand_chest_distance,avg_r_hand_chest_distance,avg_l_elbow_hip_distance,avg_r_elbow_hip_distance,avg_chest_pelvis_distance,avg_neck_chest_distance,...,r_foot_speed,neck_speed,l_hand_acceleration_magnitude,r_hand_acceleration_magnitude,l_foot_acceleration_magnitude,r_foot_acceleration_magnitude,neck_acceleration_magnitude,EMOTION_P,EMOTION_A,EMOTION_D
0,0.433596,0.130036,0.335475,0.538619,0.184473,0.310089,0.210679,0.093915,0.236151,0.223894,...,-0.137483,-0.139467,0.09292,0.196129,0.221155,0.137483,0.139467,0.05,-0.05,0.0
1,0.348051,0.145184,0.256784,0.455501,0.197954,0.284215,0.172405,0.104876,0.236151,0.223894,...,-0.072441,-0.095084,0.048935,0.032804,0.070625,0.072096,0.045111,0.05,-0.05,0.0
2,0.320294,0.206306,0.180224,0.38019,0.234406,0.259905,0.133006,0.127385,0.236151,0.223894,...,-0.039016,-0.093423,0.030819,0.043059,0.028182,0.036532,0.014232,0.05,-0.05,0.0
3,0.400389,0.2873,0.136974,0.319861,0.279208,0.23945,0.112632,0.156856,0.236151,0.223894,...,-0.036825,-0.101489,0.044534,0.054382,0.015679,0.012813,0.021506,0.05,-0.05,0.0
4,0.465923,0.356129,0.136171,0.309995,0.313229,0.224432,0.11295,0.18668,0.236151,0.223894,...,-0.054338,-0.117573,0.04467,0.036618,0.020074,0.022012,0.024961,0.05,-0.05,0.0


### Split into Test and Train data

In [33]:
# 90/10 train/test split; the fixed seed keeps the partition reproducible.
train_dataset = dataset.sample(frac=0.9, random_state=42)
test_dataset = dataset.drop(train_dataset.index)

print("No Training Samples:",train_dataset.shape[0])
print("No Test Samples:",test_dataset.shape[0])

# Seed the shuffles too: every later row-wise step (emotion prediction, the
# error filter, the .head() previews) depends on this row order, so an unseeded
# shuffle makes the notebook's outputs differ on every run.
train_dataset = shuffle(train_dataset, random_state=42)
test_dataset = shuffle(test_dataset, random_state=42)

No Training Samples: 70696
No Test Samples: 7855


### Split Features from Targets

In [34]:
# Move the three emotion columns out of the feature frames. The *_OG copies keep
# the annotated values, because train_emotions / test_emotions are overwritten
# with model predictions further down.
train_emotions = pd.concat(
    [
        train_dataset.pop('EMOTION_P'),
        train_dataset.pop('EMOTION_A'),
        train_dataset.pop('EMOTION_D'),
    ],
    axis=1,
)
train_emotions_OG = train_emotions.copy()

test_emotions = pd.concat(
    [
        test_dataset.pop('EMOTION_P'),
        test_dataset.pop('EMOTION_A'),
        test_dataset.pop('EMOTION_D'),
    ],
    axis=1,
)
test_emotions_OG = test_emotions.copy()

In [35]:
# NOTE: `scaler` (and the model_p / model_a / model_d used next) are not loaded
# in this section before this point; they must already exist from an earlier cell.
train_dataset_scaled = scaler.transform(train_dataset)
test_dataset_scaled = scaler.transform(test_dataset)

In [36]:
# Replace the annotated emotions with the emotions the pre-trained regressors
# recognise in each LMA feature row (the annotations stay in train_emotions_OG).
train_emotions_p = model_p.predict(train_dataset_scaled)
train_emotions_a = model_a.predict(train_dataset_scaled)
train_emotions_d = model_d.predict(train_dataset_scaled)

# Build the frame in one shot instead of one .iloc assignment per row
# (~70k rows). Index and column labels are kept; the predictions are cast to
# float64, the dtype of the CSV-loaded label columns the old loop wrote into.
train_emotions = pd.DataFrame(
    np.column_stack([train_emotions_p, train_emotions_a, train_emotions_d]).astype(np.float64),
    index=train_emotions.index,
    columns=train_emotions.columns,
)

train_emotions.head()

Unnamed: 0,EMOTION_P,EMOTION_A,EMOTION_D
70195,0.079177,-0.063562,0.002162
2827,0.052445,-0.084121,-0.039671
11892,-0.09995,-0.600858,-0.150532
67161,-0.100103,-0.599273,-0.147938
18249,0.300138,0.399033,0.603562


In [37]:
train_emotions_OG.head()

Unnamed: 0,EMOTION_P,EMOTION_A,EMOTION_D
70195,0.05,-0.05,0.0
2827,0.05,-0.05,0.0
11892,-0.1,-0.6,-0.15
67161,-0.1,-0.6,-0.15
18249,0.3,0.4,0.6


In [38]:
# Flag LMA feature rows whose predicted emotion is too far from the annotated one.
# A row is flagged when any single coordinate is off by more than 0.2, or when
# the mean absolute error over the three coordinates exceeds 0.11.
# Done with array operations instead of one sklearn metric call per row; the
# result is the same ascending list of positional row indices.
abs_errors = np.abs(train_emotions.to_numpy() - train_emotions_OG.to_numpy())
too_far = (abs_errors > 0.2).any(axis=1) | (abs_errors.mean(axis=1) > 0.11)

# Positions, not index labels: the drop cells translate them through
# `<frame>.index[index_for_removal]`.
index_for_removal = np.flatnonzero(too_far).tolist()


In [39]:
# Drop the flagged rows (positions are translated to index labels first).
# Not idempotent: re-running resolves the same positions against the already
# filtered frame and removes further rows.
train_emotions_OG = train_emotions_OG.drop(index=train_emotions_OG.index[index_for_removal])
print(train_emotions_OG.shape)
train_emotions_OG.head()

(69896, 3)


Unnamed: 0,EMOTION_P,EMOTION_A,EMOTION_D
70195,0.05,-0.05,0.0
2827,0.05,-0.05,0.0
11892,-0.1,-0.6,-0.15
67161,-0.1,-0.6,-0.15
18249,0.3,0.4,0.6


In [40]:
# Same flagged positions, applied to the predicted emotions.
train_emotions = train_emotions.drop(index=train_emotions.index[index_for_removal])
print(train_emotions.shape)
train_emotions.head()

(69896, 3)


Unnamed: 0,EMOTION_P,EMOTION_A,EMOTION_D
70195,0.079177,-0.063562,0.002162
2827,0.052445,-0.084121,-0.039671
11892,-0.09995,-0.600858,-0.150532
67161,-0.100103,-0.599273,-0.147938
18249,0.300138,0.399033,0.603562


In [41]:
# Same flagged positions, applied to the LMA feature rows so all three frames stay aligned.
train_dataset = train_dataset.drop(index=train_dataset.index[index_for_removal])
print(train_dataset.shape)
train_dataset.head()

(69896, 25)


Unnamed: 0,max_hand_distance,avg_l_hand_hip_distance,avg_r_hand_hip_distance,max_stride_length,avg_l_hand_chest_distance,avg_r_hand_chest_distance,avg_l_elbow_hip_distance,avg_r_elbow_hip_distance,avg_chest_pelvis_distance,avg_neck_chest_distance,...,l_hand_speed,r_hand_speed,l_foot_speed,r_foot_speed,neck_speed,l_hand_acceleration_magnitude,r_hand_acceleration_magnitude,l_foot_acceleration_magnitude,r_foot_acceleration_magnitude,neck_acceleration_magnitude
70195,0.538345,0.333439,0.31889,0.628014,0.334773,0.226657,0.382729,0.266141,0.236151,0.223894,...,-0.153649,-0.1274263,-0.143843,-0.1267129,-0.136054,0.017925,0.028507,0.038809,0.014869,0.019265
2827,0.768102,0.341124,0.441627,0.587848,0.428401,0.435858,0.267987,0.290633,0.236151,0.223894,...,-0.089788,-0.06825234,-0.064226,-0.05501893,-0.07689,0.015034,0.006461,0.020021,0.013387,0.007494
11892,0.143923,0.122146,0.078078,0.203586,0.285226,0.279552,0.108115,0.10627,0.236151,0.223894,...,-0.0,-7.450581e-09,-0.0,-3.72529e-09,-0.0,0.0,0.0,0.0,0.0,0.0
67161,0.575246,0.295275,0.279979,0.428454,0.396252,0.37438,0.27465,0.27095,0.236151,0.223894,...,-0.068669,-0.06784613,-0.046581,-0.05078013,-0.061736,0.006481,0.002076,0.007007,0.010162,0.002395
18249,0.194773,0.075525,0.142447,0.669358,0.178291,0.222101,0.144947,0.057834,0.236151,0.223894,...,-0.098005,-0.1977552,-0.200025,-0.1181595,-0.142245,0.016819,0.014915,0.023139,0.036591,0.013176


In [42]:
# Replace the annotated test emotions with the emotions the pre-trained
# regressors recognise in each LMA feature row (annotations stay in test_emotions_OG).
test_emotions_p = model_p.predict(test_dataset_scaled)
test_emotions_a = model_a.predict(test_dataset_scaled)
test_emotions_d = model_d.predict(test_dataset_scaled)

# Build the frame in one shot instead of one .iloc assignment per row.
# Index and column labels are kept; the predictions are cast to float64, the
# dtype of the CSV-loaded label columns the old loop wrote into.
test_emotions = pd.DataFrame(
    np.column_stack([test_emotions_p, test_emotions_a, test_emotions_d]).astype(np.float64),
    index=test_emotions.index,
    columns=test_emotions.columns,
)

test_emotions.head()

Unnamed: 0,EMOTION_P,EMOTION_A,EMOTION_D
43361,0.570993,0.412148,0.117429
62119,0.109227,0.593001,0.425317
11536,-0.106213,-0.582618,-0.14765
77619,0.102,0.632815,0.437384
39811,0.150788,0.541237,0.397413


In [43]:
test_emotions_OG.head()

Unnamed: 0,EMOTION_P,EMOTION_A,EMOTION_D
43361,0.6,0.4,0.1
62119,0.1,0.6,0.4
11536,-0.1,-0.6,-0.15
77619,0.1,0.6,0.4
39811,0.1,0.6,0.4


In [44]:
# Flag LMA feature rows whose predicted emotion is too far from the annotated one.
# A row is flagged when any single coordinate is off by more than 0.2, or when
# the mean absolute error over the three coordinates exceeds 0.11.
# Done with array operations instead of one sklearn metric call per row; the
# result is the same ascending list of positional row indices.
abs_errors = np.abs(test_emotions.to_numpy() - test_emotions_OG.to_numpy())
too_far = (abs_errors > 0.2).any(axis=1) | (abs_errors.mean(axis=1) > 0.11)

# Positions, not index labels: the drop cells translate them through
# `<frame>.index[index_for_removal]`.
index_for_removal = np.flatnonzero(too_far).tolist()


In [45]:
# Drop the flagged rows (positions are translated to index labels first).
# Not idempotent: re-running resolves the same positions against the already
# filtered frame and removes further rows.
test_emotions_OG = test_emotions_OG.drop(index=test_emotions_OG.index[index_for_removal])
print(test_emotions_OG.shape)
test_emotions_OG.head()

(7079, 3)


Unnamed: 0,EMOTION_P,EMOTION_A,EMOTION_D
43361,0.6,0.4,0.1
62119,0.1,0.6,0.4
11536,-0.1,-0.6,-0.15
77619,0.1,0.6,0.4
39811,0.1,0.6,0.4


In [46]:
# Same flagged positions, applied to the predicted emotions.
test_emotions = test_emotions.drop(index=test_emotions.index[index_for_removal])
print(test_emotions.shape)
test_emotions.head()

(7079, 3)


Unnamed: 0,EMOTION_P,EMOTION_A,EMOTION_D
43361,0.570993,0.412148,0.117429
62119,0.109227,0.593001,0.425317
11536,-0.106213,-0.582618,-0.14765
77619,0.102,0.632815,0.437384
39811,0.150788,0.541237,0.397413


In [47]:
# Same flagged positions, applied to the LMA feature rows so all three frames stay aligned.
test_dataset = test_dataset.drop(index=test_dataset.index[index_for_removal])
print(test_dataset.shape)
test_dataset.head()

(7079, 25)


Unnamed: 0,max_hand_distance,avg_l_hand_hip_distance,avg_r_hand_hip_distance,max_stride_length,avg_l_hand_chest_distance,avg_r_hand_chest_distance,avg_l_elbow_hip_distance,avg_r_elbow_hip_distance,avg_chest_pelvis_distance,avg_neck_chest_distance,...,l_hand_speed,r_hand_speed,l_foot_speed,r_foot_speed,neck_speed,l_hand_acceleration_magnitude,r_hand_acceleration_magnitude,l_foot_acceleration_magnitude,r_foot_acceleration_magnitude,neck_acceleration_magnitude
43361,0.839855,0.584549,0.303045,0.807206,0.555954,0.328261,0.365081,0.297046,0.236151,0.223894,...,-0.202142,-0.163304,-0.215158,-0.184951,-0.185434,0.035174,0.037837,0.049842,0.019922,0.016338
62119,0.519139,0.31807,0.370503,0.421927,0.375346,0.267286,0.327853,0.223795,0.236151,0.223894,...,-0.023655,-0.127765,-0.121379,-0.035919,-0.06494,0.053079,0.054873,0.009555,0.010473,0.013717
11536,0.266877,0.093068,0.181774,0.343763,0.282216,0.30292,0.126089,0.106268,0.236151,0.223894,...,-0.055647,-0.04848,-0.079677,-0.067767,-0.051282,0.003439,0.004836,0.005902,0.007244,0.002473
77619,0.741372,0.433864,0.502203,0.498241,0.362628,0.401887,0.28445,0.143067,0.236151,0.223894,...,-0.224556,-0.198768,-0.117959,-0.231114,-0.200811,0.021383,0.041349,0.032144,0.019025,0.015452
39811,0.74703,0.425702,0.551746,0.645674,0.257487,0.498926,0.277795,0.440488,0.236151,0.223894,...,-0.198095,-0.216454,-0.193708,-0.248586,-0.209728,0.040924,0.036494,0.024462,0.015689,0.028111


In [48]:
train_y_sets = []
test_y_sets = []
# Lists of single-column DataFrames, one entry per LMA feature column (filled in the next cell)

In [49]:
# Move every feature column out of the frames, each as its own single-column
# DataFrame, so one regressor can be tuned per LMA feature. The frames are
# left without columns afterwards.
for feature_name in train_dataset.columns:
    train_y_sets.append(train_dataset.pop(feature_name).to_frame())

for feature_name in test_dataset.columns:
    test_y_sets.append(test_dataset.pop(feature_name).to_frame())

In [50]:
# List the target feature names in the order their models will be built.
for target_frame in train_y_sets:
    print(target_frame.columns[0])

max_hand_distance
avg_l_hand_hip_distance
avg_r_hand_hip_distance
max_stride_length
avg_l_hand_chest_distance
avg_r_hand_chest_distance
avg_l_elbow_hip_distance
avg_r_elbow_hip_distance
avg_chest_pelvis_distance
avg_neck_chest_distance
avg_total_body_volume
avg_lower_body_volume
avg_upper_body_volume
avg_triangle_area_hands_neck
avg_triangle_area_feet_hips
l_hand_speed
r_hand_speed
l_foot_speed
r_foot_speed
neck_speed
l_hand_acceleration_magnitude
r_hand_acceleration_magnitude
l_foot_acceleration_magnitude
r_foot_acceleration_magnitude
neck_acceleration_magnitude


## RandomizedSearchCV

In [51]:
# Search space for XGBoost: each key maps to the list of candidate values that
# the randomized searches below sample from (passed as `param_distributions`).
# Tuning guide:
# https://www.analyticsvidhya.com/blog/2016/03/complete-guide-parameter-tuning-xgboost-with-codes-python/
params = {
        'eta': [0.01, 0.05, 0.1, 0.15],
        'min_child_weight': [1, 5, 11, 21],
        'max_depth': [3, 6, 10, 15],
        'subsample': [0.75, 1],
        'colsample_bytree': [0.75, 1],
        'lambda': [0.85, 1, 1.25],
        'alpha': [0.0, 0.1, 0.25]
        }

# Passed as `n_iter` to every RandomizedSearchCV below.
n_iter = 175

In [52]:
# One base regressor per LMA feature target; each is handed to its own randomized search below.
models = [
    xgb.XGBRegressor(
        n_estimators=5000,
        objective="reg:squarederror",
        tree_method='gpu_hist',
    )
    for _ in train_y_sets
]

In [53]:
# Randomized hyperparameter search for the LMA feature models 0-4.
# The results are appended in target order; the later cells rely on
# random_searches[i] belonging to train_y_sets[i].

random_searches = []

for i in range(5):
    print(train_y_sets[i].columns[0])

    # Seeded so the folds and the sampled candidates are the same on every run.
    kfold = KFold(n_splits=2, shuffle=True, random_state=42)

    random_search = RandomizedSearchCV(models[i], param_distributions=params,
                               cv=kfold, scoring='neg_mean_squared_error', n_iter = n_iter,
                               random_state=42)

    start = time.time()
    random_search.fit(train_emotions, train_y_sets[i])

    random_searches.append(random_search)

    # The old message named GridSearchCV and was missing the candidate count.
    print("RandomizedSearchCV took %.2f seconds for %d candidate"
          " parameter settings." % ((time.time() - start), n_iter))
    print(i)
    print()

max_hand_distance


KeyboardInterrupt: 

In [None]:
# Randomized hyperparameter search for the LMA feature models 5-9
# (appends to the random_searches list started by the first search cell).
for i in range(5,10):
    print(train_y_sets[i].columns[0])

    # Seeded so the folds and the sampled candidates are the same on every run.
    kfold = KFold(n_splits=2, shuffle=True, random_state=42)

    random_search = RandomizedSearchCV(models[i], param_distributions=params,
                               cv=kfold, scoring='neg_mean_squared_error', n_iter = n_iter,
                               random_state=42)

    start = time.time()
    random_search.fit(train_emotions, train_y_sets[i])

    random_searches.append(random_search)

    # The old message named GridSearchCV and was missing the candidate count.
    print("RandomizedSearchCV took %.2f seconds for %d candidate"
          " parameter settings." % ((time.time() - start), n_iter))
    print(i)
    print()

In [None]:
# Randomized hyperparameter search for the LMA feature models 10-14
# (appends to the random_searches list started by the first search cell).
for i in range(10,15):
    print(train_y_sets[i].columns[0])

    # Seeded so the folds and the sampled candidates are the same on every run.
    kfold = KFold(n_splits=2, shuffle=True, random_state=42)

    random_search = RandomizedSearchCV(models[i], param_distributions=params,
                               cv=kfold, scoring='neg_mean_squared_error', n_iter = n_iter,
                               random_state=42)

    start = time.time()
    random_search.fit(train_emotions, train_y_sets[i])

    random_searches.append(random_search)

    # The old message named GridSearchCV and was missing the candidate count.
    print("RandomizedSearchCV took %.2f seconds for %d candidate"
          " parameter settings." % ((time.time() - start), n_iter))
    print(i)
    print()

In [None]:
# Randomized hyperparameter search for the LMA feature models 15-19
# (appends to the random_searches list started by the first search cell).
for i in range(15,20):
    print(train_y_sets[i].columns[0])

    # Seeded so the folds and the sampled candidates are the same on every run.
    kfold = KFold(n_splits=2, shuffle=True, random_state=42)

    random_search = RandomizedSearchCV(models[i], param_distributions=params,
                               cv=kfold, scoring='neg_mean_squared_error', n_iter = n_iter,
                               random_state=42)

    start = time.time()
    random_search.fit(train_emotions, train_y_sets[i])

    random_searches.append(random_search)

    # The old message named GridSearchCV and was missing the candidate count.
    print("RandomizedSearchCV took %.2f seconds for %d candidate"
          " parameter settings." % ((time.time() - start), n_iter))
    print(i)
    print()

In [None]:
# Randomized hyperparameter search for the LMA feature models 20-24
# (appends to the random_searches list started by the first search cell).
for i in range(20,25):
    print(train_y_sets[i].columns[0])

    # Seeded so the folds and the sampled candidates are the same on every run.
    kfold = KFold(n_splits=2, shuffle=True, random_state=42)

    random_search = RandomizedSearchCV(models[i], param_distributions=params,
                               cv=kfold, scoring='neg_mean_squared_error', n_iter = n_iter,
                               random_state=42)

    start = time.time()
    random_search.fit(train_emotions, train_y_sets[i])

    random_searches.append(random_search)

    # The old message named GridSearchCV and was missing the candidate count.
    print("RandomizedSearchCV took %.2f seconds for %d candidate"
          " parameter settings." % ((time.time() - start), n_iter))
    print(i)
    print()

### Results

In [None]:
# Collect the best estimator of every finished search, printing its parameters.
# Position k in random_searches is labelled with train_y_sets[k]'s column name.
best_regressors = []
for position, search in enumerate(random_searches):
    print(train_y_sets[position].columns[0])
    best_regressor = search.best_estimator_

    print(best_regressor.get_params())
    best_regressors.append(best_regressor)
    print()

In [None]:
# Per-feature error of the tuned regressors on the held-out emotions.
for i in range(len(best_regressors)):
    pred_y = best_regressors[i].predict(test_emotions)
    mse = mean_squared_error(test_y_sets[i], pred_y)
    mae = mean_absolute_error(test_y_sets[i], pred_y)
    print(test_y_sets[i].columns[0])
    print("MSE: %.2f" % mse)
    print("MAE: %.2f" % mae)
    # .iloc[0, 0] instead of .iloc[0][0]: the second [0] was a positional lookup
    # on a label-indexed Series, which pandas has deprecated.
    print("Example: ", test_y_sets[i].iloc[0, 0])
    print()

In [None]:
# Predict every LMA feature from the test emotions: one prediction array per tuned regressor.
generated_features = [regressor.predict(test_emotions) for regressor in best_regressors]

# Regroup the per-feature arrays into one row per test sample.
rows = [list(sample_values) for sample_values in zip(*generated_features)]

print(len(rows))

generated = pd.DataFrame(
    rows,
    columns=[
        "max_hand_distance",
        "avg_l_hand_hip_distance",
        "avg_r_hand_hip_distance",
        "max_stride_length",
        "avg_l_hand_chest_distance",
        "avg_r_hand_chest_distance",
        "avg_l_elbow_hip_distance",
        "avg_r_elbow_hip_distance",
        "avg_chest_pelvis_distance",
        "avg_neck_chest_distance",

        "avg_total_body_volume",
        "avg_lower_body_volume",
        "avg_upper_body_volume",

        "avg_triangle_area_hands_neck",
        "avg_triangle_area_feet_hips",

        "l_hand_speed",
        "r_hand_speed",
        "l_foot_speed",
        "r_foot_speed",
        "neck_speed",

        "l_hand_acceleration_magnitude",
        "r_hand_acceleration_magnitude",
        "l_foot_acceleration_magnitude",
        "r_foot_acceleration_magnitude",
        "neck_acceleration_magnitude",
    ],
)

generated.head()

In [None]:
# Round trip: run the generated LMA features back through the emotion regressors.
generated_scaled = scaler.transform(generated)

gen_emotions_p = model_p.predict(generated_scaled)
gen_emotions_a = model_a.predict(generated_scaled)
gen_emotions_d = model_d.predict(generated_scaled)

# One [P, A, D] row per generated sample.
rows = [
    [pleasure, arousal, dominance]
    for pleasure, arousal, dominance in zip(gen_emotions_p, gen_emotions_a, gen_emotions_d)
]

gen_emotions = pd.DataFrame(rows, columns=["EMOTION_P", "EMOTION_A", "EMOTION_D"])

gen_emotions.head()

In [None]:
import random

# Compare the emotions fed to the regressors (test_emotions) with the emotions
# recovered from the generated features (gen_emotions).
# multioutput='raw_values' yields one error per output column instead of an average.
mae_errors = mean_absolute_error(test_emotions, gen_emotions, multioutput='raw_values')
mse_errors = mean_squared_error(test_emotions, gen_emotions, multioutput='raw_values')

# Display labels for the three emotion columns, in column order.
features = ["PLEASURE", "AROUSAL", "DOMINANCE"]

print("Overall MAE: " + str(mean_absolute_error(test_emotions, gen_emotions)))

print()
for axis_name, axis_mse, axis_mae in zip(features, mse_errors, mae_errors):
    print("==" + axis_name + "==")
    print("MSE: %.5f" % axis_mse)
    print("MAE: %.5f" % axis_mae)
    print()

# Spot-check 30 randomly chosen rows.
for _ in range(30):
    # random.randint includes BOTH endpoints, so randint(0, len(...)) could
    # return len(test_emotions) and make .iloc raise IndexError. randrange
    # excludes the upper bound, so every draw is a valid row position.
    row = random.randrange(len(test_emotions))

    print("Real: " + str([test_emotions_OG.iloc[row, col] for col in range(3)]))
    print("Predicted: " + str([test_emotions.iloc[row, col] for col in range(3)]))
    print("Generated: " + str([gen_emotions.iloc[row, col] for col in range(3)]))
    print()

In [None]:
# Write each regressor in best_regressors to its own JSON file. The file name
# is the first column name of the test target set at the same position.
output_dir = "models/bandai/5frame/xgb/"
for idx, regressor in enumerate(best_regressors):
    target_name = test_y_sets[idx].columns[0]
    regressor.save_model(output_dir + target_name + ".json")

### Predictions

In [None]:
# Reload the three LMA-features -> emotion regressors (one each for P, A, D)
# and the feature scaler from disk, rebinding the same notebook-level names.
model_p, model_a, model_d = (xgb.XGBRegressor(verbosity=0) for _ in range(3))

for emotion_model, model_path in (
    (model_p, "../../emotion_classifier/model_training/models/bandai_l2p_model.json"),
    (model_a, "../../emotion_classifier/model_training/models/bandai_l2a_model.json"),
    (model_d, "../../emotion_classifier/model_training/models/bandai_l2d_model.json"),
):
    emotion_model.load_model(model_path)

scaler = joblib.load('../../emotion_classifier/model_training/datasets/scalers/standardizers/S_BANDAI_5frame.pkl')

In [None]:
# Round-trip check for a single test row: emotion coordinates -> generated LMA
# features -> scaled features -> emotion coordinates from the emotion models.
index = 10

# Shape (1, n_columns): a single-sample batch for the regressors.
real_coordinates = np.asarray([test_emotions.iloc[index]])

# Iterate over best_regressors itself rather than range(len(models)): the loop
# only ever uses best_regressors, so a different length of `models` would
# either raise IndexError or yield the wrong number of feature columns.
predicted_values = [
    regressor.predict(real_coordinates)[0] for regressor in best_regressors
]

# Column k is the output of best_regressors[k]; the order below is assumed to
# match the order of the regressors -- TODO confirm against the training cell.
generated_frame = pd.DataFrame([predicted_values], columns=[
    "max_hand_distance",
    "avg_l_hand_hip_distance",
    "avg_r_hand_hip_distance",
    "max_stride_length",
    "avg_l_hand_chest_distance",
    "avg_r_hand_chest_distance",
    "avg_l_elbow_hip_distance",
    "avg_r_elbow_hip_distance",
    "avg_chest_pelvis_distance",
    "avg_neck_chest_distance",

    "avg_total_body_volume",
    "avg_lower_body_volume",
    "avg_upper_body_volume",

    "avg_triangle_area_hands_neck",
    "avg_triangle_area_feet_hips",

    "l_hand_speed",
    "r_hand_speed",
    "l_foot_speed",
    "r_foot_speed",
    "neck_speed",

    "l_hand_acceleration_magnitude",
    "r_hand_acceleration_magnitude",
    "l_foot_acceleration_magnitude",
    "r_foot_acceleration_magnitude",
    "neck_acceleration_magnitude",
])

# generated_features keeps its original final meaning: the scaled feature matrix.
generated_features = scaler.transform(generated_frame)

y_p = model_p.predict(generated_features)
y_a = model_a.predict(generated_features)
y_d = model_d.predict(generated_features)

print('Real: %s' % np.asarray(real_coordinates))
print('Predicted: %s' % [y_p[0], y_a[0], y_d[0]])

In [None]:
# Round-trip check for a hand-picked emotion coordinate: emotion coordinates ->
# generated LMA features -> scaled features -> emotion coordinates.
# Single-sample batch of three values (presumably P, A, D in that order,
# matching the EMOTION_P/A/D columns -- TODO confirm).
real_coordinates = np.asarray([[0.5, 0.0, 0.0]])

# Iterate over best_regressors itself rather than range(len(models)): the loop
# only ever uses best_regressors, so a different length of `models` would
# either raise IndexError or yield the wrong number of feature columns.
predicted_values = [
    regressor.predict(real_coordinates)[0] for regressor in best_regressors
]

# Column k is the output of best_regressors[k]; the order below is assumed to
# match the order of the regressors -- TODO confirm against the training cell.
generated_frame = pd.DataFrame([predicted_values], columns=[
    "max_hand_distance",
    "avg_l_hand_hip_distance",
    "avg_r_hand_hip_distance",
    "max_stride_length",
    "avg_l_hand_chest_distance",
    "avg_r_hand_chest_distance",
    "avg_l_elbow_hip_distance",
    "avg_r_elbow_hip_distance",
    "avg_chest_pelvis_distance",
    "avg_neck_chest_distance",

    "avg_total_body_volume",
    "avg_lower_body_volume",
    "avg_upper_body_volume",

    "avg_triangle_area_hands_neck",
    "avg_triangle_area_feet_hips",

    "l_hand_speed",
    "r_hand_speed",
    "l_foot_speed",
    "r_foot_speed",
    "neck_speed",

    "l_hand_acceleration_magnitude",
    "r_hand_acceleration_magnitude",
    "l_foot_acceleration_magnitude",
    "r_foot_acceleration_magnitude",
    "neck_acceleration_magnitude",
])

# generated_features keeps its original final meaning: the scaled feature matrix.
generated_features = scaler.transform(generated_frame)

y_p = model_p.predict(generated_features)
y_a = model_a.predict(generated_features)
y_d = model_d.predict(generated_features)

print('Real: %s' % np.asarray(real_coordinates))
print('Predicted: %s' % [y_p[0], y_a[0], y_d[0]])