# XGBRegression (P,A,D coordinates to LMA Features)

In [None]:
"""
Uses the emotions that the pretrained LMA-to-P/A/D models predict for each LMA feature set as the model inputs, rather than the dataset's original emotion labels.
"""

## Imports

In [7]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.lines as mlines

import tensorflow as tf

import sklearn
from sklearn.model_selection import cross_val_score, KFold, StratifiedKFold

import xgboost as xgb

xgb.set_config(verbosity=0)

from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import normalize

from sklearn.utils import shuffle
import math
import joblib
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error

In [8]:
# Load the three pretrained XGBoost regressors (the "l2p", "l2a" and "l2d"
# model files) and the fitted feature scaler from disk.
model_p = xgb.XGBRegressor(verbosity=0)
model_a = xgb.XGBRegressor(verbosity=0)
model_d = xgb.XGBRegressor(verbosity=0)

for _regressor, _model_file in (
    (model_p, "../../emotion_classifier/model_training/models/bandai_l2p_model.json"),
    (model_a, "../../emotion_classifier/model_training/models/bandai_l2a_model.json"),
    (model_d, "../../emotion_classifier/model_training/models/bandai_l2d_model.json"),
):
    _regressor.load_model(_model_file)

scaler = joblib.load('../../emotion_classifier/model_training/datasets/scalers/standardizers/S_BANDAI_5frame.pkl')

## Load Data

In [None]:
# Read the BANDAI 5-frame CSV into a DataFrame and preview the first rows.
dataset = pd.read_csv('datasets/BANDAI_5frame.csv')
dataset.head()

### Split into Test and Train data

In [None]:
# Sample 90% of the rows (seeded) for training; the remaining rows form the test set.
train_dataset = dataset.sample(frac=0.9, random_state=42)
test_dataset = dataset.drop(train_dataset.index)

print("No Training Samples:",train_dataset.shape[0])
print("No Test Samples:",test_dataset.shape[0])

# NOTE(review): no random_state is passed to shuffle, so the row order
# presumably differs between runs — confirm whether that matters here.
train_dataset = shuffle(train_dataset)
test_dataset = shuffle(test_dataset)

### Split Features from Targets

In [None]:
# Move the three emotion columns out of each feature frame (``pop`` removes
# them in place) and keep an untouched copy of the original labels.
_emotion_columns = ['EMOTION_P', 'EMOTION_A', 'EMOTION_D']

train_emotions = pd.concat(
    [train_dataset.pop(column) for column in _emotion_columns], axis=1
)
test_emotions = pd.concat(
    [test_dataset.pop(column) for column in _emotion_columns], axis=1
)

train_emotions_OG = train_emotions.copy()
test_emotions_OG = test_emotions.copy()

In [None]:
# Apply the previously loaded scaler to the remaining (LMA feature) columns of both splits.
train_dataset_scaled = scaler.transform(train_dataset)
test_dataset_scaled = scaler.transform(test_dataset)

In [None]:
# Replace the dataset's emotion labels with the emotions that the pretrained
# regressors predict from the scaled LMA features.
train_emotions_p = model_p.predict(train_dataset_scaled)
train_emotions_a = model_a.predict(train_dataset_scaled)
train_emotions_d = model_d.predict(train_dataset_scaled)

# Build the replacement frame in one vectorised step instead of assigning
# every row through ``.iloc`` inside a Python loop. The index and column
# labels of the existing frame are kept. Values are cast to float64 so the
# frame does not silently take on the regressors' output dtype
# (assumes the label columns were float64 — TODO confirm).
train_emotions = pd.DataFrame(
    np.column_stack(
        [train_emotions_p, train_emotions_a, train_emotions_d]
    ).astype(np.float64),
    index=train_emotions.index,
    columns=train_emotions.columns,
)

train_emotions.head()

In [None]:
# Preview the untouched copy of the original training emotion labels.
train_emotions_OG.head()

In [None]:
# Remove LMA features whose predictions are too different from the real one.
# A sample is flagged when the absolute error of any single emotion
# coordinate exceeds 0.2, or when the mean absolute error over the
# coordinates exceeds 0.11. The errors are computed for all rows at once
# with NumPy instead of calling mean_absolute_error once per row.
abs_errors = np.abs(train_emotions_OG.to_numpy() - train_emotions.to_numpy())

removal_mask = (abs_errors > 0.2).any(axis=1) | (abs_errors.mean(axis=1) > 0.11)

# Positional (not label) row numbers, in ascending order; the drop cells
# translate them through ``frame.index[index_for_removal]``.
index_for_removal = np.flatnonzero(removal_mask).tolist()


In [None]:
# Drop the flagged rows from the original labels; the positional numbers in
# index_for_removal are translated to index labels before dropping.
train_emotions_OG.drop(train_emotions_OG.index[index_for_removal], inplace=True)
print(train_emotions_OG.shape)
train_emotions_OG.head()

In [None]:
# Drop the same flagged rows from the predicted training emotions.
train_emotions.drop(train_emotions.index[index_for_removal], inplace=True)
print(train_emotions.shape)
train_emotions.head()

In [None]:
# Drop the same flagged rows from the (unscaled) training LMA features.
train_dataset.drop(train_dataset.index[index_for_removal], inplace=True)
print(train_dataset.shape)
train_dataset.head()

In [None]:
# Replace the test emotion labels with the emotions that the pretrained
# regressors predict from the scaled test LMA features.
test_emotions_p = model_p.predict(test_dataset_scaled)
test_emotions_a = model_a.predict(test_dataset_scaled)
test_emotions_d = model_d.predict(test_dataset_scaled)

# Build the replacement frame in one vectorised step instead of assigning
# every row through ``.iloc`` inside a Python loop. The index and column
# labels of the existing frame are kept. Values are cast to float64 so the
# frame does not silently take on the regressors' output dtype
# (assumes the label columns were float64 — TODO confirm).
test_emotions = pd.DataFrame(
    np.column_stack(
        [test_emotions_p, test_emotions_a, test_emotions_d]
    ).astype(np.float64),
    index=test_emotions.index,
    columns=test_emotions.columns,
)

test_emotions.head()

In [None]:
# Preview the untouched copy of the original test emotion labels.
test_emotions_OG.head()

In [None]:
# Remove LMA features whose predictions are too different from the real one.
# A sample is flagged when the absolute error of any single emotion
# coordinate exceeds 0.2, or when the mean absolute error over the
# coordinates exceeds 0.11. The errors are computed for all rows at once
# with NumPy instead of calling mean_absolute_error once per row.
abs_errors = np.abs(test_emotions_OG.to_numpy() - test_emotions.to_numpy())

removal_mask = (abs_errors > 0.2).any(axis=1) | (abs_errors.mean(axis=1) > 0.11)

# Positional (not label) row numbers, in ascending order; the drop cells
# translate them through ``frame.index[index_for_removal]``.
index_for_removal = np.flatnonzero(removal_mask).tolist()


In [None]:
# Drop the flagged rows from the original test labels; the positional numbers
# in index_for_removal are translated to index labels before dropping.
test_emotions_OG.drop(test_emotions_OG.index[index_for_removal], inplace=True)
print(test_emotions_OG.shape)
test_emotions_OG.head()

In [None]:
# Drop the same flagged rows from the predicted test emotions.
test_emotions.drop(test_emotions.index[index_for_removal], inplace=True)
print(test_emotions.shape)
test_emotions.head()

In [None]:
# Drop the same flagged rows from the (unscaled) test LMA features.
test_dataset.drop(test_dataset.index[index_for_removal], inplace=True)
print(test_dataset.shape)
test_dataset.head()

In [None]:
train_y_sets = []
test_y_sets = []
# Target containers, one entry per LMA feature column. The next cell fills
# them with single-column DataFrames (not NumPy arrays).

In [None]:
# Move every remaining LMA feature column out of the feature frames, storing
# each one as its own single-column DataFrame (one regression target each).
# ``pop`` empties train_dataset / test_dataset as a side effect.
train_y_sets.extend(
    train_dataset.pop(feature).to_frame()
    for feature in list(train_dataset.columns)
)

test_y_sets.extend(
    test_dataset.pop(feature).to_frame()
    for feature in list(test_dataset.columns)
)

In [None]:
# List the name of every target column, in training order.
for target_frame in train_y_sets:
    print(target_frame.columns[0])

## Define Model

In [None]:
# Stand-alone regressor and hyper-parameter grid; neither is referenced by
# the model list built below in this cell.
xgbr = xgb.XGBRegressor(verbosity=1)

params = {
    'eta': [0.01, 0.05, 0.1],
    'min_child_weight': [1, 5, 11, 21],
    'max_depth': [3, 6, 10, 15],
    'gamma': [0, 0.001, 0.01],
    'subsample': [0.75, 1],
    'colsample_bytree': [0.75, 1],
    'lambda': [1, 1.25],
    'alpha': [0.0, 0.25],
}

# One identically configured regressor per LMA feature target.
models = [
    xgb.XGBRegressor(
        n_estimators=1500,
        learning_rate=0.05,
        max_depth=6,
        min_child_weight=11,
        reg_alpha=0.25,
        reg_lambda=1.25,
        gamma=0.01,
        subsample=0.75,
        colsample_bytree=0.75,
        objective="reg:squarederror",
        tree_method='gpu_hist',
    )
    for _ in train_y_sets
]

## Train Model

In [None]:
# Fit each regressor on the emotion coordinates to predict its own LMA feature.
for regressor, target_frame in zip(models, train_y_sets):
    print(target_frame.columns[0])
    regressor.fit(train_emotions, target_frame)

## Test Model

### Training Score

In [None]:
# Report each regressor's ``score`` on the data it was trained on.
for regressor, target_frame in zip(models, train_y_sets):
    score = regressor.score(train_emotions, target_frame)
    print(target_frame.columns[0])
    print("Training score: ", score)

    print()

### Test Set MAE & MSE

In [None]:
# Report MSE / MAE of every regressor on the test split, plus the first true
# target value as an example.
for regressor, target_frame in zip(models, test_y_sets):
    pred_y = regressor.predict(test_emotions)
    mse = mean_squared_error(target_frame, pred_y)
    mae = mean_absolute_error(target_frame, pred_y)
    print(target_frame.columns[0])
    print("MSE: %.2f" % mse)
    print("MAE: %.2f" % mae)
    # Purely positional access. The chained ``.iloc[0][0]`` form looked up
    # the integer 0 on a Series indexed by the column name, which only works
    # through pandas' deprecated integer-as-position fallback.
    print("Example: ", target_frame.iloc[0, 0])
    print()

In [None]:
# Predict every LMA feature for the test emotions: one array per model.
generated_features = [regressor.predict(test_emotions) for regressor in models]

# Transpose the per-feature arrays into one list of values per sample.
rows = [list(sample) for sample in zip(*generated_features)]

print(len(rows))

_lma_feature_names = [
    "max_hand_distance",
    "avg_l_hand_hip_distance",
    "avg_r_hand_hip_distance",
    "max_stride_length",
    "avg_l_hand_chest_distance",
    "avg_r_hand_chest_distance",
    "avg_l_elbow_hip_distance",
    "avg_r_elbow_hip_distance",
    "avg_chest_pelvis_distance",
    "avg_neck_chest_distance",

    "avg_total_body_volume",
    "avg_lower_body_volume",
    "avg_upper_body_volume",

    "avg_triangle_area_hands_neck",
    "avg_triangle_area_feet_hips",

    "l_hand_speed",
    "r_hand_speed",
    "l_foot_speed",
    "r_foot_speed",
    "neck_speed",

    "l_hand_acceleration_magnitude",
    "r_hand_acceleration_magnitude",
    "l_foot_acceleration_magnitude",
    "r_foot_acceleration_magnitude",
    "neck_acceleration_magnitude",
]

generated = pd.DataFrame(rows, columns=_lma_feature_names)

generated.head()

In [None]:
# Feed the generated LMA features back through the scaler and the three
# pretrained regressors to obtain the emotions they correspond to.
generated_scaled = scaler.transform(generated)

gen_emotions_p = model_p.predict(generated_scaled)
gen_emotions_a = model_a.predict(generated_scaled)
gen_emotions_d = model_d.predict(generated_scaled)

# One [P, A, D] list per generated sample.
rows = [
    list(triple)
    for triple in zip(gen_emotions_p, gen_emotions_a, gen_emotions_d)
]

gen_emotions = pd.DataFrame(
    rows,
    columns=["EMOTION_P", "EMOTION_A", "EMOTION_D"],
)

gen_emotions.head()

In [None]:
import random

# Per-column and overall errors between the emotions predicted for the real
# test features (test_emotions) and those predicted for the generated
# features (gen_emotions).
mae_errors = mean_absolute_error(test_emotions, gen_emotions, multioutput='raw_values')
mse_errors = mean_squared_error(test_emotions, gen_emotions, multioutput='raw_values')

features = ["PLEASURE", "AROUSAL", "DOMINANCE"
         ]

print("Overall MAE: " + str(mean_absolute_error(test_emotions, gen_emotions)))

print()
for name, mse_value, mae_value in zip(features, mse_errors, mae_errors):
    print("==" + name + "==")
    print("MSE: %.5f" % mse_value)
    print("MAE: %.5f" % mae_value)
    print()

# Show 30 randomly chosen samples. ``randrange`` excludes its upper bound;
# ``random.randint(0, n)`` can return n itself, which is one past the last
# valid position and makes ``.iloc`` raise IndexError.
for _ in range(30):
    row = random.randrange(len(test_emotions))

    print("Real: " + str([test_emotions_OG.iloc[row,0], test_emotions_OG.iloc[row,1], test_emotions_OG.iloc[row,2]]))
    print("Predicted: " + str([test_emotions.iloc[row,0], test_emotions.iloc[row,1], test_emotions.iloc[row,2]]))
    print("Generated: " + str([gen_emotions.iloc[row,0], gen_emotions.iloc[row,1], gen_emotions.iloc[row,2]]))
    print()

### Prediction Examples

In [None]:
# Load the three pretrained XGBoost regressors and the fitted scaler again
# (same files as the loading cell near the top of the notebook).
model_p = xgb.XGBRegressor(verbosity=0)
model_a = xgb.XGBRegressor(verbosity=0)
model_d = xgb.XGBRegressor(verbosity=0)

for _regressor, _model_file in (
    (model_p, "../../emotion_classifier/model_training/models/bandai_l2p_model.json"),
    (model_a, "../../emotion_classifier/model_training/models/bandai_l2a_model.json"),
    (model_d, "../../emotion_classifier/model_training/models/bandai_l2d_model.json"),
):
    _regressor.load_model(_model_file)

scaler = joblib.load('../../emotion_classifier/model_training/datasets/scalers/standardizers/S_BANDAI_5frame.pkl')

In [None]:
index = 4

# Emotion coordinates used as model input. The regressors in ``models`` are
# fitted on ``train_emotions``, so the matching test frame is
# ``test_emotions``; the name ``test_X`` used here before is never defined in
# this notebook and raised NameError.
real_coordinates = np.asarray([test_emotions.iloc[index]])

# One generated value per LMA feature for this single sample.
generated_features = [
    regressor.predict(real_coordinates)[0] for regressor in models
]

generated_features = pd.DataFrame([generated_features], columns=["max_hand_distance",
          "avg_l_hand_hip_distance",
          "avg_r_hand_hip_distance",
          "max_stride_length",
          "avg_l_hand_chest_distance",
          "avg_r_hand_chest_distance",
          "avg_l_elbow_hip_distance",
          "avg_r_elbow_hip_distance",
          "avg_chest_pelvis_distance",
          "avg_neck_chest_distance",

          "avg_total_body_volume",
          "avg_lower_body_volume",
          "avg_upper_body_volume",

          "avg_triangle_area_hands_neck",
          "avg_triangle_area_feet_hips",

          "l_hand_speed",
          "r_hand_speed",
          "l_foot_speed",
          "r_foot_speed",
          "neck_speed",

          "l_hand_acceleration_magnitude",
          "r_hand_acceleration_magnitude",
          "l_foot_acceleration_magnitude",
          "r_foot_acceleration_magnitude",
          "neck_acceleration_magnitude",
    ])

# Map the generated features back to emotions with the pretrained regressors.
generated_features = scaler.transform(generated_features)

y_p = model_p.predict(generated_features)
y_a = model_a.predict(generated_features)
y_d = model_d.predict(generated_features)

print('Real: %s' % np.asarray(real_coordinates))
print('Predicted: %s' % [y_p[0], y_a[0], y_d[0]])

## Save Models

In [None]:
# save to JSON


## Load Models

# Hyperparameter Grid Search with XGBoost

In [9]:
from datetime import datetime
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

import tensorflow as tf

import sklearn
from sklearn.model_selection import cross_val_score, KFold
from sklearn.metrics import mean_absolute_error


import xgboost as xgb

xgb.set_config(verbosity=2)

from sklearn.metrics import mean_squared_error
from sklearn.utils import shuffle
import joblib

## Load Data

In [10]:
# Read the BANDAI 5-frame CSV into a DataFrame and preview the first rows.
dataset = pd.read_csv('datasets/BANDAI_5frame.csv')
dataset.head()

Unnamed: 0,max_hand_distance,avg_l_hand_hip_distance,avg_r_hand_hip_distance,max_stride_length,avg_l_hand_chest_distance,avg_r_hand_chest_distance,avg_l_elbow_hip_distance,avg_r_elbow_hip_distance,avg_chest_pelvis_distance,avg_neck_chest_distance,...,r_foot_speed,neck_speed,l_hand_acceleration_magnitude,r_hand_acceleration_magnitude,l_foot_acceleration_magnitude,r_foot_acceleration_magnitude,neck_acceleration_magnitude,EMOTION_P,EMOTION_A,EMOTION_D
0,0.433596,0.130036,0.335475,0.538619,0.184473,0.310089,0.210679,0.093915,0.236151,0.223894,...,-0.137483,-0.139467,0.09292,0.196129,0.221155,0.137483,0.139467,0.05,-0.4,0.0
1,0.348051,0.145184,0.256784,0.455501,0.197954,0.284215,0.172405,0.104876,0.236151,0.223894,...,-0.072441,-0.095084,0.048935,0.032804,0.070625,0.072096,0.045111,0.05,-0.4,0.0
2,0.320294,0.206306,0.180224,0.38019,0.234406,0.259905,0.133006,0.127385,0.236151,0.223894,...,-0.039016,-0.093423,0.030819,0.043059,0.028182,0.036532,0.014232,0.05,-0.4,0.0
3,0.400389,0.2873,0.136974,0.319861,0.279208,0.23945,0.112632,0.156856,0.236151,0.223894,...,-0.036825,-0.101489,0.044534,0.054382,0.015679,0.012813,0.021506,0.05,-0.4,0.0
4,0.465923,0.356129,0.136171,0.309995,0.313229,0.224432,0.11295,0.18668,0.236151,0.223894,...,-0.054338,-0.117573,0.04467,0.036618,0.020074,0.022012,0.024961,0.05,-0.4,0.0


### Split into Test and Train data

In [11]:
# Sample 90% of the rows (seeded) for training; the remaining rows form the test set.
train_dataset = dataset.sample(frac=0.9, random_state=42)
test_dataset = dataset.drop(train_dataset.index)

print("No Training Samples:",train_dataset.shape[0])
print("No Test Samples:",test_dataset.shape[0])

# NOTE(review): no random_state is passed to shuffle, so the row order
# presumably differs between runs — confirm whether that matters here.
train_dataset = shuffle(train_dataset)
test_dataset = shuffle(test_dataset)

No Training Samples: 70696
No Test Samples: 7855


### Split Features from Targets

In [12]:
# Move the three emotion columns out of each feature frame (``pop`` removes
# them in place) and keep an untouched copy of the original labels.
_emotion_columns = ['EMOTION_P', 'EMOTION_A', 'EMOTION_D']

train_emotions = pd.concat(
    [train_dataset.pop(column) for column in _emotion_columns], axis=1
)
test_emotions = pd.concat(
    [test_dataset.pop(column) for column in _emotion_columns], axis=1
)

train_emotions_OG = train_emotions.copy()
test_emotions_OG = test_emotions.copy()

In [13]:
# Apply the scaler loaded in an earlier cell to the remaining (LMA feature) columns of both splits.
train_dataset_scaled = scaler.transform(train_dataset)
test_dataset_scaled = scaler.transform(test_dataset)

In [14]:
# Replace the dataset's emotion labels with the emotions that the pretrained
# regressors predict from the scaled LMA features.
train_emotions_p = model_p.predict(train_dataset_scaled)
train_emotions_a = model_a.predict(train_dataset_scaled)
train_emotions_d = model_d.predict(train_dataset_scaled)

# Build the replacement frame in one vectorised step instead of assigning
# every row through ``.iloc`` inside a Python loop. The index and column
# labels of the existing frame are kept. Values are cast to float64 so the
# frame does not silently take on the regressors' output dtype
# (assumes the label columns were float64 — TODO confirm).
train_emotions = pd.DataFrame(
    np.column_stack(
        [train_emotions_p, train_emotions_a, train_emotions_d]
    ).astype(np.float64),
    index=train_emotions.index,
    columns=train_emotions.columns,
)

train_emotions.head()

Unnamed: 0,EMOTION_P,EMOTION_A,EMOTION_D
13391,0.20069,-0.766424,-0.30165
49122,0.605206,0.407189,0.099735
23480,0.603804,0.404823,0.102552
22517,0.057239,-0.452402,-0.014257
45065,0.6035,0.416509,0.102362


In [15]:
# Preview the untouched copy of the original training emotion labels.
train_emotions_OG.head()

Unnamed: 0,EMOTION_P,EMOTION_A,EMOTION_D
13391,0.2,-0.75,-0.3
49122,0.6,0.4,0.1
23480,0.6,0.4,0.1
22517,0.05,-0.4,0.0
45065,0.6,0.4,0.1


In [16]:
# Remove LMA features whose predictions are too different from the real one.
# A sample is flagged when the absolute error of any single emotion
# coordinate exceeds 0.2, or when the mean absolute error over the
# coordinates exceeds 0.11. The errors are computed for all rows at once
# with NumPy instead of calling mean_absolute_error once per row.
abs_errors = np.abs(train_emotions_OG.to_numpy() - train_emotions.to_numpy())

removal_mask = (abs_errors > 0.2).any(axis=1) | (abs_errors.mean(axis=1) > 0.11)

# Positional (not label) row numbers, in ascending order; the drop cells
# translate them through ``frame.index[index_for_removal]``.
index_for_removal = np.flatnonzero(removal_mask).tolist()


In [17]:
# Drop the flagged rows from the original labels; the positional numbers in
# index_for_removal are translated to index labels before dropping.
train_emotions_OG.drop(train_emotions_OG.index[index_for_removal], inplace=True)
print(train_emotions_OG.shape)
train_emotions_OG.head()

(69919, 3)


Unnamed: 0,EMOTION_P,EMOTION_A,EMOTION_D
13391,0.2,-0.75,-0.3
49122,0.6,0.4,0.1
23480,0.6,0.4,0.1
22517,0.05,-0.4,0.0
45065,0.6,0.4,0.1


In [18]:
# Drop the same flagged rows from the predicted training emotions.
train_emotions.drop(train_emotions.index[index_for_removal], inplace=True)
print(train_emotions.shape)
train_emotions.head()

(69919, 3)


Unnamed: 0,EMOTION_P,EMOTION_A,EMOTION_D
13391,0.20069,-0.766424,-0.30165
49122,0.605206,0.407189,0.099735
23480,0.603804,0.404823,0.102552
22517,0.057239,-0.452402,-0.014257
45065,0.6035,0.416509,0.102362


In [19]:
# Drop the same flagged rows from the (unscaled) training LMA features.
train_dataset.drop(train_dataset.index[index_for_removal], inplace=True)
print(train_dataset.shape)
train_dataset.head()

(69919, 25)


Unnamed: 0,max_hand_distance,avg_l_hand_hip_distance,avg_r_hand_hip_distance,max_stride_length,avg_l_hand_chest_distance,avg_r_hand_chest_distance,avg_l_elbow_hip_distance,avg_r_elbow_hip_distance,avg_chest_pelvis_distance,avg_neck_chest_distance,...,l_hand_speed,r_hand_speed,l_foot_speed,r_foot_speed,neck_speed,l_hand_acceleration_magnitude,r_hand_acceleration_magnitude,l_foot_acceleration_magnitude,r_foot_acceleration_magnitude,neck_acceleration_magnitude
13391,0.571921,0.311381,0.345439,0.357437,0.393836,0.415825,0.244819,0.260773,0.236151,0.223894,...,-0.082344,-0.027095,-0.030702,-0.073258,-0.053312,0.01388,0.010444,0.011778,0.011464,0.006325
49122,0.368515,0.345495,0.07862,0.59356,0.354981,0.244578,0.204196,0.15076,0.236151,0.223894,...,-0.087647,-0.220397,-0.223613,-0.132652,-0.122548,0.018827,0.033586,0.061841,0.006535,0.01571
23480,0.542308,0.2245,0.297177,0.630508,0.298726,0.320194,0.151059,0.126218,0.236151,0.223894,...,-0.067542,-0.141327,-0.158327,-0.120416,-0.093807,0.012532,0.018096,0.028569,0.012491,0.00725
22517,0.504784,0.380608,0.13176,0.387823,0.390652,0.300942,0.163829,0.152339,0.236151,0.223894,...,-0.03605,-0.057856,-0.056777,-0.021486,-0.047197,0.00942,0.00454,0.005158,0.011765,0.004201
45065,0.550436,0.424875,0.152782,0.749527,0.383791,0.223079,0.223545,0.118409,0.236151,0.223894,...,-0.152251,-0.226213,-0.1973,-0.159084,-0.181428,0.041665,0.027854,0.033737,0.071436,0.018205


In [20]:
# Replace the test emotion labels with the emotions that the pretrained
# regressors predict from the scaled test LMA features.
test_emotions_p = model_p.predict(test_dataset_scaled)
test_emotions_a = model_a.predict(test_dataset_scaled)
test_emotions_d = model_d.predict(test_dataset_scaled)

# Build the replacement frame in one vectorised step instead of assigning
# every row through ``.iloc`` inside a Python loop. The index and column
# labels of the existing frame are kept. Values are cast to float64 so the
# frame does not silently take on the regressors' output dtype
# (assumes the label columns were float64 — TODO confirm).
test_emotions = pd.DataFrame(
    np.column_stack(
        [test_emotions_p, test_emotions_a, test_emotions_d]
    ).astype(np.float64),
    index=test_emotions.index,
    columns=test_emotions.columns,
)

test_emotions.head()

Unnamed: 0,EMOTION_P,EMOTION_A,EMOTION_D
4986,0.080975,-0.416981,-0.031107
67270,-0.103397,-0.558096,-0.155531
35710,0.100493,0.590483,0.405745
39310,0.188988,-0.716403,-0.293775
8275,0.084309,0.532811,0.347444


In [21]:
# Preview the untouched copy of the original test emotion labels.
test_emotions_OG.head()

Unnamed: 0,EMOTION_P,EMOTION_A,EMOTION_D
4986,0.05,-0.4,0.0
67270,-0.1,-0.55,-0.15
35710,0.1,0.6,0.4
39310,0.2,-0.75,-0.3
8275,0.1,0.6,0.4


In [22]:
# Remove LMA features whose predictions are too different from the real one.
# A sample is flagged when the absolute error of any single emotion
# coordinate exceeds 0.2, or when the mean absolute error over the
# coordinates exceeds 0.11. The errors are computed for all rows at once
# with NumPy instead of calling mean_absolute_error once per row.
abs_errors = np.abs(test_emotions_OG.to_numpy() - test_emotions.to_numpy())

removal_mask = (abs_errors > 0.2).any(axis=1) | (abs_errors.mean(axis=1) > 0.11)

# Positional (not label) row numbers, in ascending order; the drop cells
# translate them through ``frame.index[index_for_removal]``.
index_for_removal = np.flatnonzero(removal_mask).tolist()


In [23]:
# Drop the flagged rows from the original test labels; the positional numbers
# in index_for_removal are translated to index labels before dropping.
test_emotions_OG.drop(test_emotions_OG.index[index_for_removal], inplace=True)
print(test_emotions_OG.shape)
test_emotions_OG.head()

(7145, 3)


Unnamed: 0,EMOTION_P,EMOTION_A,EMOTION_D
4986,0.05,-0.4,0.0
67270,-0.1,-0.55,-0.15
35710,0.1,0.6,0.4
39310,0.2,-0.75,-0.3
8275,0.1,0.6,0.4


In [24]:
# Drop the same flagged rows from the predicted test emotions.
test_emotions.drop(test_emotions.index[index_for_removal], inplace=True)
print(test_emotions.shape)
test_emotions.head()

(7145, 3)


Unnamed: 0,EMOTION_P,EMOTION_A,EMOTION_D
4986,0.080975,-0.416981,-0.031107
67270,-0.103397,-0.558096,-0.155531
35710,0.100493,0.590483,0.405745
39310,0.188988,-0.716403,-0.293775
8275,0.084309,0.532811,0.347444


In [25]:
# Drop the same flagged rows from the (unscaled) test LMA features.
test_dataset.drop(test_dataset.index[index_for_removal], inplace=True)
print(test_dataset.shape)
test_dataset.head()

(7145, 25)


Unnamed: 0,max_hand_distance,avg_l_hand_hip_distance,avg_r_hand_hip_distance,max_stride_length,avg_l_hand_chest_distance,avg_r_hand_chest_distance,avg_l_elbow_hip_distance,avg_r_elbow_hip_distance,avg_chest_pelvis_distance,avg_neck_chest_distance,...,l_hand_speed,r_hand_speed,l_foot_speed,r_foot_speed,neck_speed,l_hand_acceleration_magnitude,r_hand_acceleration_magnitude,l_foot_acceleration_magnitude,r_foot_acceleration_magnitude,neck_acceleration_magnitude
4986,0.328698,0.253966,0.129897,0.495469,0.333087,0.308586,0.124309,0.148232,0.236151,0.223894,...,-0.106444,-0.035288,-0.041723,-0.100805,-0.065227,0.012364,0.019117,0.04538,0.030781,0.010538
67270,0.531767,0.289772,0.242526,0.368007,0.37907,0.361337,0.285055,0.255162,0.236151,0.223894,...,-0.03793,-0.060126,-0.074519,-0.028618,-0.051816,0.004703,0.004447,0.01934,0.007784,0.003826
35710,0.393877,0.327816,0.252725,0.579857,0.287451,0.246881,0.260701,0.262741,0.236151,0.223894,...,-0.128026,-0.195212,-0.149618,-0.079184,-0.089123,0.040674,0.070294,0.040046,0.006031,0.018591
39310,0.862057,0.368667,0.493491,0.54799,0.441513,0.472957,0.280107,0.326665,0.236151,0.223894,...,-0.036013,-0.063232,-0.085516,-0.024034,-0.059926,0.014745,0.00876,0.029128,0.012121,0.006668
8275,0.537935,0.55667,0.186149,0.269048,0.414934,0.13082,0.210454,0.267332,0.236151,0.223894,...,-0.076367,-0.067095,-0.076864,-0.027243,-0.065505,0.022338,0.018786,0.007688,0.020716,0.011212


In [26]:
train_y_sets = []
test_y_sets = []
# Target containers, one entry per LMA feature column. The next cell fills
# them with single-column DataFrames (not NumPy arrays).

In [27]:
# Move every remaining LMA feature column out of the feature frames, storing
# each one as its own single-column DataFrame (one regression target each).
# ``pop`` empties train_dataset / test_dataset as a side effect.
train_y_sets.extend(
    train_dataset.pop(feature).to_frame()
    for feature in list(train_dataset.columns)
)

test_y_sets.extend(
    test_dataset.pop(feature).to_frame()
    for feature in list(test_dataset.columns)
)

In [28]:
# List the name of every target column, in training order.
for target_frame in train_y_sets:
    print(target_frame.columns[0])

max_hand_distance
avg_l_hand_hip_distance
avg_r_hand_hip_distance
max_stride_length
avg_l_hand_chest_distance
avg_r_hand_chest_distance
avg_l_elbow_hip_distance
avg_r_elbow_hip_distance
avg_chest_pelvis_distance
avg_neck_chest_distance
avg_total_body_volume
avg_lower_body_volume
avg_upper_body_volume
avg_triangle_area_hands_neck
avg_triangle_area_feet_hips
l_hand_speed
r_hand_speed
l_foot_speed
r_foot_speed
neck_speed
l_hand_acceleration_magnitude
r_hand_acceleration_magnitude
l_foot_acceleration_magnitude
r_foot_acceleration_magnitude
neck_acceleration_magnitude


## RandomSearchCV

In [29]:
# A parameter grid for XGBoost
# https://www.analyticsvidhya.com/blog/2016/03/complete-guide-parameter-tuning-xgboost-with-codes-python/
# Candidate values per hyper-parameter; the search cells pass this dict as
# ``param_distributions`` to RandomizedSearchCV.
params = {
        'eta': [0.01, 0.05, 0.1, 0.15],
        'min_child_weight': [1, 5, 11, 21],
        'max_depth': [3, 6, 10, 15],
        'gamma': [0, 0.001, 0.01],
        'subsample': [0.75, 1],
        'colsample_bytree': [0.75, 1],
        'lambda': [0.85, 1, 1.25],
        'alpha': [0.0, 0.1, 0.25]
        }

# Value passed as ``n_iter`` to every RandomizedSearchCV below.
n_iter = 150

In [30]:
# One base regressor per LMA feature target; the searched hyper-parameters
# are supplied later by RandomizedSearchCV.
models = [
    xgb.XGBRegressor(
        n_estimators=1500,
        objective="reg:squarederror",
        tree_method='gpu_hist',
    )
    for _ in train_y_sets
]

In [31]:
# Randomized hyper-parameter search for LMA feature targets 0-4.
# This cell also (re)initialises random_searches; the following search cells
# append to it, so results stay in target order when run in sequence.

random_searches = []

for i in range(5):
    print(train_y_sets[i].columns[0])

    # 2-fold cross-validation on shuffled splits (no fixed seed).
    kfold = KFold(n_splits=2, shuffle=True)

    random_search = RandomizedSearchCV(models[i], param_distributions=params,
                               cv=kfold, scoring='neg_mean_squared_error', n_iter = n_iter)

    start = time.time()
    random_search.fit(train_emotions, train_y_sets[i])

    random_searches.append(random_search)

    # The message previously named GridSearchCV and omitted the candidate count.
    print("RandomizedSearchCV took %.2f seconds for %d candidate"
          " parameter settings." % ((time.time() - start), n_iter))
    print(i)
    print()

max_hand_distance
GridSearchCV took 4242.63 seconds parameter settings.
0

avg_l_hand_hip_distance
GridSearchCV took 2946.63 seconds parameter settings.
1

avg_r_hand_hip_distance
GridSearchCV took 3627.81 seconds parameter settings.
2

max_stride_length
GridSearchCV took 3794.84 seconds parameter settings.
3

avg_l_hand_chest_distance
GridSearchCV took 2361.22 seconds parameter settings.
4



In [None]:
# Randomized hyper-parameter search for LMA feature targets 5-9; results are
# appended to the existing random_searches list.
for i in range(5,10):
    print(train_y_sets[i].columns[0])

    # 2-fold cross-validation on shuffled splits (no fixed seed).
    kfold = KFold(n_splits=2, shuffle=True)

    random_search = RandomizedSearchCV(models[i], param_distributions=params,
                               cv=kfold, scoring='neg_mean_squared_error', n_iter = n_iter)

    start = time.time()
    random_search.fit(train_emotions, train_y_sets[i])

    random_searches.append(random_search)

    # The message previously named GridSearchCV and omitted the candidate count.
    print("RandomizedSearchCV took %.2f seconds for %d candidate"
          " parameter settings." % ((time.time() - start), n_iter))
    print(i)
    print()

avg_r_hand_chest_distance
GridSearchCV took 2642.89 seconds parameter settings.
5

avg_l_elbow_hip_distance
GridSearchCV took 3167.15 seconds parameter settings.
6

avg_r_elbow_hip_distance
GridSearchCV took 1975.50 seconds parameter settings.
7

avg_chest_pelvis_distance


In [None]:
# Randomized hyper-parameter search for LMA feature targets 10-14; results
# are appended to the existing random_searches list.
for i in range(10,15):
    print(train_y_sets[i].columns[0])

    # 2-fold cross-validation on shuffled splits (no fixed seed).
    kfold = KFold(n_splits=2, shuffle=True)

    random_search = RandomizedSearchCV(models[i], param_distributions=params,
                               cv=kfold, scoring='neg_mean_squared_error', n_iter = n_iter)

    start = time.time()
    random_search.fit(train_emotions, train_y_sets[i])

    random_searches.append(random_search)

    # The message previously named GridSearchCV and omitted the candidate count.
    print("RandomizedSearchCV took %.2f seconds for %d candidate"
          " parameter settings." % ((time.time() - start), n_iter))
    print(i)
    print()

In [None]:
# Randomized hyper-parameter search for LMA feature targets 15-19; results
# are appended to the existing random_searches list.
for i in range(15,20):
    print(train_y_sets[i].columns[0])

    # 2-fold cross-validation on shuffled splits (no fixed seed).
    kfold = KFold(n_splits=2, shuffle=True)

    random_search = RandomizedSearchCV(models[i], param_distributions=params,
                               cv=kfold, scoring='neg_mean_squared_error', n_iter = n_iter)

    start = time.time()
    random_search.fit(train_emotions, train_y_sets[i])

    random_searches.append(random_search)

    # The message previously named GridSearchCV and omitted the candidate count.
    print("RandomizedSearchCV took %.2f seconds for %d candidate"
          " parameter settings." % ((time.time() - start), n_iter))
    print(i)
    print()

In [None]:
# Randomized hyper-parameter search for LMA feature targets 20-24; results
# are appended to the existing random_searches list.
for i in range(20,25):
    print(train_y_sets[i].columns[0])

    # 2-fold cross-validation on shuffled splits (no fixed seed).
    kfold = KFold(n_splits=2, shuffle=True)

    random_search = RandomizedSearchCV(models[i], param_distributions=params,
                               cv=kfold, scoring='neg_mean_squared_error', n_iter = n_iter)

    start = time.time()
    random_search.fit(train_emotions, train_y_sets[i])

    random_searches.append(random_search)

    # The message previously named GridSearchCV and omitted the candidate count.
    print("RandomizedSearchCV took %.2f seconds for %d candidate"
          " parameter settings." % ((time.time() - start), n_iter))
    print(i)
    print()

### Results

In [40]:
# Collect the best estimator of every finished search and print its parameters.
best_regressors = []
for search, target_frame in zip(random_searches, train_y_sets):
    print(target_frame.columns[0])
    best_regressor = search.best_estimator_

    print(best_regressor.get_params())
    best_regressors.append(best_regressor)
    print()

max_hand_distance
{'objective': 'reg:squarederror', 'base_score': 0.5, 'booster': 'gbtree', 'colsample_bylevel': 1, 'colsample_bynode': 1, 'colsample_bytree': 1, 'enable_categorical': False, 'gamma': 0.001, 'gpu_id': 0, 'importance_type': None, 'interaction_constraints': '', 'learning_rate': 0.00999999978, 'max_delta_step': 0, 'max_depth': 10, 'min_child_weight': 11, 'missing': nan, 'monotone_constraints': '()', 'n_estimators': 1500, 'n_jobs': 12, 'num_parallel_tree': 1, 'predictor': 'auto', 'random_state': 0, 'reg_alpha': 0.25, 'reg_lambda': 0.850000024, 'scale_pos_weight': 1, 'subsample': 1, 'tree_method': 'gpu_hist', 'validate_parameters': 1, 'verbosity': None, 'lambda': 0.85, 'eta': 0.01, 'alpha': 0.25}

avg_l_hand_hip_distance
{'objective': 'reg:squarederror', 'base_score': 0.5, 'booster': 'gbtree', 'colsample_bylevel': 1, 'colsample_bynode': 1, 'colsample_bytree': 1, 'enable_categorical': False, 'gamma': 0.01, 'gpu_id': 0, 'importance_type': None, 'interaction_constraints': '', '

In [None]:
# Report MSE / MAE of every tuned regressor on the test split, plus the first
# true target value as an example.
for regressor, target_frame in zip(best_regressors, test_y_sets):
    pred_y = regressor.predict(test_emotions)
    mse = mean_squared_error(target_frame, pred_y)
    mae = mean_absolute_error(target_frame, pred_y)
    print(target_frame.columns[0])
    print("MSE: %.2f" % mse)
    print("MAE: %.2f" % mae)
    # Purely positional access. The chained ``.iloc[0][0]`` form looked up
    # the integer 0 on a Series indexed by the column name, which only works
    # through pandas' deprecated integer-as-position fallback.
    print("Example: ", target_frame.iloc[0, 0])
    print()

In [None]:
# Predict every LMA feature for the test emotions: one array per tuned model.
generated_features = [
    regressor.predict(test_emotions) for regressor in best_regressors
]

# Transpose the per-feature arrays into one list of values per sample.
rows = [list(sample) for sample in zip(*generated_features)]

print(len(rows))

_lma_feature_names = [
    "max_hand_distance",
    "avg_l_hand_hip_distance",
    "avg_r_hand_hip_distance",
    "max_stride_length",
    "avg_l_hand_chest_distance",
    "avg_r_hand_chest_distance",
    "avg_l_elbow_hip_distance",
    "avg_r_elbow_hip_distance",
    "avg_chest_pelvis_distance",
    "avg_neck_chest_distance",

    "avg_total_body_volume",
    "avg_lower_body_volume",
    "avg_upper_body_volume",

    "avg_triangle_area_hands_neck",
    "avg_triangle_area_feet_hips",

    "l_hand_speed",
    "r_hand_speed",
    "l_foot_speed",
    "r_foot_speed",
    "neck_speed",

    "l_hand_acceleration_magnitude",
    "r_hand_acceleration_magnitude",
    "l_foot_acceleration_magnitude",
    "r_foot_acceleration_magnitude",
    "neck_acceleration_magnitude",
]

generated = pd.DataFrame(rows, columns=_lma_feature_names)

generated.head()

In [None]:
# Feed the generated LMA features back through the scaler and the three
# pretrained regressors to obtain the emotions they correspond to.
generated_scaled = scaler.transform(generated)

gen_emotions_p = model_p.predict(generated_scaled)
gen_emotions_a = model_a.predict(generated_scaled)
gen_emotions_d = model_d.predict(generated_scaled)

# One [P, A, D] list per generated sample.
rows = [
    list(triple)
    for triple in zip(gen_emotions_p, gen_emotions_a, gen_emotions_d)
]

gen_emotions = pd.DataFrame(
    rows,
    columns=["EMOTION_P", "EMOTION_A", "EMOTION_D"],
)

gen_emotions.head()

In [49]:
import random

# Per-dimension errors between test_emotions and the emotions recovered from
# the generated LMA features (multioutput='raw_values' keeps one value per
# column instead of averaging them).
mae_errors = mean_absolute_error(test_emotions, gen_emotions, multioutput='raw_values')
mse_errors = mean_squared_error(test_emotions, gen_emotions, multioutput='raw_values')

features = ["PLEASURE", "AROUSAL", "DOMINANCE"]

print("Overall MAE: " + str(mean_absolute_error(test_emotions, gen_emotions)))

print()
for name, mse, mae in zip(features, mse_errors, mae_errors):
    print("==" + name + "==")
    print("MSE: %.5f" % mse)
    print("MAE: %.5f" % mae)
    print()

# Print 30 randomly chosen samples for a qualitative comparison.
for _ in range(30):
    # randrange excludes the upper bound; random.randint(0, len(...)) is
    # inclusive on both ends and could return len(test_emotions), which makes
    # the .iloc lookups below raise IndexError.
    row = random.randrange(len(test_emotions))

    # NOTE(review): test_emotions is printed as "Predicted" and differs from
    # test_emotions_OG in the recorded output, so it was presumably replaced
    # by model predictions in an earlier cell — confirm.
    print("Real: " + str([test_emotions_OG.iloc[row,0], test_emotions_OG.iloc[row,1], test_emotions_OG.iloc[row,2]]))
    print("Predicted: " + str([test_emotions.iloc[row,0], test_emotions.iloc[row,1], test_emotions.iloc[row,2]]))
    print("Generated: " + str([gen_emotions.iloc[row,0], gen_emotions.iloc[row,1], gen_emotions.iloc[row,2]]))
    print()

Overall MAE: 0.24069016969103343

==PLEASURE==
MSE: 0.03477
MAE: 0.14409

==AROUSAL==
MSE: 0.19900
MAE: 0.33876

==DOMINANCE==
MSE: 0.10806
MAE: 0.23922

Real: [0.2, -0.75, -0.3]
Predicted: [0.23051218688488007, -0.6915867328643799, -0.3138922154903412]
Generated: [0.2925979, -0.33984184, -0.11208487]

Real: [-0.1, -0.55, -0.15]
Predicted: [-0.09983557462692261, -0.5563393831253052, -0.15231451392173767]
Generated: [0.116028085, -0.4984349, -0.1768014]

Real: [0.3, 0.4, 0.6]
Predicted: [0.3093508780002594, 0.40269967913627625, 0.5921486616134644]
Generated: [0.20004515, 0.04173456, -0.034392938]

Real: [0.6, 0.4, 0.1]
Predicted: [0.5813582539558411, 0.3744302988052368, 0.09587708860635757]
Generated: [0.1875925, -0.3106345, -0.10074129]

Real: [-0.1, -0.55, -0.15]
Predicted: [-0.10247635841369629, -0.5620092749595642, -0.14274999499320984]
Generated: [0.1378856, -0.31112975, -0.12550613]

Real: [0.2, -0.75, -0.3]
Predicted: [0.2088785022497177, -0.7416840195655823, -0.3041081428527832]

In [50]:
# Save each regressor to JSON, naming the file after the first column of the
# test_y_sets entry at the same position.
for position, regressor in enumerate(best_regressors):
    target_name = test_y_sets[position].columns[0]
    regressor.save_model("models/bandai/xgb/" + target_name +".json")

### Predictions

In [None]:
def _load_xgb_regressor(path):
    """Return a new XGBRegressor with the model stored at *path* loaded."""
    regressor = xgb.XGBRegressor(verbosity=0)
    regressor.load_model(path)
    return regressor


# Re-bind the LMA -> P/A/D models and the scaler used by the prediction cells.
# NOTE(review): model_p is read from "bandai_l2p_model_2.json", while model_a
# and model_d use the files without the "_2" suffix — confirm this is intended.
model_p = _load_xgb_regressor("../../emotion_classifier/model_training/models/bandai_l2p_model_2.json")
model_a = _load_xgb_regressor("../../emotion_classifier/model_training/models/bandai_l2a_model.json")
model_d = _load_xgb_regressor("../../emotion_classifier/model_training/models/bandai_l2d_model.json")

scaler = joblib.load('../../emotion_classifier/model_training/datasets/scalers/standardizers/S_BANDAI_5frame.pkl')

In [48]:
# Round trip for one test sample: (P, A, D) -> generated LMA features ->
# (P, A, D) recovered by the LMA -> emotion models.
index = 10

# Wrap the single row in a list so predict() receives a one-row 2-D array.
real_coordinates = np.asarray([test_emotions.iloc[index]])

# One predicted LMA feature value per regressor. Iterate best_regressors
# directly so the loop bound always matches the list being read (the loop
# used to be bounded by len(models) while indexing best_regressors).
generated_features = [
    regressor.predict(real_coordinates)[0] for regressor in best_regressors
]

# Column names are assigned by position, so this list has to follow the
# order of the regressors in best_regressors.
generated_features = pd.DataFrame([generated_features], columns=["max_hand_distance",
          "avg_l_hand_hip_distance",
          "avg_r_hand_hip_distance",
          "max_stride_length",
          "avg_l_hand_chest_distance",
          "avg_r_hand_chest_distance",
          "avg_l_elbow_hip_distance",
          "avg_r_elbow_hip_distance",
          "avg_chest_pelvis_distance",
          "avg_neck_chest_distance",
          
          "avg_total_body_volume",
          "avg_lower_body_volume",
          "avg_upper_body_volume",
          
          "avg_triangle_area_hands_neck",
          "avg_triangle_area_feet_hips",
          
          "l_hand_speed",
          "r_hand_speed",
          "l_foot_speed",
          "r_foot_speed",
          "neck_speed",
          
          "l_hand_acceleration_magnitude",
          "r_hand_acceleration_magnitude",
          "l_foot_acceleration_magnitude",
          "r_foot_acceleration_magnitude",
          "neck_acceleration_magnitude",
    ])

# Scale the generated features before feeding them to the emotion models.
generated_features = scaler.transform(generated_features)

y_p = model_p.predict(generated_features)
y_a = model_a.predict(generated_features)
y_d = model_d.predict(generated_features)

print('Real: %s' % np.asarray(real_coordinates))
print('Predicted: %s' % [y_p[0], y_a[0], y_d[0]])

Real: [[-0.11073083 -0.53973973 -0.14922892]]
Predicted: [0.17201395, -0.49541107, -0.19870964]


In [45]:
# Round trip for a hand-written (P, A, D) point: generate LMA features from
# it, then recover (P, A, D) with the LMA -> emotion models.
real_coordinates = np.asarray([[0.5, 0.0, 0.0]])

# One predicted LMA feature value per regressor. Iterate best_regressors
# directly so the loop bound always matches the list being read (the loop
# used to be bounded by len(models) while indexing best_regressors).
generated_features = [
    regressor.predict(real_coordinates)[0] for regressor in best_regressors
]

# Column names are assigned by position, so this list has to follow the
# order of the regressors in best_regressors.
generated_features = pd.DataFrame([generated_features], columns=["max_hand_distance",
          "avg_l_hand_hip_distance",
          "avg_r_hand_hip_distance",
          "max_stride_length",
          "avg_l_hand_chest_distance",
          "avg_r_hand_chest_distance",
          "avg_l_elbow_hip_distance",
          "avg_r_elbow_hip_distance",
          "avg_chest_pelvis_distance",
          "avg_neck_chest_distance",
          
          "avg_total_body_volume",
          "avg_lower_body_volume",
          "avg_upper_body_volume",
          
          "avg_triangle_area_hands_neck",
          "avg_triangle_area_feet_hips",
          
          "l_hand_speed",
          "r_hand_speed",
          "l_foot_speed",
          "r_foot_speed",
          "neck_speed",
          
          "l_hand_acceleration_magnitude",
          "r_hand_acceleration_magnitude",
          "l_foot_acceleration_magnitude",
          "r_foot_acceleration_magnitude",
          "neck_acceleration_magnitude",
    ])

# Scale the generated features before feeding them to the emotion models.
generated_features = scaler.transform(generated_features)

y_p = model_p.predict(generated_features)
y_a = model_a.predict(generated_features)
y_d = model_d.predict(generated_features)

print('Real: %s' % np.asarray(real_coordinates))
print('Predicted: %s' % [y_p[0], y_a[0], y_d[0]])

Real: [[0.5 0.  0. ]]
Predicted: [0.37195557, -0.37101802, -0.111295834]
