# XGBRegression (P,A,D coordinates to LMA Features)

In [34]:
# Current best:
#datasets/pad/Fs_B_S_DANCE_WALK_0.5sec.csv
"""
Using the identified emotions of each LMA feature set rather than the normal ones
"""

'\nUsing the identified emotions of each LMA feature set rather than the normal ones\n'

## Imports

In [35]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.lines as mlines

import tensorflow as tf

import sklearn
from sklearn.model_selection import cross_val_score, KFold, StratifiedKFold

import xgboost as xgb

xgb.set_config(verbosity=0)

from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import normalize

from sklearn.utils import shuffle
import math
import joblib


In [36]:
# Pre-trained regressors that map LMA features to one PAD axis each
# (Pleasure, Arousal, Dominance). They are created empty and then restored
# from their JSON dumps.
model_p, model_a, model_d = (xgb.XGBRegressor(verbosity=0) for _ in range(3))

model_p.load_model("../../emotion_classifier/model_training/models/l2p_dance_model_O.json")
model_a.load_model("../../emotion_classifier/model_training/models/l2a_dance_model_O.json")
model_d.load_model("../../emotion_classifier/model_training/models/l2d_dance_model_O.json")

# Fitted scaler used in the prediction-example cell to transform generated
# features before they are fed to the regressors above.
# NOTE: joblib files are pickles -- only load artefacts from a trusted source.
scaler = joblib.load('../../emotion_classifier/model_training/datasets/scalers/standardizers/Fs_B_O_S_DANCE_WALK_KIN_0.5sec.pkl')

## Load Data

In [37]:
# Table holding the EMOTION_P / EMOTION_A / EMOTION_D columns alongside the
# LMA feature columns; both groups are split off in the cells below.
dataset = pd.read_csv(filepath_or_buffer='datasets/Fs_B_O_DANCE_WALK_KIN_0.5sec.csv')

### Split into Test and Train data

In [38]:
# 80/20 hold-out split. `sample` is seeded, so which rows land in each split
# is reproducible; the test set is simply every row that was not sampled.
train_dataset = dataset.sample(frac=0.8, random_state=42)
test_dataset = dataset.drop(train_dataset.index)

print("No Training Samples:",train_dataset.shape[0])
print("No Test Samples:",test_dataset.shape[0])

# Seed the shuffles as well: without `random_state` the row order changes on
# every run, so training results and the rows picked by the positional
# `iloc[...]` lookups in later cells would not be reproducible.
train_dataset = shuffle(train_dataset, random_state=42)
test_dataset = shuffle(test_dataset, random_state=42)

No Training Samples: 32147
No Test Samples: 8037


### Split Features from Targets

In [39]:
# Containers for the regression targets. A later cell appends one
# single-column DataFrame per LMA feature to each list.
train_y_sets, test_y_sets = [], []

In [40]:
# Move the three PAD label columns out of the feature frames.
# `pop` mutates train_dataset / test_dataset in place, so afterwards they hold
# only the LMA feature columns. Re-running this cell raises KeyError because
# the columns are already gone.
_pad_columns = ['EMOTION_P', 'EMOTION_A', 'EMOTION_D']

_train_pad = [train_dataset.pop(name) for name in _pad_columns]
train_X = pd.concat(_train_pad, axis=1)

_test_pad = [test_dataset.pop(name) for name in _pad_columns]
test_X = pd.concat(_test_pad, axis=1)

In [41]:
# Replace the labelled PAD coordinates with the coordinates the pre-trained
# regressors assign to each training sample's LMA features.
# NOTE(review): the prediction-example cell further down runs features through
# `scaler.transform` before calling these same regressors, whereas here they
# receive the raw dataset columns -- confirm which input scale they expect.
train_X_p = model_p.predict(train_dataset)
train_X_a = model_a.predict(train_dataset)
train_X_d = model_d.predict(train_dataset)

# Assemble all rows in one vectorised step instead of appending them one by
# one in a Python loop. The cast keeps the columns in double precision
# whatever dtype the regressors return.
train_X = pd.DataFrame(
    np.column_stack([train_X_p, train_X_a, train_X_d]).astype(np.float64),
    columns=["EMOTION_P", "EMOTION_A", "EMOTION_D"],
)

train_X.head()

Unnamed: 0,EMOTION_P,EMOTION_A,EMOTION_D
0,0.342767,0.384754,0.316698
1,0.444786,0.358585,0.144301
2,0.38693,0.381998,0.301604
3,0.42829,0.447306,0.404004
4,0.342987,0.426325,0.312238


In [42]:
# Same substitution as for the training split: the test inputs become the PAD
# coordinates that the pre-trained regressors predict from the LMA features.
# NOTE(review): features are passed unscaled here, while the
# prediction-example cell applies `scaler.transform` first -- confirm which
# input scale the regressors expect.
test_X_p = model_p.predict(test_dataset)
test_X_a = model_a.predict(test_dataset)
test_X_d = model_d.predict(test_dataset)

# Build the frame in one vectorised step rather than row by row. The cast
# keeps the columns in double precision whatever dtype the regressors return.
test_X = pd.DataFrame(
    np.column_stack([test_X_p, test_X_a, test_X_d]).astype(np.float64),
    columns=["EMOTION_P", "EMOTION_A", "EMOTION_D"],
)

test_X.head()

Unnamed: 0,EMOTION_P,EMOTION_A,EMOTION_D
0,0.365162,0.373706,0.242617
1,0.335886,0.504628,0.120682
2,0.041402,0.44404,0.197123
3,0.176047,0.49521,0.11004
4,0.20954,0.284437,0.131879


In [43]:
# Peel every remaining column off the feature frames and keep each one as its
# own single-column DataFrame (one regression target per LMA feature).
# The column labels are snapshotted first because `pop` removes them from the
# frame while we iterate.
for feature_name in list(train_dataset.columns):
    train_y_sets.append(train_dataset.pop(feature_name).to_frame())

for feature_name in list(test_dataset.columns):
    test_y_sets.append(test_dataset.pop(feature_name).to_frame())

In [44]:
# Preview the model inputs (predicted PAD coordinates) for the first rows.
train_X.head(n=5)

Unnamed: 0,EMOTION_P,EMOTION_A,EMOTION_D
0,0.342767,0.384754,0.316698
1,0.444786,0.358585,0.144301
2,0.38693,0.381998,0.301604
3,0.42829,0.447306,0.404004
4,0.342987,0.426325,0.312238


In [45]:
# List the regression targets in the order their models will be built.
for target_frame in train_y_sets:
    print(target_frame.columns[0])

max_hand_distance
avg_l_hand_hip_distance
avg_r_hand_hip_distance
max_stride_length
avg_l_hand_chest_distance
avg_r_hand_chest_distance
avg_l_elbow_hip_distance
avg_r_elbow_hip_distance
avg_chest_pelvis_distance
avg_neck_chest_distance
avg_neck_rotation_w
avg_neck_rotation_x
avg_neck_rotation_y
avg_neck_rotation_z
avg_total_body_volume
avg_triangle_area_hands_neck
avg_triangle_area_feet_hips
l_hand_speed
r_hand_speed
l_foot_speed
r_foot_speed
neck_speed
l_hand_acceleration_magnitude
r_hand_acceleration_magnitude
l_foot_acceleration_magnitude
r_foot_acceleration_magnitude
neck_acceleration_magnitude


## Define Model

In [46]:
# `xgbr` and `params` are kept for reference only: the regressors built below
# use fixed hyperparameters and do not read either of them.
xgbr = xgb.XGBRegressor(verbosity=1)

params = {
    'eta': [0.01, 0.05, 0.1],
    'min_child_weight': [1, 5, 11, 21],
    'max_depth': [3, 6, 10, 15],
    'gamma': [0, 0.001, 0.01],
    'subsample': [0.75, 1],
    'colsample_bytree': [0.75, 1],
    'lambda': [1, 1.25],
    'alpha': [0.0, 0.25],
}

# One independent regressor per LMA-feature target, all sharing the same
# hand-picked configuration.
# NOTE(review): tree_method='gpu_hist' needs a GPU-enabled XGBoost build.
models = [
    xgb.XGBRegressor(
        n_estimators=1500,
        learning_rate=0.05,
        max_depth=6,
        min_child_weight=11,
        reg_alpha=0.25,
        reg_lambda=1.25,
        gamma=0.01,
        subsample=0.75,
        colsample_bytree=0.75,
        objective="reg:squarederror",
        tree_method='gpu_hist',
    )
    for _ in train_y_sets
]

## Train Model

In [47]:
# Fit each regressor on the PAD inputs against its own LMA-feature target,
# printing the target name as a progress marker.
for idx, regressor in enumerate(models):
    target_frame = train_y_sets[idx]
    print(target_frame.columns[0])
    regressor.fit(train_X, target_frame)

max_hand_distance
avg_l_hand_hip_distance
avg_r_hand_hip_distance
max_stride_length
avg_l_hand_chest_distance
avg_r_hand_chest_distance
avg_l_elbow_hip_distance
avg_r_elbow_hip_distance
avg_chest_pelvis_distance
avg_neck_chest_distance
avg_neck_rotation_w
avg_neck_rotation_x
avg_neck_rotation_y
avg_neck_rotation_z
avg_total_body_volume
avg_triangle_area_hands_neck
avg_triangle_area_feet_hips
l_hand_speed
r_hand_speed
l_foot_speed
r_foot_speed
neck_speed
l_hand_acceleration_magnitude
r_hand_acceleration_magnitude
l_foot_acceleration_magnitude
r_foot_acceleration_magnitude
neck_acceleration_magnitude


## Test Model

### Training Score

In [48]:
# Score every regressor on the data it was fitted on (`score` is R^2 for
# scikit-learn style regressors).
# NOTE(review): the recorded run shows a score of about -1.6e9 for
# avg_chest_pelvis_distance while its test MSE rounds to 0.00 -- presumably
# the target is (nearly) constant, which makes R^2 meaningless; verify.
for idx, regressor in enumerate(models):
    target_frame = train_y_sets[idx]
    score = regressor.score(train_X, target_frame)
    print(target_frame.columns[0])
    print("Training score: ", score)

    print()

max_hand_distance
Training score:  0.5399454354817284

avg_l_hand_hip_distance
Training score:  0.4034339633612394

avg_r_hand_hip_distance
Training score:  0.404682199760545

max_stride_length
Training score:  0.4405451156625565

avg_l_hand_chest_distance
Training score:  0.3284601522580578

avg_r_hand_chest_distance
Training score:  0.2921378720712525

avg_l_elbow_hip_distance
Training score:  0.23395770745727196

avg_r_elbow_hip_distance
Training score:  0.18397757475642362

avg_chest_pelvis_distance
Training score:  -1578703867.2680929

avg_neck_chest_distance
Training score:  -6.993535959232133e-07

avg_neck_rotation_w
Training score:  0.17772657403159464

avg_neck_rotation_x
Training score:  0.48026830850439983

avg_neck_rotation_y
Training score:  0.3391175938771205

avg_neck_rotation_z
Training score:  0.35585191663273885

avg_total_body_volume
Training score:  0.47078048052319876

avg_triangle_area_hands_neck
Training score:  0.13766400353313235

avg_triangle_area_feet_hips
Tr

### Test Set MAE & MSE

In [49]:
# Hold-out error of every regressor, plus one ground-truth value of the target
# to give the error magnitudes some scale.
for i in range(len(models)):
    pred_y = models[i].predict(test_X)
    mse = mean_squared_error(test_y_sets[i], pred_y)
    mae = mean_absolute_error(test_y_sets[i], pred_y)
    print(test_y_sets[i].columns[0])
    print("MSE: %.2f" % mse)
    print("MAE: %.2f" % mae)
    # Purely positional lookup of the first row / first column. The previous
    # `.iloc[0][0]` indexed a string-labelled Series with an integer, which
    # relies on a positional fallback that pandas has deprecated.
    print("Example: ", test_y_sets[i].iloc[0, 0])
    print()

max_hand_distance
MSE: 0.04
MAE: 0.16
Example:  1.0466280553608256

avg_l_hand_hip_distance
MSE: 0.02
MAE: 0.10
Example:  0.5211666045200023

avg_r_hand_hip_distance
MSE: 0.02
MAE: 0.11
Example:  0.362042134709937

max_stride_length
MSE: 0.02
MAE: 0.11
Example:  0.6026755506994144

avg_l_hand_chest_distance
MSE: 0.01
MAE: 0.07
Example:  0.5913874289649005

avg_r_hand_chest_distance
MSE: 0.01
MAE: 0.08
Example:  0.3621781709403662

avg_l_elbow_hip_distance
MSE: 0.00
MAE: 0.05
Example:  0.4644061740628424

avg_r_elbow_hip_distance
MSE: 0.00
MAE: 0.05
Example:  0.2968889601535709

avg_chest_pelvis_distance
MSE: 0.00
MAE: 0.00
Example:  0.2861509998422412

avg_neck_chest_distance
MSE: 0.00
MAE: 0.00
Example:  0.2734155747860033

avg_neck_rotation_w
MSE: 0.01
MAE: 0.06
Example:  -0.025106319761402

avg_neck_rotation_x
MSE: 0.05
MAE: 0.15
Example:  -0.4618695550435093

avg_neck_rotation_y
MSE: 0.01
MAE: 0.08
Example:  -0.1799121118894527

avg_neck_rotation_z
MSE: 0.01
MAE: 0.05
Example:  0.8

### Prediction Examples

In [52]:
# Restore the PAD regressors and the scaler used to check generated features.
# This reads the same files as the model-loading cell near the top of the
# notebook.
model_p, model_a, model_d = (xgb.XGBRegressor(verbosity=0) for _ in range(3))

model_p.load_model("../../emotion_classifier/model_training/models/l2p_dance_model_O.json")
model_a.load_model("../../emotion_classifier/model_training/models/l2a_dance_model_O.json")
model_d.load_model("../../emotion_classifier/model_training/models/l2d_dance_model_O.json")

# NOTE: joblib files are pickles -- only load artefacts from a trusted source.
scaler = joblib.load('../../emotion_classifier/model_training/datasets/scalers/standardizers/Fs_B_O_S_DANCE_WALK_KIN_0.5sec.pkl')

In [57]:
# Round-trip check for a single test sample:
#   PAD coordinates -> generated LMA features -> PAD coordinates again.
index = 4

# Single-row 2-D array holding the three PAD inputs of the chosen sample.
real_coordinates = np.asarray([test_X.iloc[index]])

# One generated value per LMA feature, in the same order as `models`.
generated_values = [regressor.predict(real_coordinates)[0] for regressor in models]

# Take the column labels from the target frames the models were fitted on
# (models[i] was fitted on train_y_sets[i]) rather than from a hand-maintained
# copy of the 27 names, so the labels cannot drift from the model order.
feature_names = [target_frame.columns[0] for target_frame in train_y_sets]
generated_frame = pd.DataFrame([generated_values], columns=feature_names)

# The PAD regressors are fed scaled features in this check.
generated_features = scaler.transform(generated_frame)

y_p = model_p.predict(generated_features)
y_a = model_a.predict(generated_features)
y_d = model_d.predict(generated_features)

print('Real: %s' % np.asarray(real_coordinates))
print('Predicted: %s' % [y_p[0], y_a[0], y_d[0]])

Real: [[0.20954038 0.284437   0.13187866]]
Predicted: [-0.36244485, 0.017026702, 0.01105946]


## Save Models

In [118]:
# Write the three PAD regressors to JSON under models/.
# NOTE(review): this saves model_p / model_a / model_d -- the regressors that
# were *loaded* above -- not the per-feature `models` trained in this
# notebook. Confirm this is intended; the cell looks like a leftover.
for _regressor, _target_path in (
    (model_p, "models/l2p_dance_model.json"),
    (model_a, "models/l2a_dance_model.json"),
    (model_d, "models/l2d_dance_model.json"),
):
    _regressor.save_model(_target_path)

## Load Models

In [856]:
# Swap in the "_v2" Pleasure and Arousal regressors.
# NOTE(review): model_d is not reloaded here and keeps whatever was loaded
# earlier, and no cell shown in this notebook writes the *_v2.json files --
# verify that this cell is still needed.
model_p, model_a = (xgb.XGBRegressor(verbosity=0) for _ in range(2))

model_p.load_model("models/l2p_dance_model_v2.json")
model_a.load_model("models/l2a_dance_model_v2.json")

# Hyperparameter Grid Search with XGBoost

In [58]:
from datetime import datetime
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

import tensorflow as tf

import sklearn
from sklearn.model_selection import cross_val_score, KFold
from sklearn.metrics import mean_absolute_error


import xgboost as xgb

xgb.set_config(verbosity=2)

from sklearn.metrics import mean_squared_error
from sklearn.utils import shuffle
import joblib

## Load Data

In [3]:
# Dataset for the hyperparameter search. This is a different file from the
# one loaded in the first part of the notebook (no "_O" in the name).
dataset = pd.read_csv(filepath_or_buffer='datasets/Fs_B_DANCE_WALK_KIN_0.5sec.csv')

### Split into Test and Train data

In [4]:
# 80/20 hold-out split. `sample` is seeded, so which rows land in each split
# is reproducible; the test set is every row that was not sampled.
train_dataset = dataset.sample(frac=0.8, random_state=42)
test_dataset = dataset.drop(train_dataset.index)

print("No Training Samples:",train_dataset.shape[0])
print("No Test Samples:",test_dataset.shape[0])

# Seed the shuffles too. Otherwise the row order -- and with it the search
# results and the rows picked by the positional `iloc[...]` lookups below --
# changes from run to run.
train_dataset = shuffle(train_dataset, random_state=42)
test_dataset = shuffle(test_dataset, random_state=42)

No Training Samples: 39539
No Test Samples: 9885


### Split Features from Targets

In [5]:
# Containers for the regression targets. A later cell appends one
# single-column DataFrame per LMA feature to each list.
train_y_sets, test_y_sets = [], []

In [6]:
# Split the three PAD label columns off as the model inputs.
# `pop` mutates train_dataset / test_dataset in place, leaving only the LMA
# feature columns behind. Re-running this cell raises KeyError.
_pad_columns = ['EMOTION_P', 'EMOTION_A', 'EMOTION_D']

_train_pad = [train_dataset.pop(name) for name in _pad_columns]
train_X = pd.concat(_train_pad, axis=1)

_test_pad = [test_dataset.pop(name) for name in _pad_columns]
test_X = pd.concat(_test_pad, axis=1)

In [7]:
# Turn every remaining column into its own single-column DataFrame target.
# The column labels are snapshotted first because `pop` removes them from the
# frame while we iterate.
for feature_name in list(train_dataset.columns):
    train_y_sets.append(train_dataset.pop(feature_name).to_frame())

for feature_name in list(test_dataset.columns):
    test_y_sets.append(test_dataset.pop(feature_name).to_frame())

In [8]:
# Preview the labelled PAD coordinates used as inputs for the search.
train_X.head(n=5)

Unnamed: 0,EMOTION_P,EMOTION_A,EMOTION_D
7012,-0.85,-0.1,-0.8
19814,0.9,-0.25,0.65
423,-0.4,0.25,-0.1
45807,-0.5,0.6,0.9
28216,-0.35,0.7,-0.8


In [9]:
# List the regression targets in the order they will be searched.
for target_frame in train_y_sets:
    print(target_frame.columns[0])

max_hand_distance
avg_l_hand_hip_distance
avg_r_hand_hip_distance
max_stride_length
avg_l_hand_chest_distance
avg_r_hand_chest_distance
avg_l_elbow_hip_distance
avg_r_elbow_hip_distance
avg_chest_pelvis_distance
avg_neck_chest_distance
avg_neck_rotation_w
avg_neck_rotation_x
avg_neck_rotation_y
avg_neck_rotation_z
avg_total_body_volume
avg_triangle_area_hands_neck
avg_triangle_area_feet_hips
l_hand_speed
r_hand_speed
l_foot_speed
r_foot_speed
neck_speed
l_hand_acceleration_magnitude
r_hand_acceleration_magnitude
l_foot_acceleration_magnitude
r_foot_acceleration_magnitude
neck_acceleration_magnitude


## RandomizedSearchCV

In [59]:
# Candidate values sampled by the randomized search below.
# Tuning guide the grid is based on:
# https://www.analyticsvidhya.com/blog/2016/03/complete-guide-parameter-tuning-xgboost-with-codes-python/
# 'eta', 'lambda' and 'alpha' are XGBoost's native names for what the
# scikit-learn wrapper also exposes as learning_rate, reg_lambda and reg_alpha.
params = {
    'eta': [0.01, 0.05, 0.1],
    'min_child_weight': [1, 5, 11, 21],
    'max_depth': [3, 6, 10, 15],
    'gamma': [0, 0.001, 0.01],
    'subsample': [0.75, 1],
    'colsample_bytree': [0.75, 1],
    'lambda': [1, 1.25],
    'alpha': [0.0, 0.25],
}

# Number of parameter combinations drawn per target.
n_iter = 200

In [60]:
# Base estimators for the search: one per LMA-feature target. Only the
# settings that are not tuned are fixed here.
# NOTE(review): tree_method='gpu_hist' needs a GPU-enabled XGBoost build.
models = [
    xgb.XGBRegressor(
        n_estimators=1500,
        objective="reg:squarederror",
        tree_method='gpu_hist',
    )
    for _ in train_y_sets
]

In [None]:
# Randomized hyperparameter search, run independently for every LMA-feature
# target. The fitted search objects are kept in `random_searches`, in the same
# order as `models` / `train_y_sets`.
random_searches = []

for i in range(len(models)):
    print(train_y_sets[i].columns[0])

    # Seed both the fold assignment and the parameter sampling so that a
    # re-run evaluates the same candidates on the same folds.
    kfold = KFold(n_splits=3, shuffle=True, random_state=42)

    random_search = RandomizedSearchCV(models[i], param_distributions=params,
                                       cv=kfold, scoring='neg_mean_squared_error',
                                       n_iter=n_iter, random_state=42)

    start = time.time()
    random_search.fit(train_X, train_y_sets[i])
    elapsed = time.time() - start

    random_searches.append(random_search)

    # The message used to say "GridSearchCV" and left out the candidate count.
    print("RandomizedSearchCV took %.2f seconds for %d candidate"
          " parameter settings." % (elapsed, n_iter))

    print()

max_hand_distance


### Results

In [None]:
# Pull the refitted best estimator out of every search, printing its full
# parameter set under the name of the target it belongs to.
best_regressors = []

for idx in range(len(models)):
    target_name = train_y_sets[idx].columns[0]
    print(target_name)

    tuned = random_searches[idx].best_estimator_
    print(tuned.get_params())

    best_regressors.append(tuned)
    print()

In [None]:
# Hold-out error of every tuned regressor, plus one ground-truth value of the
# target to give the error magnitudes some scale.
for i in range(len(best_regressors)):
    pred_y = best_regressors[i].predict(test_X)
    mse = mean_squared_error(test_y_sets[i], pred_y)
    mae = mean_absolute_error(test_y_sets[i], pred_y)
    print(test_y_sets[i].columns[0])
    print("MSE: %.2f" % mse)
    print("MAE: %.2f" % mae)
    # Purely positional lookup of the first row / first column. The previous
    # `.iloc[0][0]` indexed a string-labelled Series with an integer, which
    # relies on a positional fallback that pandas has deprecated.
    print("Example: ", test_y_sets[i].iloc[0, 0])
    print()

In [None]:
# Persist every tuned regressor as JSON, one file per LMA feature, named after
# the feature's target column.
# Assumes the models/0.5_sec/ directory already exists -- TODO confirm.
for idx, tuned in enumerate(best_regressors):
    target_name = test_y_sets[idx].columns[0]
    tuned.save_model("models/0.5_sec/" + target_name + ".json")

### Predictions

In [46]:
# PAD regressors and scaler used to check the features generated by the tuned
# models. These are the variants without "_O" in their file names.
model_p, model_a, model_d = (xgb.XGBRegressor(verbosity=0) for _ in range(3))

model_p.load_model("../../emotion_classifier/model_training/models/l2p_dance_model.json")
model_a.load_model("../../emotion_classifier/model_training/models/l2a_dance_model.json")
model_d.load_model("../../emotion_classifier/model_training/models/l2d_dance_model.json")

# NOTE: joblib files are pickles -- only load artefacts from a trusted source.
scaler = joblib.load('../../emotion_classifier/model_training/datasets/scalers/standardizers/Fs_B_S_DANCE_WALK_KIN_0.5sec.pkl')

In [47]:
# Round-trip check for a single test sample using the tuned regressors:
#   PAD coordinates -> generated LMA features -> PAD coordinates again.
index = 10

# Single-row 2-D array holding the three PAD inputs of the chosen sample.
real_coordinates = np.asarray([test_X.iloc[index]])

# Iterate over the tuned regressors themselves. The loop used to run over
# range(len(models)) while indexing `best_regressors`, which only works while
# the two lists happen to have the same length.
generated_values = [tuned.predict(real_coordinates)[0] for tuned in best_regressors]

# Take the column labels from the target frames the searches were fitted on
# (search i was fitted on train_y_sets[i]) rather than from a hand-maintained
# copy of the 27 names, so the labels cannot drift from the regressor order.
feature_names = [target_frame.columns[0] for target_frame in train_y_sets]
generated_frame = pd.DataFrame([generated_values], columns=feature_names)

# The PAD regressors are fed scaled features in this check.
generated_features = scaler.transform(generated_frame)

y_p = model_p.predict(generated_features)
y_a = model_a.predict(generated_features)
y_d = model_d.predict(generated_features)

print('Real: %s' % np.asarray(real_coordinates))
print('Predicted: %s' % [y_p[0], y_a[0], y_d[0]])

Real: [[ 0.9  -0.25  0.65]]
Predicted: [0.45704892, 0.14212963, 0.21817927]
