# XGBRegression (P,A,D coordinates to LMA Features)

This attempt tries to use only the most coorelated P,A,D coordinates per feature

F.eg if "stride" is only strongly correlated to P and D, then only those 2 emotions will be used for the regression

## Imports

In [100]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.lines as mlines

import tensorflow as tf

import sklearn
from sklearn.model_selection import cross_val_score, KFold, StratifiedKFold

import xgboost as xgb

xgb.set_config(verbosity=0)

from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import normalize

from sklearn.utils import shuffle
import math
import joblib


## Load Data

In [140]:
dataset = pd.read_csv('datasets/Fs_B_DANCE_WALK_0.5sec.csv')

In [155]:
cor = dataset.corr()

coordinate_per_index = []
i = 0

for column in dataset:
    if(column in ["EMOTION_P", "EMOTION_A", "EMOTION_D"]):
        break
    
    #Correlation with output variable
    cor_target = abs(cor[column])
    print("===CORRELATION " + column +"===")
    print(cor_target[-3:])
    
    current = ''
    
    if(i in [10,13]):
        current = 'PA'
    elif(i in [11,12]):
        current = 'PAD'
    else:
        if(cor_target[-3] >= 0.01):
            current += 'P'
        if(cor_target[-2] >= 0.01):
            current += 'A'
        if(cor_target[-1] >= 0.01):
            current += 'D'
    
    coordinate_per_index.append(current)
    i+=1
    print()
    
print(coordinate_per_index)

===CORRELATION max_hand_distance===
EMOTION_P    0.352257
EMOTION_A    0.178646
EMOTION_D    0.294589
Name: max_hand_distance, dtype: float64

===CORRELATION avg_l_hand_hip_distance===
EMOTION_P    0.305158
EMOTION_A    0.113078
EMOTION_D    0.256505
Name: avg_l_hand_hip_distance, dtype: float64

===CORRELATION avg_r_hand_hip_distance===
EMOTION_P    0.279305
EMOTION_A    0.130497
EMOTION_D    0.205987
Name: avg_r_hand_hip_distance, dtype: float64

===CORRELATION max_stride_length===
EMOTION_P    0.099946
EMOTION_A    0.218093
EMOTION_D    0.151456
Name: max_stride_length, dtype: float64

===CORRELATION avg_l_hand_chest_distance===
EMOTION_P    0.260391
EMOTION_A    0.020581
EMOTION_D    0.181789
Name: avg_l_hand_chest_distance, dtype: float64

===CORRELATION avg_r_hand_chest_distance===
EMOTION_P    0.238151
EMOTION_A    0.049879
EMOTION_D    0.080738
Name: avg_r_hand_chest_distance, dtype: float64

===CORRELATION avg_l_elbow_hip_distance===
EMOTION_P    0.274435
EMOTION_A    0.040923

### Split into Test and Train data

In [157]:
train_dataset = dataset.sample(frac=0.8, random_state=42)
test_dataset = dataset.drop(train_dataset.index)

print("No Training Samples:",train_dataset.shape[0])
print("No Test Samples:",test_dataset.shape[0])

train_dataset = shuffle(train_dataset)
test_dataset = shuffle(test_dataset)

No Training Samples: 14734
No Test Samples: 3684


### Split Features from Targets

In [158]:
train_y_sets = []
test_y_sets = []
# List of np.arrays each containing the LMA features of a given column (i.e one list per feature)

In [159]:
train_PA = pd.concat([train_dataset.copy().pop(x) for x in ['EMOTION_P', 'EMOTION_A']], axis=1)
test_PA = pd.concat([test_dataset.copy().pop(x) for x in ['EMOTION_P', 'EMOTION_A']], axis=1)

train_PD = pd.concat([train_dataset.copy().pop(x) for x in ['EMOTION_P', 'EMOTION_D']], axis=1)
test_PD = pd.concat([test_dataset.copy().pop(x) for x in ['EMOTION_P', 'EMOTION_D']], axis=1)

train_AD = pd.concat([train_dataset.copy().pop(x) for x in ['EMOTION_A', 'EMOTION_D']], axis=1)
test_AD = pd.concat([test_dataset.copy().pop(x) for x in ['EMOTION_A', 'EMOTION_D']], axis=1)

train_AD = pd.concat([train_dataset.copy().pop(x) for x in ['EMOTION_A', 'EMOTION_D']], axis=1)
test_AD = pd.concat([test_dataset.copy().pop(x) for x in ['EMOTION_A', 'EMOTION_D']], axis=1)

train_PAD = pd.concat([train_dataset.copy().pop(x) for x in ['EMOTION_P', 'EMOTION_A', 'EMOTION_D']], axis=1)
test_PAD = pd.concat([test_dataset.copy().pop(x) for x in ['EMOTION_P', 'EMOTION_A', 'EMOTION_D']], axis=1)



train_P = pd.concat([train_dataset.pop(x) for x in ['EMOTION_P']], axis=1)
test_P = pd.concat([test_dataset.pop(x) for x in ['EMOTION_P']], axis=1)

train_A = pd.concat([train_dataset.pop(x) for x in ['EMOTION_A']], axis=1)
test_A = pd.concat([test_dataset.pop(x) for x in ['EMOTION_A']], axis=1)

train_D = pd.concat([train_dataset.pop(x) for x in ['EMOTION_D']], axis=1)
test_D = pd.concat([test_dataset.pop(x) for x in ['EMOTION_D']], axis=1)

In [160]:
for colname in train_dataset.columns:
    train_y_sets.append(pd.concat([train_dataset.pop(x) for x in [colname]], axis=1))
    
for colname in test_dataset.columns:
    test_y_sets.append(pd.concat([test_dataset.pop(x) for x in [colname]], axis=1))

In [161]:
train_PAD.head()

Unnamed: 0,EMOTION_P,EMOTION_A,EMOTION_D
17307,-0.6,-0.3,-0.3
10575,0.9,-0.25,0.65
12136,-0.6,-0.3,-0.3
14161,0.7,0.2,0.2
14569,0.6,0.5,0.2


In [162]:
train_y_sets[0].head()

Unnamed: 0,max_hand_distance
17307,0.203921
10575,0.500407
12136,0.634007
14161,1.026982
14569,0.84476


In [163]:
for i in range(len(train_y_sets)):
    print(train_y_sets[i].columns[0])

max_hand_distance
avg_l_hand_hip_distance
avg_r_hand_hip_distance
max_stride_length
avg_l_hand_chest_distance
avg_r_hand_chest_distance
avg_l_elbow_hip_distance
avg_r_elbow_hip_distance
avg_chest_pelvis_distance
avg_neck_chest_distance
avg_neck_rotation_w
avg_neck_rotation_x
avg_neck_rotation_y
avg_neck_rotation_z
avg_total_body_volume
avg_triangle_area_hands_neck
avg_triangle_area_feet_hips
l_hand_speed
r_hand_speed
l_foot_speed
r_foot_speed
neck_speed
l_hand_acceleration_magnitude
r_hand_acceleration_magnitude
l_foot_acceleration_magnitude
r_foot_acceleration_magnitude
neck_acceleration_magnitude


## Define Model

In [164]:
xgbr = xgb.XGBRegressor(verbosity=1) 

params = {
        'eta': [0.01, 0.05, 0.1],
        'min_child_weight': [1, 5, 11, 21],
        'max_depth': [3, 6, 10, 15],
        'gamma': [0, 0.001, 0.01],
        'subsample': [0.75, 1],
        'colsample_bytree': [0.75, 1],
        'lambda': [1, 1.25],
        'alpha': [0.0, 0.25]
        }

models = []

for i in range(len(train_y_sets)):
    models.append(
        xgb.XGBRegressor(
                    n_estimators=1500, learning_rate=0.05, max_depth=6, min_child_weight=11, 
                    reg_alpha=0.25, reg_lambda=1.25, gamma=0.01,
                    subsample=0.75, colsample_bytree=0.75, objective="reg:squarederror",
                    tree_method='gpu_hist'
                )
    )

## Train Model

In [165]:
for i in range(len(models)):
    print(train_y_sets[i].columns[0])
    if(coordinate_per_index[i] == "P"):
        train_X = train_P.copy()
    elif(coordinate_per_index[i] == "A"):
        train_X = train_A.copy()
    elif(coordinate_per_index[i] == "D"):
        train_X = train_D.copy()
        
    elif(coordinate_per_index[i] == "PA"):
        train_X = train_PA.copy()
    elif(coordinate_per_index[i] == "AD"):
        train_X = train_AD.copy()
    elif(coordinate_per_index[i] == "PD"):
        train_X = train_PD.copy()
        
    elif(coordinate_per_index[i] == "PAD"):
        train_X = train_PAD.copy()
    
    models[i].fit(train_X, train_y_sets[i])

max_hand_distance
avg_l_hand_hip_distance
avg_r_hand_hip_distance
max_stride_length
avg_l_hand_chest_distance
avg_r_hand_chest_distance
avg_l_elbow_hip_distance
avg_r_elbow_hip_distance
avg_chest_pelvis_distance
avg_neck_chest_distance
avg_neck_rotation_w
avg_neck_rotation_x
avg_neck_rotation_y
avg_neck_rotation_z
avg_total_body_volume
avg_triangle_area_hands_neck
avg_triangle_area_feet_hips
l_hand_speed
r_hand_speed
l_foot_speed
r_foot_speed
neck_speed
l_hand_acceleration_magnitude
r_hand_acceleration_magnitude
l_foot_acceleration_magnitude
r_foot_acceleration_magnitude
neck_acceleration_magnitude


## Test Model

### Training Score

In [166]:
for i in range(len(models)):
    if(coordinate_per_index[i] == "P"):
        train_X = train_P.copy()
    elif(coordinate_per_index[i] == "A"):
        train_X = train_A.copy()
    elif(coordinate_per_index[i] == "D"):
        train_X = train_D.copy()
        
    elif(coordinate_per_index[i] == "PA"):
        train_X = train_PA.copy()
    elif(coordinate_per_index[i] == "AD"):
        train_X = train_AD.copy()
    elif(coordinate_per_index[i] == "PD"):
        train_X = train_PD.copy()
        
    elif(coordinate_per_index[i] == "PAD"):
        train_X = train_PAD.copy()
    
    score = models[i].score(train_X, train_y_sets[i])
    print(train_y_sets[i].columns[0])
    print("Training score: ", score)

    print()

max_hand_distance
Training score:  0.23146846907333007

avg_l_hand_hip_distance
Training score:  0.16036224727583837

avg_r_hand_hip_distance
Training score:  0.14669331228934346

max_stride_length
Training score:  0.07287234477852222

avg_l_hand_chest_distance
Training score:  0.11711214686596716

avg_r_hand_chest_distance
Training score:  0.10078046325171752

avg_l_elbow_hip_distance
Training score:  0.12127123353930835

avg_r_elbow_hip_distance
Training score:  0.10131780938399437

avg_chest_pelvis_distance
Training score:  -516264277.67370343

avg_neck_chest_distance
Training score:  -1.000614724411264e-08

avg_neck_rotation_w
Training score:  0.023746476193308608

avg_neck_rotation_x
Training score:  0.01881492430858378

avg_neck_rotation_y
Training score:  0.009175644209868494

avg_neck_rotation_z
Training score:  0.005967148167891234

avg_total_body_volume
Training score:  0.1054268615717242

avg_triangle_area_hands_neck
Training score:  0.08659074873107186

avg_triangle_area_fe

### Test Set MAE & MSE

In [152]:
for i in range(len(models)):
    if(coordinate_per_index[i] == "P"):
        test_X = test_P.copy()
    elif(coordinate_per_index[i] == "A"):
        test_X = test_A.copy()
    elif(coordinate_per_index[i] == "D"):
        test_X = test_D.copy()
        
    elif(coordinate_per_index[i] == "PA"):
        test_X = test_PA.copy()
    elif(coordinate_per_index[i] == "AD"):
        test_X = test_AD.copy()
    elif(coordinate_per_index[i] == "PD"):
        test_X = test_PD.copy()
        
    elif(coordinate_per_index[i] == "PAD"):
        test_X = test_PAD.copy()
    
    pred_y = models[i].predict(test_X)
    mse = mean_squared_error(test_y_sets[i], pred_y)
    mae = mean_absolute_error(test_y_sets[i], pred_y)
    
    print(test_y_sets[i].columns[0])
    print("MSE: %.2f" % mse)
    print("MAE: %.2f" % mae)
    print("Example: ", test_y_sets[i].iloc[0][0])
    print()

max_hand_distance
MSE: 0.10
MAE: 0.26
Example:  0.2971068894484614

avg_l_hand_hip_distance
MSE: 0.04
MAE: 0.16
Example:  0.4406424650977664

avg_r_hand_hip_distance
MSE: 0.04
MAE: 0.16
Example:  0.4571307380177448

max_stride_length
MSE: 0.04
MAE: 0.15
Example:  0.7511384705247149

avg_l_hand_chest_distance
MSE: 0.01
MAE: 0.09
Example:  0.4436210999553046

avg_r_hand_chest_distance
MSE: 0.01
MAE: 0.09
Example:  0.4658910917058016

avg_l_elbow_hip_distance
MSE: 0.01
MAE: 0.07
Example:  0.3324378317202933

avg_r_elbow_hip_distance
MSE: 0.01
MAE: 0.07
Example:  0.3415790056596676

avg_chest_pelvis_distance
MSE: 0.00
MAE: 0.00
Example:  0.2861510012361181

avg_neck_chest_distance
MSE: 0.00
MAE: 0.00
Example:  0.2776996498898499

avg_neck_rotation_w
MSE: 0.02
MAE: 0.11
Example:  -0.0631955003829816

avg_neck_rotation_x
MSE: 0.22
MAE: 0.39
Example:  -0.2514155544486351

avg_neck_rotation_y
MSE: 0.04
MAE: 0.13
Example:  0.0046916840643594

avg_neck_rotation_z
MSE: 0.12
MAE: 0.27
Example:  0.

### Prediction Examples

In [116]:
model_p = xgb.XGBRegressor(verbosity=0)
model_p.load_model("../../emotion_classifier/model_training/models/l2p_dance_model.json")

model_a = xgb.XGBRegressor(verbosity=0)
model_a.load_model("../../emotion_classifier/model_training/models/l2a_dance_model.json")

model_d = xgb.XGBRegressor(verbosity=0)
model_d.load_model("../../emotion_classifier/model_training/models/l2d_dance_model.json")

scaler = joblib.load('../../emotion_classifier/model_training/datasets/scalers/standardizers/Fs_B_S_DANCE_WALK_KIN_0.5sec.pkl') 

In [168]:
index = 1
test_X = test_PAD.copy()

real_coordinates = np.asarray([test_X.iloc[index]])

generated_features = []
for i in range(len(models)):
    if(coordinate_per_index[i] == "P"):
        test_X = test_P.copy()
    elif(coordinate_per_index[i] == "A"):
        test_X = test_A.copy()
    elif(coordinate_per_index[i] == "D"):
        test_X = test_D.copy()
        
    elif(coordinate_per_index[i] == "PA"):
        test_X = test_PA.copy()
    elif(coordinate_per_index[i] == "AD"):
        test_X = test_AD.copy()
    elif(coordinate_per_index[i] == "PD"):
        test_X = test_PD.copy()
        
    elif(coordinate_per_index[i] == "PAD"):
        test_X = test_PAD.copy()
        
    real_coordinates = np.asarray([test_X.iloc[index]])
    
    generated_features.append(models[i].predict(real_coordinates)[0])
    #print(test_y_sets[i].columns[0], " - ", generated_features[i])
    
generated_features = pd.DataFrame([generated_features], columns=[
            "max_hand_distance",
            "avg_l_hand_hip_distance",
            "avg_r_hand_hip_distance",
            "max_stride_length",
            "avg_l_hand_chest_distance",
            "avg_r_hand_chest_distance",
            "avg_l_elbow_hip_distance",
            "avg_r_elbow_hip_distance",
            "avg_chest_pelvis_distance",
            "avg_neck_chest_distance",
            "avg_neck_rotation_w", "avg_neck_rotation_x", "avg_neck_rotation_y", "avg_neck_rotation_z",
            "avg_total_body_volume",
            "avg_triangle_area_hands_neck",
            "avg_triangle_area_feet_hips",
          
            "l_hand_speed",
            "r_hand_speed",
            "l_foot_speed",
            "r_foot_speed",
            "neck_speed",
          
            "l_hand_acceleration_magnitude",
            "r_hand_acceleration_magnitude",
            "l_foot_acceleration_magnitude",
            "r_foot_acceleration_magnitude",
            "neck_acceleration_magnitude",
         ])

generated_features = scaler.transform(generated_features)

y_p = model_p.predict(generated_features)
y_a = model_a.predict(generated_features)
y_d = model_d.predict(generated_features)

test_X = test_PAD.copy()
real_coordinates = np.asarray([test_X.iloc[index]])

print('Real: %s' % np.asarray(real_coordinates))
print('Predicted: %s' % [y_p[0], y_a[0], y_d[0]])

Real: [[-0.5  0.6  0.9]]
Predicted: [0.4269813, 0.45519087, 0.26436406]


## Save Models

In [118]:
# save to JSON
model_p.save_model("models/l2p_dance_model.json")
model_a.save_model("models/l2a_dance_model.json")
model_d.save_model("models/l2d_dance_model.json")

## Load Models

In [856]:
model_p = xgb.XGBRegressor(verbosity=0)
model_p.load_model("models/l2p_dance_model_v2.json")

model_a = xgb.XGBRegressor(verbosity=0)
model_a.load_model("models/l2a_dance_model_v2.json")

# Hyperparameter Grid Search with XGBoost

In [68]:
from datetime import datetime
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

import tensorflow as tf

import sklearn
from sklearn.model_selection import cross_val_score, KFold
from sklearn.metrics import mean_absolute_error


import xgboost as xgb

xgb.set_config(verbosity=2)

from sklearn.metrics import mean_squared_error
from sklearn.utils import shuffle
import joblib

## Load Data

In [69]:
dataset = pd.read_csv('datasets/Fs_B_DANCE_WALK_KIN_0.5sec.csv')

In [70]:
cor = dataset.corr()

coordinate_per_index = []
i = 0

for column in dataset:
    if(column in ["EMOTION_P", "EMOTION_A", "EMOTION_D"]):
        break
    
    #Correlation with output variable
    cor_target = abs(cor[column])
    print("===CORRELATION " + column +"===")
    print(cor_target[-3:])
    
    current = ''
    
    if(i in [10,13]):
        current = 'PA'
    elif(i in [11,12]):
        current = 'PAD'
    else:
        if(cor_target[-3] >= 0.1):
            current += 'P'
        if(cor_target[-2] >= 0.1):
            current += 'A'
        if(cor_target[-1] >= 0.1):
            current += 'D'
    
    coordinate_per_index.append(current)
    i+=1
    print()
    
print(coordinate_per_index)

===CORRELATION max_hand_distance===
EMOTION_P    0.280149
EMOTION_A    0.085109
EMOTION_D    0.231527
Name: max_hand_distance, dtype: float64

===CORRELATION avg_l_hand_hip_distance===
EMOTION_P    0.257441
EMOTION_A    0.027082
EMOTION_D    0.082661
Name: avg_l_hand_hip_distance, dtype: float64

===CORRELATION avg_r_hand_hip_distance===
EMOTION_P    0.218035
EMOTION_A    0.062500
EMOTION_D    0.085979
Name: avg_r_hand_hip_distance, dtype: float64

===CORRELATION max_stride_length===
EMOTION_P    0.238186
EMOTION_A    0.045871
EMOTION_D    0.109743
Name: max_stride_length, dtype: float64

===CORRELATION avg_l_hand_chest_distance===
EMOTION_P    0.196421
EMOTION_A    0.029508
EMOTION_D    0.137228
Name: avg_l_hand_chest_distance, dtype: float64

===CORRELATION avg_r_hand_chest_distance===
EMOTION_P    0.155722
EMOTION_A    0.011035
EMOTION_D    0.114523
Name: avg_r_hand_chest_distance, dtype: float64

===CORRELATION avg_l_elbow_hip_distance===
EMOTION_P    0.205301
EMOTION_A    0.020649

### Split into Test and Train data

In [71]:
train_dataset = dataset.sample(frac=0.8, random_state=42)
test_dataset = dataset.drop(train_dataset.index)

print("No Training Samples:",train_dataset.shape[0])
print("No Test Samples:",test_dataset.shape[0])

train_dataset = shuffle(train_dataset)
test_dataset = shuffle(test_dataset)

No Training Samples: 39539
No Test Samples: 9885


### Split Features from Targets

In [72]:
train_y_sets = []
test_y_sets = []
# List of np.arrays each containing the LMA features of a given column (i.e one list per feature)

In [73]:
train_PA = pd.concat([train_dataset.copy().pop(x) for x in ['EMOTION_P', 'EMOTION_A']], axis=1)
test_PA = pd.concat([test_dataset.copy().pop(x) for x in ['EMOTION_P', 'EMOTION_A']], axis=1)

train_PD = pd.concat([train_dataset.copy().pop(x) for x in ['EMOTION_P', 'EMOTION_D']], axis=1)
test_PD = pd.concat([test_dataset.copy().pop(x) for x in ['EMOTION_P', 'EMOTION_D']], axis=1)

train_AD = pd.concat([train_dataset.copy().pop(x) for x in ['EMOTION_A', 'EMOTION_D']], axis=1)
test_AD = pd.concat([test_dataset.copy().pop(x) for x in ['EMOTION_A', 'EMOTION_D']], axis=1)

train_AD = pd.concat([train_dataset.copy().pop(x) for x in ['EMOTION_A', 'EMOTION_D']], axis=1)
test_AD = pd.concat([test_dataset.copy().pop(x) for x in ['EMOTION_A', 'EMOTION_D']], axis=1)

train_PAD = pd.concat([train_dataset.copy().pop(x) for x in ['EMOTION_P', 'EMOTION_A', 'EMOTION_D']], axis=1)
test_PAD = pd.concat([test_dataset.copy().pop(x) for x in ['EMOTION_P', 'EMOTION_A', 'EMOTION_D']], axis=1)



train_P = pd.concat([train_dataset.pop(x) for x in ['EMOTION_P']], axis=1)
test_P = pd.concat([test_dataset.pop(x) for x in ['EMOTION_P']], axis=1)

train_A = pd.concat([train_dataset.pop(x) for x in ['EMOTION_A']], axis=1)
test_A = pd.concat([test_dataset.pop(x) for x in ['EMOTION_A']], axis=1)

train_D = pd.concat([train_dataset.pop(x) for x in ['EMOTION_D']], axis=1)
test_D = pd.concat([test_dataset.pop(x) for x in ['EMOTION_D']], axis=1)

In [74]:
for colname in train_dataset.columns:
    train_y_sets.append(pd.concat([train_dataset.pop(x) for x in [colname]], axis=1))
    
for colname in test_dataset.columns:
    test_y_sets.append(pd.concat([test_dataset.pop(x) for x in [colname]], axis=1))

In [75]:
train_PAD.head()

Unnamed: 0,EMOTION_P,EMOTION_A,EMOTION_D
23605,-0.4,0.25,-0.1
29692,0.1,-0.7,-0.2
7430,-0.5,0.6,0.9
10458,0.5,0.7,0.4
4584,-0.4,0.25,-0.1


In [76]:
train_y_sets[0].head()

Unnamed: 0,max_hand_distance
23605,0.43344
29692,0.330896
7430,0.47327
10458,1.311071
4584,0.484142


In [77]:
for i in range(len(train_y_sets)):
    print(train_y_sets[i].columns[0])

max_hand_distance
avg_l_hand_hip_distance
avg_r_hand_hip_distance
max_stride_length
avg_l_hand_chest_distance
avg_r_hand_chest_distance
avg_l_elbow_hip_distance
avg_r_elbow_hip_distance
avg_chest_pelvis_distance
avg_neck_chest_distance
avg_neck_rotation_w
avg_neck_rotation_x
avg_neck_rotation_y
avg_neck_rotation_z
avg_total_body_volume
avg_triangle_area_hands_neck
avg_triangle_area_feet_hips
l_hand_speed
r_hand_speed
l_foot_speed
r_foot_speed
neck_speed
l_hand_acceleration_magnitude
r_hand_acceleration_magnitude
l_foot_acceleration_magnitude
r_foot_acceleration_magnitude
neck_acceleration_magnitude


## RandomSearchCV

In [78]:
# A parameter grid for XGBoost
# https://www.analyticsvidhya.com/blog/2016/03/complete-guide-parameter-tuning-xgboost-with-codes-python/
params = {
        'eta': [0.01, 0.05, 0.1],
        'min_child_weight': [1, 5, 11, 21],
        'max_depth': [3, 6, 10, 15],
        'gamma': [0, 0.001, 0.01],
        'subsample': [0.75, 1],
        'colsample_bytree': [0.75, 1],
        'lambda': [1, 1.25],
        'alpha': [0.0, 0.25]
        }

n_iter = 200

In [79]:
models = []

for i in range(len(train_y_sets)):
    models.append(
        xgb.XGBRegressor(
                    n_estimators=1500, 
                    objective="reg:squarederror",
                    tree_method='gpu_hist'
                )
    )

In [82]:
# Pleasure
# run randomized search

random_searches = []

for i in range(len(models)):
    if(coordinate_per_index[i] == "P"):
        train_X = train_P.copy()
    elif(coordinate_per_index[i] == "A"):
        train_X = train_A.copy()
    elif(coordinate_per_index[i] == "D"):
        train_X = train_D.copy()
        
    elif(coordinate_per_index[i] == "PA"):
        train_X = train_PA.copy()
    elif(coordinate_per_index[i] == "AD"):
        train_X = train_AD.copy()
    elif(coordinate_per_index[i] == "PD"):
        train_X = train_PD.copy()
        
    elif(coordinate_per_index[i] == "PAD"):
        train_X = train_PAD.copy()
    
    print("TRAINING - " + train_y_sets[i].columns[0] + "- USING - " + coordinate_per_index[i])
    
    kfold = KFold(n_splits=3, shuffle=True)

    random_search = RandomizedSearchCV(models[i], param_distributions=params,
                               cv=kfold, scoring='neg_mean_squared_error', n_iter = n_iter)

    start = time.time()
    random_search.fit(train_X, train_y_sets[i])
    
    random_searches.append(random_search)

    print("GridSearchCV took %.2f seconds"
          " parameter settings." % ((time.time() - start)))
    
    print()

TRAINING - max_hand_distance- USING - PD
GridSearchCV took 997.94 seconds parameter settings.

TRAINING - avg_l_hand_hip_distance- USING - P
GridSearchCV took 904.12 seconds parameter settings.

TRAINING - avg_r_hand_hip_distance- USING - P
GridSearchCV took 868.42 seconds parameter settings.

TRAINING - max_stride_length- USING - PD
GridSearchCV took 947.38 seconds parameter settings.

TRAINING - avg_l_hand_chest_distance- USING - PD
GridSearchCV took 884.58 seconds parameter settings.

TRAINING - avg_r_hand_chest_distance- USING - PD
GridSearchCV took 881.88 seconds parameter settings.

TRAINING - avg_l_elbow_hip_distance- USING - PD
GridSearchCV took 840.15 seconds parameter settings.

TRAINING - avg_r_elbow_hip_distance- USING - PD
GridSearchCV took 801.80 seconds parameter settings.

TRAINING - avg_chest_pelvis_distance- USING - P
GridSearchCV took 572.93 seconds parameter settings.

TRAINING - avg_neck_chest_distance- USING - PA
GridSearchCV took 656.92 seconds parameter settings

### Results

In [84]:
best_regressors = []

for i in range(len(models)):
    print(train_y_sets[i].columns[0])
    best_regressor = random_searches[i].best_estimator_

    print(best_regressor.get_params())
    best_regressors.append(best_regressor)
    print()

max_hand_distance
{'objective': 'reg:squarederror', 'base_score': 0.5, 'booster': 'gbtree', 'colsample_bylevel': 1, 'colsample_bynode': 1, 'colsample_bytree': 0.75, 'enable_categorical': False, 'gamma': 0.01, 'gpu_id': 0, 'importance_type': None, 'interaction_constraints': '', 'learning_rate': 0.100000001, 'max_delta_step': 0, 'max_depth': 3, 'min_child_weight': 5, 'missing': nan, 'monotone_constraints': '()', 'n_estimators': 1500, 'n_jobs': 12, 'num_parallel_tree': 1, 'predictor': 'auto', 'random_state': 0, 'reg_alpha': 0.25, 'reg_lambda': 1, 'scale_pos_weight': 1, 'subsample': 0.75, 'tree_method': 'gpu_hist', 'validate_parameters': 1, 'verbosity': None, 'lambda': 1, 'eta': 0.1, 'alpha': 0.25}

avg_l_hand_hip_distance
{'objective': 'reg:squarederror', 'base_score': 0.5, 'booster': 'gbtree', 'colsample_bylevel': 1, 'colsample_bynode': 1, 'colsample_bytree': 1, 'enable_categorical': False, 'gamma': 0.01, 'gpu_id': 0, 'importance_type': None, 'interaction_constraints': '', 'learning_rate

In [85]:
for i in range(len(best_regressors)):
    if(coordinate_per_index[i] == "P"):
        test_X = test_P.copy()
    elif(coordinate_per_index[i] == "A"):
        test_X = test_A.copy()
    elif(coordinate_per_index[i] == "D"):
        test_X = test_D.copy()
        
    elif(coordinate_per_index[i] == "PA"):
        test_X = test_PA.copy()
    elif(coordinate_per_index[i] == "AD"):
        test_X = test_AD.copy()
    elif(coordinate_per_index[i] == "PD"):
        test_X = test_PD.copy()
        
    elif(coordinate_per_index[i] == "PAD"):
        test_X = test_PAD.copy()
        
    pred_y = best_regressors[i].predict(test_X)
    mse = mean_squared_error(test_y_sets[i], pred_y)
    mae = mean_absolute_error(test_y_sets[i], pred_y)
    print(test_y_sets[i].columns[0])
    print("MSE: %.2f" % mse)
    print("MAE: %.2f" % mae)
    print("Example: ", test_y_sets[i].iloc[0][0])
    print()

max_hand_distance
MSE: 0.07
MAE: 0.21
Example:  0.4989853318602207

avg_l_hand_hip_distance
MSE: 0.03
MAE: 0.12
Example:  0.2267434865368751

avg_r_hand_hip_distance
MSE: 0.03
MAE: 0.13
Example:  0.3563965554709268

max_stride_length
MSE: 0.03
MAE: 0.13
Example:  0.4213194217438191

avg_l_hand_chest_distance
MSE: 0.01
MAE: 0.09
Example:  0.447102203674375

avg_r_hand_chest_distance
MSE: 0.01
MAE: 0.09
Example:  0.3757017571310203

avg_l_elbow_hip_distance
MSE: 0.01
MAE: 0.06
Example:  0.3206657092611353

avg_r_elbow_hip_distance
MSE: 0.01
MAE: 0.06
Example:  0.2965667045547567

avg_chest_pelvis_distance
MSE: 0.00
MAE: 0.00
Example:  0.2861510000148867

avg_neck_chest_distance
MSE: 0.00
MAE: 0.00
Example:  0.2775065382457419

avg_neck_rotation_w
MSE: 0.01
MAE: 0.07
Example:  -0.0363429129118264

avg_neck_rotation_x
MSE: 0.10
MAE: 0.21
Example:  0.2231649475888516

avg_neck_rotation_y
MSE: 0.02
MAE: 0.10
Example:  -0.0692101839821917

avg_neck_rotation_z
MSE: 0.06
MAE: 0.13
Example:  0.9

In [18]:
# save to JSON
for i in range(len(best_regressors)):
    best_regressors[i].save_model("models/0.5_sec/" + test_y_sets[i].columns[0] +".json")

### Predictions

In [86]:
model_p = xgb.XGBRegressor(verbosity=0)
model_p.load_model("../../emotion_classifier/model_training/models/l2p_dance_model.json")

model_a = xgb.XGBRegressor(verbosity=0)
model_a.load_model("../../emotion_classifier/model_training/models/l2a_dance_model.json")

model_d = xgb.XGBRegressor(verbosity=0)
model_d.load_model("../../emotion_classifier/model_training/models/l2d_dance_model.json")

scaler = joblib.load('../../emotion_classifier/model_training/datasets/scalers/standardizers/Fs_B_S_DANCE_WALK_KIN_0.5sec.pkl') 

In [99]:
index = 10
test_X = test_PAD.copy()

real_coordinates = np.asarray([test_X.iloc[index]])

generated_features = []
for i in range(len(models)):
    coordinates = real_coordinates
    print(coordinate_per_index[i])
    if(coordinate_per_index[i] == "P"):
        coordinates = [[coordinates[0][0]]]
    elif(coordinate_per_index[i] == "A"):
        coordinates = [[coordinates[0][1]]]
    elif(coordinate_per_index[i] == "D"):
        coordinates = [[coordinates[0][2]]]
        
    elif(coordinate_per_index[i] == "PA"):
        coordinates = [[coordinates[0][0], coordinates[0][1]]]
    elif(coordinate_per_index[i] == "AD"):
        coordinates = [[coordinates[0][1], coordinates[0][2]]]
    elif(coordinate_per_index[i] == "PD"):
        coordinates = [[coordinates[0][0], coordinates[0][2]]]
        
    elif(coordinate_per_index[i] == "PAD"):
        coordinates = [[coordinates[0][0], coordinates[0][1], coordinates[0][2]]]
        
    print(coordinates)
        
    generated_features.append(best_regressors[i].predict(coordinates)[0])
    #print(test_y_sets[i].columns[0], " - ", generated_features[i])
    
generated_features = pd.DataFrame([generated_features], columns=[
            "max_hand_distance",
            "avg_l_hand_hip_distance",
            "avg_r_hand_hip_distance",
            "max_stride_length",
            "avg_l_hand_chest_distance",
            "avg_r_hand_chest_distance",
            "avg_l_elbow_hip_distance",
            "avg_r_elbow_hip_distance",
            "avg_chest_pelvis_distance",
            "avg_neck_chest_distance",
            "avg_neck_rotation_w", "avg_neck_rotation_x", "avg_neck_rotation_y", "avg_neck_rotation_z",
            "avg_total_body_volume",
            "avg_triangle_area_hands_neck",
            "avg_triangle_area_feet_hips",
          
            "l_hand_speed",
            "r_hand_speed",
            "l_foot_speed",
            "r_foot_speed",
            "neck_speed",
          
            "l_hand_acceleration_magnitude",
            "r_hand_acceleration_magnitude",
            "l_foot_acceleration_magnitude",
            "r_foot_acceleration_magnitude",
            "neck_acceleration_magnitude",
         ])

generated_features = scaler.transform(generated_features)

y_p = model_p.predict(generated_features)
y_a = model_a.predict(generated_features)
y_d = model_d.predict(generated_features)

print('Real: %s' % np.asarray(real_coordinates))
print('Predicted: %s' % [y_p[0], y_a[0], y_d[0]])

PD
[[0.5, 0.4]]


ValueError: training data did not have the following fields: EMOTION_P, EMOTION_D