# Build Models
1. Load the 20 feature columns and 4 label columns (2 per finger) from features.csv
2. Train and compare different regression models

## 1. Initialization

In [1]:
# Step value that identifies which processed dataset folder is used.
step = 3

# Windows-style relative paths, all derived from the folder for this step.
processed_folder = r".\ProcessedData\Dataset_Step={}".format(step)
model_folder = r"{}\models".format(processed_folder)
features_path = r"{}\features.csv".format(processed_folder)

In [None]:
import pandas as pd
import os
import numpy as np
from enum import Enum
from sklearn.model_selection import train_test_split
import joblib 
from sklearn.multioutput import MultiOutputRegressor
from sklearn.svm import SVR
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.experimental import enable_hist_gradient_boosting
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, BaggingRegressor, \
BaggingRegressor, GradientBoostingRegressor, HistGradientBoostingRegressor, \
VotingRegressor
import lightgbm as lgb
from sklearn.neural_network import MLPRegressor
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel

## 2. Prepare data

In [5]:
# Fingers for which label columns exist; the selected member decides which
# label columns are used as regression targets further down.
FingerType = Enum("FingerType", {"Thumb": 0, "Index": 1})

# Finger whose labels are modelled in this run.
finger_type = FingerType.Thumb

In [6]:
features_pd = pd.read_csv(features_path)

# Columns 0-19 are the model inputs; the label columns follow, two per finger.
label_columns = {
    FingerType.Thumb: slice(20, 22),
    FingerType.Index: slice(22, 24),
}
if finger_type not in label_columns:
    # Fail here with a clear message instead of leaving Y undefined and
    # hitting a NameError further down.
    raise ValueError("Unsupported finger type: {!r}".format(finger_type))

X = features_pd.iloc[:, 0:20].to_numpy()
Y = features_pd.iloc[:, label_columns[finger_type]].to_numpy()

print("Original dataset shape")
print("X:", X.shape, " Y:", Y.shape)

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)
print("Train dataset shape")
print("X:", X_train.shape, " Y:", Y_train.shape)
print("Test dataset shape")
print("X:", X_test.shape, " Y:", Y_test.shape)

Original dataset shape
X: (54268, 20)  Y: (54268, 2)
Train dataset shape
X: (43414, 20)  Y: (43414, 2)
Test dataset shape
X: (10854, 20)  Y: (10854, 2)


## 3. Prepare the regression model pipeline

In [7]:
def generate_model(single_model):
    """Fit a multi-output wrapper around ``single_model``, score it and save it.

    Trains on the notebook-level ``X_train``/``Y_train`` split, evaluates on
    ``X_test``/``Y_test`` and writes the fitted model into ``model_folder``
    with joblib. The file is named ``<score>_<EstimatorClassName>``, where the
    score is rounded to three decimals.

    Parameters
    ----------
    single_model : estimator
        Unfitted estimator that is wrapped in ``MultiOutputRegressor``.

    Returns
    -------
    tuple
        ``(model, model_path)``: the fitted ``MultiOutputRegressor`` and the
        path of the file it was saved to.
    """
    model = MultiOutputRegressor(single_model)
    model.fit(X_train, Y_train)
    # Value returned by the wrapper's ``score`` on the held-out split
    # (R^2 for scikit-learn regressors).
    score = model.score(X_test, Y_test)

    # The class name is the same text the estimator's repr starts with, but
    # does not depend on how the repr is formatted.
    estimator_name = type(single_model).__name__

    # joblib.dump does not create missing directories, so make sure the
    # target folder exists before writing.
    os.makedirs(model_folder, exist_ok=True)
    model_path = os.path.join(model_folder, str(round(score, 3)) + "_" + estimator_name)
    joblib.dump(model, model_path)

    print(round(score * 100, 1), "%")
    print("Save model file", model_path)
    return model, model_path

## 4. Use specific model

In [12]:
# Catalogue of candidate estimators. Exactly one `single_model` assignment
# should be active; the others are kept commented out for reference.
# NOTE(review): lines prefixed with `**` appear to mark the better-performing
# candidates — confirm. Remove the leading `# ** ` to re-enable one of them.

# single_model = SVR(kernel='linear', C=1.0, epsilon=0.2, max_iter=10000)
# single_model = SVR(kernel='poly', C=1.0, epsilon=0.2, max_iter=10000)
# single_model = SVR(kernel='sigmoid', C=1.0, epsilon=0.2, tol=0.1)
# NOTE(review): LogisticRegression is a classifier rather than a regressor.
# single_model = LogisticRegression(random_state=0)

# ** single_model = RandomForestRegressor(max_depth=20)

# single_model = LinearRegression()

# single_model = AdaBoostRegressor(random_state=0, n_estimators=100, loss='square')
# single_model = BaggingRegressor(base_estimator=SVR(), n_estimators=10, random_state=0)

# ** single_model = GradientBoostingRegressor(n_estimators=10000)
# ** single_model = HistGradientBoostingRegressor(max_iter=10000)

# Ensemble of the three tree-based regressors above.
# r1 = RandomForestRegressor(max_depth=None, random_state=None)
# r2 = GradientBoostingRegressor(n_estimators=10000)
# r3 = HistGradientBoostingRegressor(max_iter=10000)
# ** single_model = VotingRegressor([('RF', r1), ('GB', r2), ('HGB', r3)])

# Ensemble of three linear regressions.
# LR1 = LinearRegression()
# LR2 = LinearRegression()
# LR3 = LinearRegression()
# single_model = VotingRegressor([('LR1', LR1), ('LR2', LR2), ('LR3', LR3)])

# ** single_model = lgb.LGBMRegressor(boosting_type='dart', num_leaves=100, n_estimators=10000)

# single_model = MLPRegressor()

# Currently active choice: Gaussian process with a linear (DotProduct) kernel
# plus a white-noise term.
# NOTE(review): exact Gaussian-process fitting typically scales cubically with
# the number of training rows, which may be impractical on the ~43k-row
# training split — confirm this is the intended selection.
kernel = DotProduct() + WhiteKernel()
single_model = GaussianProcessRegressor(kernel=kernel, random_state=0)

In [None]:
# Train, score and save the currently selected estimator via generate_model;
# keeps the fitted model and the path it was written to.
model, model_path = generate_model(single_model)

## 5. Load model from disk and check again

In [60]:
# NOTE: rebinding `step` here does not update processed_folder / model_folder /
# features_path, which were built as strings in the initialization cell.
step = 4
features_pd = pd.read_csv(r".\ProcessedData\Dataset_Step=" + str(step) + r"\features.csv")

# Same column layout as the training data: 20 inputs, then two label columns per finger.
label_columns_a = {
    FingerType.Thumb: slice(20, 22),
    FingerType.Index: slice(22, 24),
}
if finger_type not in label_columns_a:
    # Fail here with a clear message instead of leaving Y_a undefined and
    # hitting a NameError in the print below.
    raise ValueError("Unsupported finger type: {!r}".format(finger_type))

X_a = features_pd.iloc[:, 0:20].to_numpy()
Y_a = features_pd.iloc[:, label_columns_a[finger_type]].to_numpy()
print("Original dataset shape")
print("X:", X_a.shape, " Y:", Y_a.shape)

Original dataset shape
X: (420, 20)  Y: (420, 2)


In [4]:
# Reload a previously saved model by its hard-coded file name; this rebinds the
# `model` / `model_path` names that the training cell also assigns.
# NOTE: joblib.load unpickles the file, so only load model files you trust.
# NOTE(review): the rename cell at the end of the notebook changes the names of
# saved model files, so this path may need updating after that cell has run.
model_path = r'./ProcessedData/Dataset_Step=3/models/0.893_LGBMRegressor'
model = joblib.load(model_path)
# model.get_params()

In [62]:
# Score the loaded model on the Step=4 arrays (X_a, Y_a) loaded above and
# print the result as a percentage with one decimal.
score = model.score(X_a, Y_a)
print(round(score * 100, 1), "%")

89.2 %


In [7]:
import os

# Rename saved models from "<score>_<ModelName>" to "<ModelName>_<score>"
# by reversing the underscore-separated parts of each file name.
files = os.listdir(model_folder)
for file in files:
    parts = file.split('_')
    # Only rename files that still start with a numeric score. This keeps the
    # cell safe to re-run: already-renamed files are not flipped back.
    try:
        float(parts[0])
    except ValueError:
        continue
    file_name = '_'.join(parts[::-1])
    if file_name == file:
        # Nothing to swap (no underscore in the name).
        continue
    # Keep the renamed file inside model_folder; a bare file name as the
    # destination would move it into the current working directory.
    os.rename(os.path.join(model_folder, file), os.path.join(model_folder, file_name))