# Model Fitting
Fit the model using the training dataset.

In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib notebook

import numpy as np
from numpy import mean
from numpy import std

from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.ensemble import GradientBoostingClassifier
from matplotlib.lines import Line2D
import joblib

from src.data.labels_util import load_labels, LabelCol, get_labels_file, load_clean_labels, get_workouts
from src.data.imu_util import (
    get_sensor_file, ImuCol, load_imu_data, Sensor, fix_epoch, resample_uniformly, time_to_row_range, get_data_chunk,
    normalize_with_bounds, data_to_features, list_imu_abspaths, clean_imu_data
)
from src.data.util import find_nearest, find_nearest_index, shift, low_pass_filter, add_col
from src.data.workout import Activity, Workout
from src.data.data import DataState
from src.data.build_features import main as build_features
from src.data.features_util import list_test_files
from src.model.train import evaluate_model_accuracy, train_model
from src.visualization.visualize import multiplot
from src.config import (
    TRAIN_BOOT_DIR, TRAIN_POLE_DIR, TRAIN_FEATURES_FILENAME, TRAIN_LABELS_FILENAME, BOOT_MODEL_FILE, 
    POLE_MODEL_FILE
)

# import data types
from pandas import DataFrame
from numpy import ndarray
from typing import List, Tuple, Optional

## Evaluate quality of model and training data
[Helpful source](https://machinelearningmastery.com/gradient-boosting-machine-ensemble-in-python/)

In [2]:
# Evaluation is disabled by default because it is very slow; uncomment the lines below to run it.

# print('Evaluate boot model:')
# features: ndarray = np.load(TRAIN_BOOT_DIR / TRAIN_FEATURES_FILENAME)
# labels: ndarray = np.load(TRAIN_BOOT_DIR / TRAIN_LABELS_FILENAME)
    
# evaluate_model_accuracy(features, labels)

# print('Evaluate pole model:')
# features: ndarray = np.load(TRAIN_POLE_DIR / TRAIN_FEATURES_FILENAME)
# labels: ndarray = np.load(TRAIN_POLE_DIR / TRAIN_LABELS_FILENAME)
    
# evaluate_model_accuracy(features, labels)

## Train model on training data
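The cell below saves the trained models to the current working directory as `gbm-boot-model.pkl` and `gbm-pole-model.pkl`. To "publish" them, copy the files to the `models` directory and update the paths in `config.py`.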

In [3]:
# Train one model per sensor and pickle each to the current working directory.
# NOTE: this cell is very slow (the recorded boot-model run reported ~20 minutes
# remaining at its first boosting iteration).

# (display name, training-data directory, output pickle filename) for each model.
training_runs = [
    ('boot', TRAIN_BOOT_DIR, 'gbm-boot-model.pkl'),
    ('pole', TRAIN_POLE_DIR, 'gbm-pole-model.pkl'),
]

for sensor_name, train_dir, model_filename in training_runs:
    print(f'Train {sensor_name} model:')
    features: ndarray = np.load(train_dir / TRAIN_FEATURES_FILENAME)
    labels: ndarray = np.load(train_dir / TRAIN_LABELS_FILENAME)

    model = train_model(features, labels)
    # Called as a statement inside the loop, so the list of written filenames
    # returned by joblib.dump is not echoed as cell output.
    joblib.dump(model, model_filename)

# After the loop, `features`, `labels` and `model` refer to the last (pole) run.

Train boot model:
Fitting model...
      Iter       Train Loss   Remaining Time 
         1           1.3349           20.80m
         2           1.3026           20.25m
         3           1.2791           20.66m
         4           1.2538           20.62m
         5           1.2324           20.34m
         6           1.2150           20.17m
         7           1.2009           19.62m
         8           1.1869           19.13m
         9           1.1749           18.73m
        10           1.1619           18.37m
        20           1.0735           15.82m
        30           1.0147           13.84m
        40           0.9749           11.67m
        50           0.9327            9.60m
        60           0.8987            7.62m
        70           0.8722            5.65m
        80           0.8483            3.83m
        90           0.8283            1.93m
       100           0.8131            0.00s
Done
Train pole model:
Fitting model...
      Iter       Train L

['gbm-pole-model.pkl']

## Test models