# Training

## Library Imports

In [None]:
import os
import pickle

import pandas as pd
from sklearn.ensemble import RandomForestClassifier

## Importing the Train and Test Sets

In [None]:
# The CSV files live in the `datasets` folder inside the parent of the current
# working directory. os.path.join is used so the separator is always correct
# for the platform the notebook runs on.
datasets_folder = os.path.join(
    os.path.abspath(os.path.join(os.getcwd(), os.pardir)), "datasets"
)

In [None]:
# Training features; the first CSV column is used as the row index.
X_train = pd.read_csv(
    os.path.join(datasets_folder, "obesity_X_train.csv"),
    index_col=0,
)
X_train

In [None]:
# Training labels: read the CSV (first column as index) and keep only the
# `Obese` column.
y_train = pd.read_csv(
    os.path.join(datasets_folder, "obesity_y_train.csv"),
    index_col=0,
)["Obese"]
y_train

In [None]:
# Test features; the first CSV column is used as the row index.
X_test = pd.read_csv(
    os.path.join(datasets_folder, "obesity_X_test.csv"),
    index_col=0,
)
X_test

In [None]:
# Test labels: read the CSV (first column as index) and keep only the
# `Obese` column.
y_test = pd.read_csv(
    os.path.join(datasets_folder, "obesity_y_test.csv"),
    index_col=0,
)["Obese"]
y_test

### Dropping the `Height` and `Weight` Columns
As in the cross-validation, we create a variant of `X_train` without the `Height` and `Weight` columns, and do the same for `X_test`. This lets us train two different models for comparison: one on all features and one without height and weight.

In [None]:
# Copy of the training features without `Height` and `Weight`; `X_train`
# itself is left untouched because `drop` returns a new frame.
X_train_no_hw = X_train.drop(columns=["Height", "Weight"])
X_train_no_hw

In [None]:
# Copy of the test features without `Height` and `Weight`; `X_test` itself
# is left untouched because `drop` returns a new frame.
X_test_no_hw = X_test.drop(columns=["Height", "Weight"])
X_test_no_hw

## Training Random Forest Classifiers
The cross-validation results showed that the random forest, decision tree and SVM classifiers achieved high accuracies for both `X_train` and `X_train_no_hw`. The random forest's accuracies on the two feature sets were closer to each other than those of the other two classifiers, so we will train two random forest classifiers, one per feature set, with the tuned hyperparameters.

In [None]:
# Seed passed as `random_state` to the classifiers trained in this notebook.
RAND_STATE = 0

In [None]:
# Random forest trained on the full feature set (`Height` and `Weight` included).
rand_forest = RandomForestClassifier(
    n_estimators=300,
    max_depth=None,
    random_state=RAND_STATE,
)
rand_forest.fit(X_train, y_train)

In [None]:
# Random forest trained on the reduced feature set (no `Height` or `Weight`).
rand_forest_no_weight = RandomForestClassifier(
    n_estimators=200,
    max_depth=None,
    random_state=RAND_STATE,
)
rand_forest_no_weight.fit(X_train_no_hw, y_train)

## Scores

In [None]:
# Evaluate the full-feature model on the test set (for scikit-learn
# classifiers, `score` reports mean accuracy).
rand_forest.score(X_test, y_test)

In [None]:
# Evaluate the reduced-feature model on the matching test set without
# `Height` and `Weight` (for scikit-learn classifiers, `score` reports mean
# accuracy).
rand_forest_no_weight.score(X_test_no_hw, y_test)

## Saving the Trained Models
We define a function that saves a given model by pickling it to a file with the specified name inside the `models` folder (located in the parent of the working directory):

In [None]:
def save_model(model, filename):
    """Pickle ``model`` into the ``models`` folder next to the working directory.

    The target is ``<parent of cwd>/models/<filename>``. The destination
    folder is created when it does not exist yet, so the function also works
    on a fresh checkout.

    Args:
        model: Picklable object to save (here, a fitted classifier).
        filename: Name of the file to create inside the ``models`` folder.

    Raises:
        OSError: If the folder cannot be created or the file cannot be written.
        pickle.PicklingError: If ``model`` cannot be pickled.
    """
    models_folder = os.path.join(
        os.path.abspath(os.path.join(os.getcwd(), os.pardir)), "models"
    )
    file_path = os.path.join(models_folder, filename)
    # Create the destination folder (and any sub-folder named in `filename`)
    # up front; otherwise `open` raises FileNotFoundError.
    os.makedirs(os.path.dirname(file_path), exist_ok=True)
    # The `with` block closes the file on exit, so no explicit close() is needed.
    with open(file_path, "wb") as file:
        pickle.dump(model, file)
    print(f"Model saved to {file_path}")

Finally, we save the trained models:

In [None]:
# Persist both trained classifiers: the full-feature model first, then the
# one trained without `Height` and `Weight`.
save_model(rand_forest, "rand_forest.pkl")
save_model(rand_forest_no_weight, "rand_forest_no_weight.pkl")