# Training

## Library Imports

In [None]:
import os
import pickle

import pandas as pd
from sklearn.ensemble import RandomForestClassifier

## Importing the Train Set

In [None]:
# The datasets live in a "datasets" folder that is a sibling of the current working directory.
datasets_folder = os.path.abspath(os.path.join(os.getcwd(), os.pardir)) + "/datasets"

In [None]:
# Load the training features; the first CSV column is used as the index.
X_train = pd.read_csv(
    filepath_or_buffer=os.path.join(datasets_folder, "obesity_X_train.csv"),
    index_col=0,
)
X_train

In [None]:
# Load the training labels (first CSV column is the index) and keep only
# the "NObeyesdad" column as a Series.
y_train = pd.read_csv(
    os.path.join(datasets_folder, "obesity_y_train.csv"),
    index_col=0,
)
y_train = y_train["NObeyesdad"]
y_train

### Dropping the `Weight` Column
As with the cross-validation, we make a variant of `X_train` without the `Weight` column:

In [None]:
# Copy of the training features with the "Weight" column removed;
# X_train itself is left untouched.
X_train_no_weight = X_train.drop(columns=["Weight"])
X_train_no_weight

## Training Random Forest
The cross-validation results showed that random forest and SVM achieved accuracies above 80% for both `X_train` and `X_train_no_weight`. The accuracies for random forest were closer to each other, so we will train two random forest classifiers with the tuned hyperparameters.

In [None]:
# Seed passed as random_state to the classifiers below.
RAND_STATE = 0

In [None]:
# Random forest trained on the full feature set (including "Weight").
rand_forest = RandomForestClassifier(
    n_estimators=200,
    max_depth=None,
    random_state=RAND_STATE,
)
rand_forest.fit(X=X_train, y=y_train)

In [None]:
# Random forest trained on the feature set without the "Weight" column.
rand_forest_no_weight = RandomForestClassifier(
    n_estimators=400,
    max_depth=None,
    random_state=RAND_STATE,
)
rand_forest_no_weight.fit(X=X_train_no_weight, y=y_train)

## Saving the Trained Models
We define a function that saves a given model by pickling it to a file with the given name in the `models` folder:

In [None]:
def save_model(model, filename):
    """Pickle ``model`` to ``<parent of the working directory>/models/<filename>``.

    The target directory is created if it does not exist yet, so the
    function also works on a fresh checkout without a ``models`` folder.

    Args:
        model: Any picklable object (here, a fitted classifier).
        filename: Name of the file to write inside the ``models`` folder.

    Raises:
        OSError: If the directory cannot be created or the file cannot be
            written.
        pickle.PicklingError: If ``model`` cannot be pickled.
    """
    parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
    file_path = f"{parent_dir}/models/{filename}"
    # Without this, open() raises FileNotFoundError when "models" is missing.
    os.makedirs(os.path.dirname(file_path), exist_ok=True)
    # The context manager closes the file; no explicit close() is needed.
    with open(file_path, 'wb') as file:
        pickle.dump(model, file)
    print(f"Model saved to {file_path}")

Finally, we save the trained models:

In [None]:
save_model(rand_forest, 'rand_forest.pkl')
save_model(rand_forest_no_weight, 'rand_forest_no_weight.pkl')