# Model Training and Tuning

This notebook covers the training and hyperparameter tuning of various machine learning models using the selected features from the AI4I 2020 Predictive Maintenance Dataset.

In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.cross_decomposition import PLSRegression
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix

# Load the selected features data.
# np.load on an .npz archive returns a lazy NpzFile that keeps the file open;
# the context manager closes it as soon as the four arrays have been read.
# Note: `data` is closed after this block, so read any further arrays inside it.
with np.load('../data/selected_features_data.npz') as data:
    X_train = data['X_train']
    X_test = data['X_test']
    y_train = data['y_train']
    y_test = data['y_test']

# Fail fast if features and labels are misaligned.
assert len(X_train) == len(y_train), 'X_train / y_train sample counts differ'
assert len(X_test) == len(y_test), 'X_test / y_test sample counts differ'

## Model Training and Hyperparameter Tuning

In [None]:
# Define a function to evaluate models
def evaluate_model(model, X_train, X_test, y_train, y_test):
    """Fit `model` on the training split and score it on the test split.

    Parameters
    ----------
    model : estimator exposing ``fit(X, y)`` and ``predict(X)``; may also
        expose ``predict_proba`` or ``decision_function``.
    X_train, y_train : training features and labels passed to ``model.fit``.
    X_test, y_test : held-out features and labels used for scoring.

    Returns
    -------
    dict with keys 'accuracy', 'precision', 'recall', 'f1', 'roc_auc' and
    'confusion_matrix'.

    Notes
    -----
    Precision, recall and F1 use scikit-learn's default binary averaging, so
    a binary target is assumed. ROC AUC is computed from continuous scores
    when the model provides them; hard labels are used only as a fallback.
    """
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # ROC AUC ranks samples by score; feeding it hard 0/1 predictions collapses
    # the curve to a single operating point and misreports the model.
    if hasattr(model, 'predict_proba'):
        # Column 1 holds the probability of the second (positive) class.
        y_score = model.predict_proba(X_test)[:, 1]
    elif hasattr(model, 'decision_function'):
        y_score = np.ravel(model.decision_function(X_test))
    else:
        y_score = y_pred

    return {
        'accuracy': accuracy_score(y_test, y_pred),
        'precision': precision_score(y_test, y_pred),
        'recall': recall_score(y_test, y_pred),
        'f1': f1_score(y_test, y_pred),
        'roc_auc': roc_auc_score(y_test, y_score),
        'confusion_matrix': confusion_matrix(y_test, y_pred)
    }

### Random Forest

In [None]:
# Random Forest model training and tuning
# A fixed random_state makes bootstrap sampling and feature sub-sampling
# repeatable, so a re-run of the notebook reproduces the same scores.
rf = RandomForestClassifier(random_state=42)
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}
grid_search_rf = GridSearchCV(estimator=rf, param_grid=param_grid, cv=5, scoring='accuracy', n_jobs=-1)
# evaluate_model fits the grid search, then scores the refitted best estimator on the test split.
results_rf = evaluate_model(grid_search_rf, X_train, X_test, y_train, y_test)
print('Random Forest Best Params:', grid_search_rf.best_params_)
print('Random Forest Results:', results_rf)

### Gradient Boosting

In [None]:
# Gradient Boosting model training and tuning
# A fixed random_state keeps the boosted trees repeatable across notebook runs.
gb = GradientBoostingClassifier(random_state=42)
param_grid = {
    'n_estimators': [100, 200, 300],
    'learning_rate': [0.01, 0.1, 0.2],
    'max_depth': [3, 5, 7]
}
grid_search_gb = GridSearchCV(estimator=gb, param_grid=param_grid, cv=5, scoring='accuracy', n_jobs=-1)
# evaluate_model fits the grid search, then scores the refitted best estimator on the test split.
results_gb = evaluate_model(grid_search_gb, X_train, X_test, y_train, y_test)
print('Gradient Boosting Best Params:', grid_search_gb.best_params_)
print('Gradient Boosting Results:', results_gb)

### Partial Least Squares (PLS) Regression

In [None]:
# PLS Regression model training (used as a classifier, PLS-DA style)
class _ThresholdedRegressor:
    """Adapt a regressor to the fit/predict interface of a binary classifier.

    PLSRegression.predict returns continuous values, which the classification
    metrics used by evaluate_model reject. This adapter turns those values
    into 0/1 labels by thresholding.

    Assumes the target is encoded as 0/1 -- TODO confirm against the saved data.
    """

    def __init__(self, regressor, threshold=0.5):
        self.regressor = regressor
        self.threshold = threshold

    def fit(self, X, y):
        """Fit the wrapped regressor in place and return self."""
        self.regressor.fit(X, y)
        return self

    def decision_function(self, X):
        """Return the raw continuous regression output as a 1-D score array."""
        return np.ravel(self.regressor.predict(X))

    def predict(self, X):
        """Return 1 where the regression output reaches the threshold, else 0."""
        return (self.decision_function(X) >= self.threshold).astype(int)


pls = PLSRegression(n_components=2)
# No separate pls.fit() call: evaluate_model fits the adapter, which fits `pls` in place.
pls_classifier = _ThresholdedRegressor(pls, threshold=0.5)
results_pls = evaluate_model(pls_classifier, X_train, X_test, y_train, y_test)
print('PLS Regression Results:', results_pls)

### Gaussian Process Classification (GPC)

In [None]:
# Gaussian Process classifier training and tuning
# The estimator is GaussianProcessClassifier, so the printed label says "Classifier".
# The `gpr` variable names are left unchanged so code referencing them keeps working.
kernel = 1.0 * RBF(length_scale=1.0)
# Fixed random_state for repeatable results across notebook runs.
gpr = GaussianProcessClassifier(kernel=kernel, random_state=42)
param_grid = {
    'max_iter_predict': [100, 200, 300]
}
grid_search_gpr = GridSearchCV(estimator=gpr, param_grid=param_grid, cv=5, scoring='accuracy', n_jobs=-1)
# evaluate_model fits the grid search, then scores the refitted best estimator on the test split.
results_gpr = evaluate_model(grid_search_gpr, X_train, X_test, y_train, y_test)
print('Gaussian Process Classifier Best Params:', grid_search_gpr.best_params_)
print('Gaussian Process Classifier Results:', results_gpr)

### Neural Networks (NN)

In [None]:
# Neural Networks model training and tuning
# MLPClassifier draws random initial weights and shuffles samples each epoch;
# a fixed random_state makes the tuned scores repeatable across notebook runs.
nn = MLPClassifier(max_iter=1000, random_state=42)
param_grid = {
    'hidden_layer_sizes': [(50, 50), (100, 100), (100,)],
    'activation': ['relu', 'tanh'],
    'solver': ['adam', 'sgd'],
    'alpha': [0.0001, 0.001]
}
grid_search_nn = GridSearchCV(estimator=nn, param_grid=param_grid, cv=5, scoring='accuracy', n_jobs=-1)
# evaluate_model fits the grid search, then scores the refitted best estimator on the test split.
results_nn = evaluate_model(grid_search_nn, X_train, X_test, y_train, y_test)
print('Neural Networks Best Params:', grid_search_nn.best_params_)
print('Neural Networks Results:', results_nn)

## Conclusion

In this notebook, we have trained and tuned various machine learning models using the selected features from the AI4I 2020 Predictive Maintenance Dataset. The results of each model are printed and can be used to compare the performance of the different models.