In [3]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from joblib import load, dump
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
import optuna

# Locations of the pre-computed joblib dumps (relative to this notebook).
DATA_PATH = '../../DataDumps/Spectral_Noise_Addition_2_Class.joblib'
LABELS_PATH = '../../DataDumps/2class_labels.joblib'

# NOTE(review): joblib.load unpickles arbitrary Python objects, so these files
# must come from a trusted source.
combined_data = load(DATA_PATH)
combined_labels = load(LABELS_PATH)

def reshape_data(data):
    """Flatten every sample of ``data`` into a single feature vector.

    The first axis is kept as the sample axis and all remaining axes are
    collapsed, e.g. ``(samples, timesteps, channels)`` becomes
    ``(samples, timesteps * channels)``. Any number of trailing axes is
    accepted; a 1-D input becomes a single feature column.

    Args:
        data: Array exposing ``shape`` and ``reshape`` (e.g. a
            ``numpy.ndarray``) with at least one axis.

    Returns:
        A 2-D array of shape ``(num_samples, num_features)``.
    """
    num_samples = data.shape[0]
    # Use an explicit product instead of ``-1`` so that an empty batch
    # (zero samples) still reshapes without error.
    num_features = int(np.prod(data.shape[1:]))
    return data.reshape(num_samples, num_features)


# Flatten each sample into a 1-D feature vector for the random forest.
combined_data = reshape_data(combined_data)

# Hold out 20% of the data as a final test set. The split is stratified on the
# labels so the test set keeps the same class proportions as the full data,
# matching the stratified cross-validation used during tuning.
X_train_val, X_test, y_train_val, y_test = train_test_split(
    combined_data,
    combined_labels,
    test_size=0.2,
    random_state=42,
    stratify=combined_labels,
)

# Shuffled, seeded stratified 5-fold splitter used for cross-validation.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
def objective(trial):
    """Optuna objective: mean cross-validated accuracy of a random forest.

    Samples one set of ``RandomForestClassifier`` hyperparameters from
    ``trial``, then for every fold produced by the module-level ``skf``
    splitter over ``X_train_val`` / ``y_train_val`` fits a fresh forest on the
    training part and scores it on the held-out part.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        The mean validation accuracy across all folds.
    """
    # Hyperparameter search space
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 10, 300),
        'max_depth': trial.suggest_int('max_depth', 1, 50),
        'min_samples_split': trial.suggest_int('min_samples_split', 2, 20),
        'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 20),
        'max_features': trial.suggest_categorical('max_features', ['sqrt', 'log2', None]),
        'bootstrap': trial.suggest_categorical('bootstrap', [True, False])
    }

    fold_accuracies = []

    for train_index, val_index in skf.split(X_train_val, y_train_val):
        X_train, X_val = X_train_val[train_index], X_train_val[val_index]
        y_train, y_val = y_train_val[train_index], y_train_val[val_index]

        # Fixed seed: without it two trials with identical hyperparameters can
        # score differently, and the tuned score would not correspond to the
        # final model, which is built with random_state=42.
        rf = RandomForestClassifier(**params, random_state=42)
        rf.fit(X_train, y_train)

        # Score this fold on its held-out validation part
        predictions_val = rf.predict(X_val)
        fold_accuracies.append(accuracy_score(y_val, predictions_val))

    # Average accuracy across all folds is the value the study optimizes
    return np.mean(fold_accuracies)
# Perform hyperparameter tuning using Optuna. The sampler is seeded so the
# search is as repeatable as possible.
# NOTE: with n_jobs=-1 trials run concurrently, so the order in which trials
# complete (and therefore the exact search trajectory) can still vary between
# runs.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=200, n_jobs=-1)

# Get the best hyperparameters found
best_params = study.best_params
print("Best Hyperparameters:", best_params)

# Initialize Random Forest classifier with best hyperparameters
best_rf = RandomForestClassifier(**best_params, random_state=42)

# Refit on the full training/validation portion; the test split stays held out
best_rf.fit(X_train_val, y_train_val)

# Persist the fitted model to disk
dump(best_rf, 'SpecAdd-2.joblib')

# Evaluate the best model on the held-out test set
predictions_test = best_rf.predict(X_test)
accuracy_test = accuracy_score(y_test, predictions_test)
print("Accuracy on Test Set:", accuracy_test)
print(classification_report(y_test, predictions_test))
print(confusion_matrix(y_test, predictions_test))

[I 2024-04-26 14:08:04,213] A new study created in memory with name: no-name-acd001b0-c5c4-4d0a-910a-a976a2052212
[I 2024-04-26 14:08:19,875] Trial 6 finished with value: 0.8468896734976159 and parameters: {'n_estimators': 162, 'max_depth': 5, 'min_samples_split': 14, 'min_samples_leaf': 17, 'max_features': 'log2', 'bootstrap': True}. Best is trial 6 with value: 0.8468896734976159.
[I 2024-04-26 14:08:29,275] Trial 23 finished with value: 0.8547209987988206 and parameters: {'n_estimators': 190, 'max_depth': 7, 'min_samples_split': 4, 'min_samples_leaf': 14, 'max_features': 'log2', 'bootstrap': False}. Best is trial 23 with value: 0.8547209987988206.
[I 2024-04-26 14:08:31,934] Trial 17 finished with value: 0.8601536053579879 and parameters: {'n_estimators': 183, 'max_depth': 43, 'min_samples_split': 10, 'min_samples_leaf': 6, 'max_features': 'log2', 'bootstrap': True}. Best is trial 17 with value: 0.8601536053579879.
[I 2024-04-26 14:08:34,263] Trial 28 finished with value: 0.851106540

In [2]:
# Audible "finished" notification for the long-running cell above.
try:
    import winsound
except ImportError:
    # winsound ships only with Python on Windows; on other platforms fall back
    # to the terminal bell character so the notebook still runs end to end.
    print('\a', end='', flush=True)
else:
    # 1440 Hz tone for 500 ms
    winsound.Beep(1440, 500)