In [2]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from joblib import load, dump
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, f1_score
from sklearn.model_selection import train_test_split
import optuna

In [3]:
X_train_val = load('../../BEST SET/X_train_fft.joblib')
y_train_val = load('../../BEST SET/y_Train.joblib')
X_test = load('../../BEST SET/X_test_fft.joblib')
y_test = load('../../BEST SET/y_Test.joblib')

In [39]:
half = X_train_val[::2]
half_labels = y_train_val[::2]

In [None]:
second_half = X_train_val[1::2]
second_half_labels = y_train_val[1::2]

In [47]:
X_test = np.append(X_test, second_half, axis=0)
y_test = np.append(y_test, second_half_labels, axis=0)

In [50]:
X_train_val = half
y_train_val = half_labels

In [51]:
def reshape_data(data):
    num_samples = data.shape[0]
    num_timesteps = data.shape[1]
    num_channels = data.shape[2]
    return data.reshape(num_samples, num_timesteps * num_channels) 


X_train_val = reshape_data(X_train_val)  
X_test = reshape_data(X_test)  
# Split data into training/validation and test sets

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
def objective(trial):
    # Define hyperparameters to optimize
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 10, 300),
        'max_depth': trial.suggest_int('max_depth', 1, 50),
        'min_samples_split': trial.suggest_int('min_samples_split', 2, 20),
        'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 20),
        'max_features': trial.suggest_categorical('max_features', ['sqrt', 'log2', None]),
        'bootstrap': trial.suggest_categorical('bootstrap', [True, False])
    }

    # Initialize variables to store fold accuracies
    fold_accuracies = []

    # Perform K-Fold cross-validation
    for train_index, val_index in skf.split(X_train_val, y_train_val):
        X_train, X_val = X_train_val[train_index], X_train_val[val_index]
        y_train, y_val = y_train_val[train_index], y_train_val[val_index]
        
        # Initialize Random Forest classifier with current hyperparameters
        rf = RandomForestClassifier(**params, random_state=42, n_jobs=-1)

        # Train the classifier
        rf.fit(X_train, y_train)
        
        # Make predictions on validation set
        predictions_val = rf.predict(X_val)
        
        # Calculate accuracy on validation set
        accuracy_val = accuracy_score(y_val, predictions_val)
        
        # Store accuracy for current fold
        fold_accuracies.append(accuracy_val)

    # Calculate average accuracy across all folds
    avg_accuracy = np.mean(fold_accuracies)

    return avg_accuracy
# Perform hyperparameter tuning using Optuna
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=200)

# Get the best hyperparameters found
best_params = study.best_params
print("Best Hyperparameters:", best_params)

# Initialize Random Forest classifier with best hyperparameters
best_rf = RandomForestClassifier(**best_params, n_jobs=-1, random_state=42)

# Train the best model on the entire dataset
best_rf.fit(X_train_val, y_train_val)

# Save the best model if needed
dump(best_rf, 'RF-HalfData.joblib')

# Evaluate the best model on the test set
predictions_test = best_rf.predict(X_test)
accuracy_test = accuracy_score(y_test, predictions_test)
print("Accuracy on Test Set:", accuracy_test)
print("F-Score on Test Set:", f1_score(y_test, predictions_test))
print(classification_report(y_test, predictions_test))
print(confusion_matrix(y_test, predictions_test))

[I 2024-05-22 14:36:44,835] A new study created in memory with name: no-name-3356d183-ff08-4d6a-9dcf-94d617d0205b
[I 2024-05-22 14:37:50,235] Trial 0 finished with value: 0.9225556772544724 and parameters: {'n_estimators': 17, 'max_depth': 5, 'min_samples_split': 6, 'min_samples_leaf': 5, 'max_features': None, 'bootstrap': True}. Best is trial 0 with value: 0.9225556772544724.
[I 2024-05-22 14:37:55,168] Trial 1 finished with value: 0.9322234392113911 and parameters: {'n_estimators': 284, 'max_depth': 14, 'min_samples_split': 14, 'min_samples_leaf': 10, 'max_features': 'sqrt', 'bootstrap': True}. Best is trial 1 with value: 0.9322234392113911.
[I 2024-05-22 14:37:56,363] Trial 2 finished with value: 0.9152756480467323 and parameters: {'n_estimators': 158, 'max_depth': 22, 'min_samples_split': 18, 'min_samples_leaf': 20, 'max_features': 'log2', 'bootstrap': True}. Best is trial 1 with value: 0.9322234392113911.
[I 2024-05-22 14:37:58,211] Trial 3 finished with value: 0.9164804673238407 

Best Hyperparameters: {'n_estimators': 222, 'max_depth': 24, 'min_samples_split': 10, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'bootstrap': False}


ValueError: Found input variables with inconsistent numbers of samples: [1286, 2112]

In [None]:
# Evaluate the best model on the test set
predictions_test = best_rf.predict(X_train_val)
accuracy_test = accuracy_score(y_train_val, predictions_test)
print("Accuracy on Training Set:", accuracy_test)
print(classification_report(y_train_val, predictions_test))
print(confusion_matrix(y_train_val, predictions_test))

Accuracy on Training Set: 0.9933155080213903
              precision    recall  f1-score   support

           0       0.99      1.00      0.99       374
           1       1.00      0.99      0.99       374

    accuracy                           0.99       748
   macro avg       0.99      0.99      0.99       748
weighted avg       0.99      0.99      0.99       748

[[374   0]
 [  5 369]]


In [None]:
# Evaluate the best model on the test set
predictions_test = best_rf.predict(X_test)
accuracy_test = accuracy_score(y_test, predictions_test)
print("Accuracy on Test Set:", accuracy_test)
print(classification_report(y_test, predictions_test))
print(confusion_matrix(y_test, predictions_test))

Accuracy on Test Set: 0.9739130434782609
              precision    recall  f1-score   support

           0       0.98      0.97      0.97       230
           1       0.97      0.98      0.97       230

    accuracy                           0.97       460
   macro avg       0.97      0.97      0.97       460
weighted avg       0.97      0.97      0.97       460

[[222   8]
 [  4 226]]


In [9]:
import matplotlib.pyplot as plt
def plot_data(test_set, predictions_test, y_test):
    for idx, example in enumerate(test_set):
        if predictions_test[idx] == y_test[idx]:
            continue
        fig, axs = plt.subplots(example.shape[0], sharex=True)
        axs[0].set_title(f"Wrong Classification, Should be {y_test[idx]}")
        axs[0].plot(example[0])
        axs[0].set_ylabel("HHE")
        axs[1].plot(example[1])
        axs[1].set_ylabel("HHN")
        axs[2].plot(example[2])
        axs[2].set_ylabel("HHZ")

        plt.xlabel("Time")
        plt.show()

In [None]:
test_set = load('../Dump2/raw_X_test.joblib')
plot_data(test_set, predictions_test, y_test)

In [None]:
plot_data(test_set, )

In [None]:
import winsound    
winsound.Beep(1440, 500)

In [4]:
X_train_val = load('../../BEST SET/X_train_fft.joblib')
y_train_val = load('../../BEST SET/y_Train.joblib')
X_test = load('../../BEST SET/X_test_fft.joblib')
y_test = load('../../BEST SET/y_Test.joblib') 

In [5]:
half = X_train_val[::2]
half_labels = y_train_val[::2]

In [6]:
second_half = X_train_val[1::2]
second_half_labels = y_train_val[1::2]

In [7]:
X_test = np.append(X_test, second_half, axis=0)
y_test = np.append(y_test, second_half_labels, axis=0)

In [8]:
X_train_val = half
y_train_val = half_labels

In [9]:
def reshape_data(data):
    num_samples = data.shape[0]
    num_timesteps = data.shape[1]
    num_channels = data.shape[2]  # Assuming the 3rd dimension is the number of channels
    return data.reshape(num_samples, num_timesteps * num_channels) 


X_train_val = reshape_data(X_train_val)  
X_test = reshape_data(X_test)  
# Split data into training/validation and test sets

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
def objective(trial):
    # Define hyperparameters to optimize
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 10, 300),
        'max_depth': trial.suggest_int('max_depth', 1, 50),
        'min_samples_split': trial.suggest_int('min_samples_split', 2, 20),
        'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 20),
        'max_features': trial.suggest_categorical('max_features', ['sqrt', 'log2', None]),
        'bootstrap': trial.suggest_categorical('bootstrap', [True, False])
    }

    # Initialize variables to store fold accuracies
    fold_accuracies = []

    # Perform K-Fold cross-validation
    for train_index, val_index in skf.split(X_train_val, y_train_val):
        X_train, X_val = X_train_val[train_index], X_train_val[val_index]
        y_train, y_val = y_train_val[train_index], y_train_val[val_index]
        
        # Initialize Random Forest classifier with current hyperparameters
        rf = RandomForestClassifier(**params, random_state=42, n_jobs=-1)

        # Train the classifier
        rf.fit(X_train, y_train)
        
        # Make predictions on validation set
        predictions_val = rf.predict(X_val)
        
        # Calculate accuracy on validation set
        accuracy_val = accuracy_score(y_val, predictions_val)
        
        # Store accuracy for current fold
        fold_accuracies.append(accuracy_val)

    # Calculate average accuracy across all folds
    avg_accuracy = np.mean(fold_accuracies)

    return avg_accuracy
# Perform hyperparameter tuning using Optuna
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=200)

# Get the best hyperparameters found
best_params = study.best_params
print("Best Hyperparameters:", best_params)

# Initialize Random Forest classifier with best hyperparameters
best_rf = RandomForestClassifier(**best_params, n_jobs=-1, random_state=42)

# Train the best model on the entire dataset
best_rf.fit(X_train_val, y_train_val)

# Save the best model if needed
dump(best_rf, 'RF-HalfData.joblib')

# Evaluate the best model on the test set
predictions_test = best_rf.predict(X_test)
accuracy_test = accuracy_score(y_test, predictions_test)
print("Accuracy on Test Set:", accuracy_test)
print("F-Score on Test Set:", f1_score(y_test, predictions_test))
print(classification_report(y_test, predictions_test))
print(confusion_matrix(y_test, predictions_test))

[I 2024-05-23 18:26:19,311] A new study created in memory with name: no-name-f2d75d3f-ab17-48e2-904d-8d5af22f34d7
[I 2024-05-23 18:26:20,133] Trial 0 finished with value: 0.9225556772544724 and parameters: {'n_estimators': 55, 'max_depth': 44, 'min_samples_split': 19, 'min_samples_leaf': 5, 'max_features': 'log2', 'bootstrap': True}. Best is trial 0 with value: 0.9225556772544724.
[I 2024-05-23 18:26:20,769] Trial 1 finished with value: 0.9298064987221613 and parameters: {'n_estimators': 30, 'max_depth': 42, 'min_samples_split': 17, 'min_samples_leaf': 8, 'max_features': 'log2', 'bootstrap': False}. Best is trial 1 with value: 0.9298064987221613.
[I 2024-05-23 18:26:22,095] Trial 2 finished with value: 0.9261774370208105 and parameters: {'n_estimators': 198, 'max_depth': 50, 'min_samples_split': 7, 'min_samples_leaf': 10, 'max_features': 'log2', 'bootstrap': False}. Best is trial 1 with value: 0.9298064987221613.
[I 2024-05-23 18:26:23,485] Trial 3 finished with value: 0.90073749543629

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, f1_score


In [None]:
f1_score(y_test, predictions_test)

0.9743589743589743

In [None]:
# Evaluate the best model on the test set
predictions_test = best_rf.predict(X_train_val)
accuracy_test = accuracy_score(y_train_val, predictions_test)
print("Accuracy on Training Set:", accuracy_test)
print(classification_report(y_train_val, predictions_test))
print(confusion_matrix(y_train_val, predictions_test))

Accuracy on Training Set: 0.9933155080213903
              precision    recall  f1-score   support

           0       0.99      1.00      0.99       374
           1       1.00      0.99      0.99       374

    accuracy                           0.99       748
   macro avg       0.99      0.99      0.99       748
weighted avg       0.99      0.99      0.99       748

[[374   0]
 [  5 369]]


In [None]:
# Evaluate the best model on the test set
predictions_test = best_rf.predict(X_test)
accuracy_test = accuracy_score(y_test, predictions_test)
print("Accuracy on Test Set:", accuracy_test)
print(classification_report(y_test, predictions_test))
print(confusion_matrix(y_test, predictions_test))

Accuracy on Test Set: 0.9739130434782609
              precision    recall  f1-score   support

           0       0.98      0.97      0.97       230
           1       0.97      0.98      0.97       230

    accuracy                           0.97       460
   macro avg       0.97      0.97      0.97       460
weighted avg       0.97      0.97      0.97       460

[[222   8]
 [  4 226]]


In [7]:
X_train_val = load('../../BEST SET/X_train_fft.joblib')
y_train_val = load('../../BEST SET/y_Train.joblib')
X_test = load('../../BEST SET/X_test_fft.joblib')
y_test = load('../../BEST SET/y_Test.joblib') 

quarter = X_train_val[::4]
quarter_labels = y_train_val[::4]
second_qtr = X_train_val[1::4]
second_qtr_labels = y_train_val[1::4]
third_qtr = X_train_val[2::4]
third_qtr_labels = y_train_val[2::4]
fourth_qtr = X_train_val[3::4]
fourth_qtr_labels = y_train_val[3::4]

X_train_val = np.append(quarter, second_qtr, axis=0)
X_train_val = np.append(X_train_val, third_qtr, axis=0)
y_train_val = np.append(quarter_labels, second_qtr_labels, axis=0)
y_train_val = np.append(y_train_val, third_qtr_labels, axis=0)
X_test = np.append(X_test, fourth_qtr, axis=0)
y_test = np.append(y_test, fourth_qtr_labels, axis=0)

def reshape_data(data):
    num_samples = data.shape[0]
    num_timesteps = data.shape[1]
    num_channels = data.shape[2]  # Assuming the 3rd dimension is the number of channels
    return data.reshape(num_samples, num_timesteps * num_channels) 


X_train_val = reshape_data(X_train_val)  
X_test = reshape_data(X_test)  
# Split data into training/validation and test sets

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
def objective(trial):
    # Define hyperparameters to optimize
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 10, 300),
        'max_depth': trial.suggest_int('max_depth', 1, 50),
        'min_samples_split': trial.suggest_int('min_samples_split', 2, 20),
        'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 20),
        'max_features': trial.suggest_categorical('max_features', ['sqrt', 'log2', None]),
        'bootstrap': trial.suggest_categorical('bootstrap', [True, False])
    }

    # Initialize variables to store fold accuracies
    fold_accuracies = []

    # Perform K-Fold cross-validation
    for train_index, val_index in skf.split(X_train_val, y_train_val):
        X_train, X_val = X_train_val[train_index], X_train_val[val_index]
        y_train, y_val = y_train_val[train_index], y_train_val[val_index]
        
        # Initialize Random Forest classifier with current hyperparameters
        rf = RandomForestClassifier(**params, random_state=42, n_jobs=-1)

        # Train the classifier
        rf.fit(X_train, y_train)
        
        # Make predictions on validation set
        predictions_val = rf.predict(X_val)
        
        # Calculate accuracy on validation set
        accuracy_val = accuracy_score(y_val, predictions_val)
        
        # Store accuracy for current fold
        fold_accuracies.append(accuracy_val)

    # Calculate average accuracy across all folds
    avg_accuracy = np.mean(fold_accuracies)

    return avg_accuracy
# Perform hyperparameter tuning using Optuna
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=200)

# Get the best hyperparameters found
best_params = study.best_params
print("Best Hyperparameters:", best_params)

# Initialize Random Forest classifier with best hyperparameters
best_rf = RandomForestClassifier(**best_params, n_jobs=-1, random_state=42)

# Train the best model on the entire dataset
best_rf.fit(X_train_val, y_train_val)

# Save the best model if needed
dump(best_rf, 'RF-3QuarterData.joblib')

# Evaluate the best model on the test set
predictions_test = best_rf.predict(X_test)
accuracy_test = accuracy_score(y_test, predictions_test)
print("Accuracy on Test Set:", accuracy_test)
print("F-Score on Test Set:", f1_score(y_test, predictions_test))
print(classification_report(y_test, predictions_test))
print(confusion_matrix(y_test, predictions_test))
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, f1_score

f1_score(y_test, predictions_test)
# Evaluate the best model on the test set
predictions_test = best_rf.predict(X_train_val)
accuracy_test = accuracy_score(y_train_val, predictions_test)
print("Accuracy on Training Set:", accuracy_test)
print(classification_report(y_train_val, predictions_test))
print(confusion_matrix(y_train_val, predictions_test))
# Evaluate the best model on the test set
predictions_test = best_rf.predict(X_test)
accuracy_test = accuracy_score(y_test, predictions_test)
print("Accuracy on Test Set:", accuracy_test)
print(classification_report(y_test, predictions_test))
print(confusion_matrix(y_test, predictions_test))

[I 2024-05-23 18:37:15,456] A new study created in memory with name: no-name-0a45d755-06f2-4b9c-b4a0-21570cc195cd
[I 2024-05-23 18:37:21,261] Trial 0 finished with value: 0.9418930390492359 and parameters: {'n_estimators': 202, 'max_depth': 4, 'min_samples_split': 14, 'min_samples_leaf': 15, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 0 with value: 0.9418930390492359.
[I 2024-05-23 18:37:25,917] Trial 1 finished with value: 0.9483511819250359 and parameters: {'n_estimators': 106, 'max_depth': 30, 'min_samples_split': 4, 'min_samples_leaf': 13, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 1 with value: 0.9483511819250359.
[I 2024-05-23 18:47:32,447] Trial 2 finished with value: 0.9112086979234686 and parameters: {'n_estimators': 99, 'max_depth': 17, 'min_samples_split': 7, 'min_samples_leaf': 7, 'max_features': None, 'bootstrap': False}. Best is trial 1 with value: 0.9483511819250359.
[I 2024-05-23 18:47:34,448] Trial 3 finished with value: 0.940283400809716

Best Hyperparameters: {'n_estimators': 49, 'max_depth': 31, 'min_samples_split': 2, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'bootstrap': False}
Accuracy on Test Set: 0.9713631156930126
F-Score on Test Set: 0.9714285714285713
              precision    recall  f1-score   support

           0       0.97      0.97      0.97       437
           1       0.97      0.97      0.97       436

    accuracy                           0.97       873
   macro avg       0.97      0.97      0.97       873
weighted avg       0.97      0.97      0.97       873

[[423  14]
 [ 11 425]]
Accuracy on Training Set: 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       619
           1       1.00      1.00      1.00       620

    accuracy                           1.00      1239
   macro avg       1.00      1.00      1.00      1239
weighted avg       1.00      1.00      1.00      1239

[[619   0]
 [  0 620]]
Accuracy on Test Set: 0.97136311569301

In [8]:
X_train_val = load('../../BEST SET/X_train_fft.joblib')
y_train_val = load('../../BEST SET/y_Train.joblib')
X_test = load('../../BEST SET/X_test_fft.joblib')
y_test = load('../../BEST SET/y_Test.joblib') 

quarter = X_train_val[::4]
quarter_labels = y_train_val[::4]
second_qtr = X_train_val[1::4]
second_qtr_labels = y_train_val[1::4]
third_qtr = X_train_val[2::4]
third_qtr_labels = y_train_val[2::4]
fourth_qtr = X_train_val[3::4]
fourth_qtr_labels = y_train_val[3::4]


X_test = np.append(X_test,second_qtr, axis=0)
X_test = np.append(X_test, third_qtr, axis=0)
X_test = np.append(X_test, fourth_qtr, axis=0)
y_test = np.append(y_test,second_qtr_labels, axis=0)
y_test = np.append(y_test, third_qtr_labels, axis=0)
y_test = np.append(y_test, fourth_qtr_labels, axis=0)

X_train_val = quarter
y_train_val = quarter_labels

def reshape_data(data):
    num_samples = data.shape[0]
    num_timesteps = data.shape[1]
    num_channels = data.shape[2]  # Assuming the 3rd dimension is the number of channels
    return data.reshape(num_samples, num_timesteps * num_channels) 


X_train_val = reshape_data(X_train_val)  
X_test = reshape_data(X_test)  
# Split data into training/validation and test sets

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
def objective(trial):
    # Define hyperparameters to optimize
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 10, 300),
        'max_depth': trial.suggest_int('max_depth', 1, 50),
        'min_samples_split': trial.suggest_int('min_samples_split', 2, 20),
        'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 20),
        'max_features': trial.suggest_categorical('max_features', ['sqrt', 'log2', None]),
        'bootstrap': trial.suggest_categorical('bootstrap', [True, False])
    }

    # Initialize variables to store fold accuracies
    fold_accuracies = []

    # Perform K-Fold cross-validation
    for train_index, val_index in skf.split(X_train_val, y_train_val):
        X_train, X_val = X_train_val[train_index], X_train_val[val_index]
        y_train, y_val = y_train_val[train_index], y_train_val[val_index]
        
        # Initialize Random Forest classifier with current hyperparameters
        rf = RandomForestClassifier(**params, random_state=42, n_jobs=-1)

        # Train the classifier
        rf.fit(X_train, y_train)
        
        # Make predictions on validation set
        predictions_val = rf.predict(X_val)
        
        # Calculate accuracy on validation set
        accuracy_val = accuracy_score(y_val, predictions_val)
        
        # Store accuracy for current fold
        fold_accuracies.append(accuracy_val)

    # Calculate average accuracy across all folds
    avg_accuracy = np.mean(fold_accuracies)

    return avg_accuracy
# Perform hyperparameter tuning using Optuna
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=200)

# Get the best hyperparameters found
best_params = study.best_params
print("Best Hyperparameters:", best_params)

# Initialize Random Forest classifier with best hyperparameters
best_rf = RandomForestClassifier(**best_params, n_jobs=-1, random_state=42)

# Train the best model on the entire dataset
best_rf.fit(X_train_val, y_train_val)

# Save the best model if needed
dump(best_rf, 'RF-1QtrData.joblib')

# Evaluate the best model on the test set
predictions_test = best_rf.predict(X_test)
accuracy_test = accuracy_score(y_test, predictions_test)
print("Accuracy on Test Set:", accuracy_test)
print("F-Score on Test Set:", f1_score(y_test, predictions_test))
print(classification_report(y_test, predictions_test))
print(confusion_matrix(y_test, predictions_test))
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, f1_score

f1_score(y_test, predictions_test)
# Evaluate the best model on the test set
predictions_test = best_rf.predict(X_train_val)
accuracy_test = accuracy_score(y_train_val, predictions_test)
print("Accuracy on Training Set:", accuracy_test)
print(classification_report(y_train_val, predictions_test))
print(confusion_matrix(y_train_val, predictions_test))
# Evaluate the best model on the test set
predictions_test = best_rf.predict(X_test)
accuracy_test = accuracy_score(y_test, predictions_test)
print("Accuracy on Test Set:", accuracy_test)
print(classification_report(y_test, predictions_test))
print(confusion_matrix(y_test, predictions_test))

[I 2024-05-23 20:48:34,301] A new study created in memory with name: no-name-e1c946a0-c6bf-4fa2-a86d-db55a503cbc0
[I 2024-05-23 20:48:34,773] Trial 0 finished with value: 0.8741698501322362 and parameters: {'n_estimators': 37, 'max_depth': 1, 'min_samples_split': 5, 'min_samples_leaf': 5, 'max_features': 'log2', 'bootstrap': False}. Best is trial 0 with value: 0.8741698501322362.
[I 2024-05-23 20:49:13,633] Trial 1 finished with value: 0.8208051719071408 and parameters: {'n_estimators': 42, 'max_depth': 48, 'min_samples_split': 9, 'min_samples_leaf': 15, 'max_features': None, 'bootstrap': False}. Best is trial 0 with value: 0.8741698501322362.
[I 2024-05-23 20:49:14,746] Trial 2 finished with value: 0.90079341757273 and parameters: {'n_estimators': 212, 'max_depth': 31, 'min_samples_split': 3, 'min_samples_leaf': 16, 'max_features': 'log2', 'bootstrap': True}. Best is trial 2 with value: 0.90079341757273.
[I 2024-05-23 20:49:16,697] Trial 3 finished with value: 0.9080223332353805 and p

Best Hyperparameters: {'n_estimators': 270, 'max_depth': 6, 'min_samples_split': 9, 'min_samples_leaf': 5, 'max_features': 'sqrt', 'bootstrap': False}
Accuracy on Test Set: 0.9535020600353149
F-Score on Test Set: 0.9541497388276262
              precision    recall  f1-score   support

           0       0.97      0.94      0.95       850
           1       0.94      0.97      0.95       849

    accuracy                           0.95      1699
   macro avg       0.95      0.95      0.95      1699
weighted avg       0.95      0.95      0.95      1699

[[798  52]
 [ 27 822]]
Accuracy on Training Set: 0.9975786924939467
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       206
           1       1.00      1.00      1.00       207

    accuracy                           1.00       413
   macro avg       1.00      1.00      1.00       413
weighted avg       1.00      1.00      1.00       413

[[206   0]
 [  1 206]]
Accuracy on Test Set: 0

In [None]:
half = X_train_val[::2]
half_labels = y_train_val[::2]

In [None]:
second_half = X_train_val[1::2]
second_half_labels = y_train_val[1::2]

In [None]:
X_test = np.append(X_test, second_half, axis=0)
y_test = np.append(y_test, second_half_labels, axis=0)

In [None]:
X_train_val = half
y_train_val = half_labels