In [2]:
!pip install optuna

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting optuna
  Downloading optuna-3.2.0-py3-none-any.whl (390 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m390.6/390.6 kB[0m [31m25.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.11.1-py3-none-any.whl (224 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m224.5/224.5 kB[0m [31m29.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting cmaes>=0.9.1 (from optuna)
  Downloading cmaes-0.9.1-py3-none-any.whl (21 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.7.0-py2.py3-none-any.whl (11 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.2.4-py3-none-any.whl (78 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.7/78.7 kB[0m [31m12.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: Mako, colorlog, cmaes, alembic, optuna
Successfully 

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Conv1D, MaxPooling1D, AveragePooling1D, Flatten, Bidirectional
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import SGD
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.pipeline import Pipeline
import optuna
from optuna.trial import Trial

# Load the dataset
df = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/user01_.csv")
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values

# Preprocess the data
scaler = StandardScaler()
X = scaler.fit_transform(X)

encoder = OneHotEncoder(categories='auto')
y = encoder.fit_transform(y.reshape(-1, 1)).toarray()

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Reshape the input data to match the expected input shape of the CNN
X_train_cnn = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test_cnn = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

# Reshape the input data to match the expected input shape of the LSTM
X_train_lstm = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
X_test_lstm = X_test.reshape((X_test.shape[0], 1, X_test.shape[1]))

# Define the LSTM model with different activation functions
def create_lstm_model(trial):
    units = trial.suggest_categorical('units', [32, 64, 128])
    dropout_rate = trial.suggest_categorical('dropout_rate_lstm', [0.2, 0.5])
    optimizer = trial.suggest_categorical('optimizer_lstm', ['adam', 'rmsprop'])
    activation = trial.suggest_categorical('activation_lstm', ['relu', 'tanh'])
    recurrent_activation = trial.suggest_categorical('recurrent_activation', ['sigmoid', 'tanh'])
    bias_initializer = trial.suggest_categorical('bias_initializer', ['zeros', 'ones'])
    kernel_initializer = trial.suggest_categorical('kernel_initializer', ['glorot_uniform', 'orthogonal'])
    recurrent_initializer = trial.suggest_categorical('recurrent_initializer', ['glorot_uniform', 'orthogonal'])
    return_sequences = trial.suggest_categorical('return_sequences', [True, False])
    stateful = trial.suggest_categorical('stateful', [True, False])
    batch_size = trial.suggest_categorical('batch_size', [32, 64])

    model = Sequential()
    model.add(Bidirectional(LSTM(units=units, return_sequences=True, recurrent_activation=recurrent_activation,
                                 bias_initializer=bias_initializer, kernel_initializer=kernel_initializer,
                                 recurrent_initializer=recurrent_initializer),
                            input_shape=(X_train_lstm.shape[1], X_train_lstm.shape[2])))
    model.add(Dropout(dropout_rate))
    model.add(Bidirectional(LSTM(units=units, return_sequences=True, recurrent_activation=recurrent_activation,
                                 bias_initializer=bias_initializer, kernel_initializer=kernel_initializer,
                                 recurrent_initializer=recurrent_initializer)))
    model.add(Flatten())
    model.add(Dense(units=64, activation='relu'))
    model.add(Dropout(dropout_rate))
    model.add(Dense(units=y_train.shape[1], activation='softmax'))

    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    return model

def create_cnn_model(trial):
    filters = trial.suggest_categorical('filters', [16, 32, 64])
    kernel_size = trial.suggest_categorical('kernel_size', [3, 5, 7])
    activation = trial.suggest_categorical('activation', ['relu', 'sigmoid'])
    optimizer = trial.suggest_categorical('optimizer', ['adam', 'rmsprop'])
    pooling = trial.suggest_categorical('pooling', ['max', 'average'])
    dropout_rate = trial.suggest_uniform('dropout_rate', 0.0, 0.5)
    dense_units = trial.suggest_categorical('dense_units', [64, 128, 256])

    model = Sequential()
    model.add(Conv1D(filters=filters, kernel_size=kernel_size, activation=activation,
                     input_shape=(X_train.shape[1], 1)))

    if pooling == 'max':
        model.add(MaxPooling1D(pool_size=2))
    elif pooling == 'average':
        model.add(AveragePooling1D(pool_size=2))

    model.add(Conv1D(filters=filters, kernel_size=kernel_size, activation=activation))

    if pooling == 'max':
        model.add(MaxPooling1D(pool_size=2))
    elif pooling == 'average':
        model.add(AveragePooling1D(pool_size=2))

    model.add(Flatten())
    model.add(Dense(units=dense_units, activation=activation))
    model.add(Dropout(dropout_rate))
    model.add(Dense(units=y_train.shape[1], activation='softmax'))

    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    return model


def create_decision_tree_model(trial):
    criterion = trial.suggest_categorical('criterion', ['gini', 'entropy', 'log_loss'])
    splitter = trial.suggest_categorical('splitter', ['best', 'random'])
    max_depth = trial.suggest_int('max_depth', 3, 10)
    min_samples_split = trial.suggest_int('min_samples_split', 2, 10)
    min_samples_leaf = trial.suggest_int('min_samples_leaf', 1, 10)
    min_weight_fraction_leaf = trial.suggest_categorical('min_weight_fraction_leaf', [0.0, 0.1, 0.2])
    min_impurity_decrease = trial.suggest_categorical('min_impurity_decrease', [0.0, 0.1, 0.2])
    ccp_alpha = trial.suggest_categorical('ccp_alpha', [0.0, 0.1, 0.2])

    model = DecisionTreeClassifier(criterion=criterion, splitter=splitter, max_depth=max_depth, min_samples_split=min_samples_split,
                                   min_samples_leaf=min_samples_leaf, min_weight_fraction_leaf=min_weight_fraction_leaf, max_features='sqrt',
                                   max_leaf_nodes=10, min_impurity_decrease=min_impurity_decrease, ccp_alpha=ccp_alpha)
    return model

# Define the Random Forest model
def create_random_forest_model(trial):
    n_estimators = trial.suggest_int('n_estimators', 100, 1000, step=100)
    criterion = trial.suggest_categorical('criterion', ['gini', 'entropy', 'log_loss'])
    max_depth = trial.suggest_int('max_depth', 3, 10)
    min_samples_split = trial.suggest_int('min_samples_split', 2, 10)
    min_samples_leaf = trial.suggest_int('min_samples_leaf', 1, 10)
    min_weight_fraction_leaf = trial.suggest_categorical('min_weight_fraction_leaf', [0.0, 0.1, 0.2])
    min_impurity_decrease = trial.suggest_categorical('min_impurity_decrease', [0.0, 0.1, 0.2])
    ccp_alpha = trial.suggest_categorical('ccp_alpha', [0.0, 0.1, 0.2])
    max_samples = trial.suggest_categorical('max_samples', [None, 0.5, 0.8])

    model = RandomForestClassifier(n_estimators=n_estimators, criterion=criterion, max_depth=max_depth, min_samples_split=min_samples_split,
                                   min_samples_leaf=min_samples_leaf, min_weight_fraction_leaf=min_weight_fraction_leaf, max_features='sqrt',
                                   max_leaf_nodes=10, min_impurity_decrease=min_impurity_decrease, ccp_alpha=ccp_alpha, max_samples=max_samples)
    return model

y_true_classes = np.argmax(y_test, axis=1)
early_stopping = EarlyStopping(monitor='val_loss', patience=3)
# Define the objective function for Optuna optimization
def objective(trial):
    model_type = trial.suggest_categorical('model_type', ['lstm', 'cnn', 'decision_tree', 'random_forest'])
    if model_type == 'lstm':
        model = create_lstm_model(trial)
        model.fit(X_train_lstm, y_train,
                epochs=100,
                batch_size=64,
                validation_data=(X_test_lstm, y_test),
                callbacks=early_stopping, verbose=0
              )
        y_pred_lstm = model.predict(X_test_lstm)
        y_pred_classes_lstm = np.argmax(y_pred_lstm, axis=1)
        accuracy = accuracy_score(y_true_classes, y_pred_classes_lstm)
    elif model_type == 'cnn':
        model = create_cnn_model(trial)
        model.fit(X_train_cnn, y_train,
                       epochs=100,
                       batch_size=64,
                       validation_data=(X_test_cnn, y_test),
                       callbacks=early_stopping, verbose=0
                      )
        y_pred_cnn = model.predict(X_test_cnn)
        y_pred_classes_cnn = np.argmax(y_pred_cnn, axis=1)
        accuracy = accuracy_score(y_true_classes, y_pred_classes_cnn)
    elif model_type == 'decision_tree':
        model = create_decision_tree_model(trial)
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
    else:
        model = create_random_forest_model(trial)
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)

    return 1 - accuracy

# Perform Bayesian optimization with Optuna
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=100)

# Get the best parameters
best_params = study.best_params

if best_params['model_type'] == 'lstm':
    trial = optuna.trial.FixedTrial(best_params)
    model = create_lstm_model(trial)
    model.fit(X_train_lstm, y_train,
                       epochs=100,
                       batch_size=64,
                       validation_data=(X_test_lstm, y_test),
                       callbacks=early_stopping
                      )
    y_pred_lstm = model.predict(X_test_lstm)
    y_pred_classes_lstm = np.argmax(y_pred_lstm, axis=1)
    accuracy = accuracy_score(y_true_classes, y_pred_classes_lstm)
    precision = precision_score(y_true_classes, y_pred_classes_lstm, average='weighted',zero_division=1)
    recall = recall_score(y_true_classes, y_pred_classes_lstm, average='weighted')
    f1 = f1_score(y_true_classes, y_pred_classes_lstm, average='weighted')
    roc_auc = roc_auc_score(y_test, y_pred_lstm, average='weighted', multi_class='ovr')
    print("Evaluation Metrics LSTM:")
    print("Accuracy:", accuracy)
    print("Precision:", precision)
    print("Recall:", recall)
    print("F1 Score:", f1)
    print("ROC AUC:", roc_auc)
elif best_params['model_type'] == 'cnn':
    trial = optuna.trial.FixedTrial(best_params)
    model = create_cnn_model(trial)
    model.fit(X_train_cnn, y_train,
                       epochs=100,
                       batch_size=64,
                       validation_data=(X_test_cnn, y_test),
                       callbacks=early_stopping
                      )
    y_pred_cnn = model.predict(X_test_cnn)
    y_pred_classes_cnn = np.argmax(y_pred_cnn, axis=1)
    accuracy = accuracy_score(y_true_classes, y_pred_classes_cnn)
    precision = precision_score(y_true_classes, y_pred_classes_cnn, average='weighted',zero_division=1)
    recall = recall_score(y_true_classes, y_pred_classes_cnn, average='weighted')
    f1 = f1_score(y_true_classes, y_pred_classes_cnn, average='weighted')
    roc_auc = roc_auc_score(y_test, y_pred_cnn, average='weighted', multi_class='ovr')
    print("Evaluation Metrics CNN:")
    print("Accuracy:", accuracy)
    print("Precision:", precision)
    print("Recall:", recall)
    print("F1 Score:", f1)
    print("ROC AUC:", roc_auc)
elif best_params['model_type'] == 'decision_tree':
    trial = optuna.trial.FixedTrial(best_params)
    model = create_decision_tree_model(trial)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted',zero_division=1)
    recall = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')
    roc_auc = roc_auc_score(y_test, y_pred, average='weighted', multi_class='ovr')
    print(f"Evaluation Metrics {best_params['model_type']}:")
    print("Accuracy:", accuracy)
    print("Precision:", precision)
    print("Recall:", recall)
    print("F1 Score:", f1)
    print("ROC AUC:", roc_auc)
else:
    trial = optuna.trial.FixedTrial(best_params)
    model = create_random_forest_model(trial)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted',zero_division=1)
    recall = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')
    roc_auc = roc_auc_score(y_test, y_pred, average='weighted', multi_class='ovr')
    print(f"Evaluation Metrics {best_params['model_type']}:")
    print("Accuracy:", accuracy)
    print("Precision:", precision)
    print("Recall:", recall)
    print("F1 Score:", f1)
    print("ROC AUC:", roc_auc)


[I 2023-06-21 21:32:24,337] A new study created in memory with name: no-name-57793e90-ed7f-4561-b7b6-28f62f756e04
[I 2023-06-21 21:32:24,357] Trial 0 finished with value: 0.12447257383966248 and parameters: {'model_type': 'decision_tree', 'criterion': 'gini', 'splitter': 'best', 'max_depth': 8, 'min_samples_split': 4, 'min_samples_leaf': 3, 'min_weight_fraction_leaf': 0.0, 'min_impurity_decrease': 0.0, 'ccp_alpha': 0.0}. Best is trial 0 with value: 0.12447257383966248.




[I 2023-06-21 21:32:41,268] Trial 1 finished with value: 0.03586497890295359 and parameters: {'model_type': 'lstm', 'units': 32, 'dropout_rate_lstm': 0.2, 'optimizer_lstm': 'adam', 'activation_lstm': 'tanh', 'recurrent_activation': 'tanh', 'bias_initializer': 'ones', 'kernel_initializer': 'orthogonal', 'recurrent_initializer': 'glorot_uniform', 'return_sequences': False, 'stateful': True, 'batch_size': 32}. Best is trial 1 with value: 0.03586497890295359.
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.0, 0.5)




[I 2023-06-21 21:32:50,804] Trial 2 finished with value: 0.14135021097046419 and parameters: {'model_type': 'cnn', 'filters': 16, 'kernel_size': 7, 'activation': 'sigmoid', 'optimizer': 'adam', 'pooling': 'max', 'dropout_rate': 0.0865430013783719, 'dense_units': 256}. Best is trial 1 with value: 0.03586497890295359.




[I 2023-06-21 21:33:07,859] Trial 3 finished with value: 0.05485232067510548 and parameters: {'model_type': 'lstm', 'units': 64, 'dropout_rate_lstm': 0.2, 'optimizer_lstm': 'rmsprop', 'activation_lstm': 'tanh', 'recurrent_activation': 'sigmoid', 'bias_initializer': 'zeros', 'kernel_initializer': 'glorot_uniform', 'recurrent_initializer': 'glorot_uniform', 'return_sequences': False, 'stateful': True, 'batch_size': 64}. Best is trial 1 with value: 0.03586497890295359.
[I 2023-06-21 21:33:09,022] Trial 4 finished with value: 0.069620253164557 and parameters: {'model_type': 'random_forest', 'n_estimators': 300, 'criterion': 'entropy', 'max_depth': 5, 'min_samples_split': 4, 'min_samples_leaf': 5, 'min_weight_fraction_leaf': 0.0, 'min_impurity_decrease': 0.0, 'ccp_alpha': 0.0, 'max_samples': 0.5}. Best is trial 1 with value: 0.03586497890295359.
[I 2023-06-21 21:33:09,452] Trial 5 finished with value: 0.23206751054852326 and parameters: {'model_type': 'random_forest', 'n_estimators': 100, '



[I 2023-06-21 21:33:13,527] Trial 6 finished with value: 0.10126582278481011 and parameters: {'model_type': 'cnn', 'filters': 32, 'kernel_size': 5, 'activation': 'relu', 'optimizer': 'adam', 'pooling': 'max', 'dropout_rate': 0.42607429770569455, 'dense_units': 64}. Best is trial 1 with value: 0.03586497890295359.




[I 2023-06-21 21:33:30,402] Trial 7 finished with value: 0.04008438818565396 and parameters: {'model_type': 'lstm', 'units': 64, 'dropout_rate_lstm': 0.2, 'optimizer_lstm': 'adam', 'activation_lstm': 'tanh', 'recurrent_activation': 'sigmoid', 'bias_initializer': 'zeros', 'kernel_initializer': 'orthogonal', 'recurrent_initializer': 'orthogonal', 'return_sequences': False, 'stateful': False, 'batch_size': 64}. Best is trial 1 with value: 0.03586497890295359.
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.0, 0.5)




[I 2023-06-21 21:33:42,252] Trial 8 finished with value: 0.12869198312236285 and parameters: {'model_type': 'cnn', 'filters': 32, 'kernel_size': 5, 'activation': 'sigmoid', 'optimizer': 'adam', 'pooling': 'max', 'dropout_rate': 0.23711321465547852, 'dense_units': 128}. Best is trial 1 with value: 0.03586497890295359.
[I 2023-06-21 21:33:43,599] Trial 9 finished with value: 1.0 and parameters: {'model_type': 'random_forest', 'n_estimators': 600, 'criterion': 'gini', 'max_depth': 7, 'min_samples_split': 6, 'min_samples_leaf': 6, 'min_weight_fraction_leaf': 0.2, 'min_impurity_decrease': 0.0, 'ccp_alpha': 0.1, 'max_samples': 0.5}. Best is trial 1 with value: 0.03586497890295359.




[I 2023-06-21 21:33:53,968] Trial 10 finished with value: 0.048523206751054815 and parameters: {'model_type': 'lstm', 'units': 32, 'dropout_rate_lstm': 0.5, 'optimizer_lstm': 'adam', 'activation_lstm': 'relu', 'recurrent_activation': 'tanh', 'bias_initializer': 'ones', 'kernel_initializer': 'orthogonal', 'recurrent_initializer': 'glorot_uniform', 'return_sequences': True, 'stateful': True, 'batch_size': 32}. Best is trial 1 with value: 0.03586497890295359.




[I 2023-06-21 21:34:01,988] Trial 11 finished with value: 0.052742616033755296 and parameters: {'model_type': 'lstm', 'units': 32, 'dropout_rate_lstm': 0.2, 'optimizer_lstm': 'adam', 'activation_lstm': 'tanh', 'recurrent_activation': 'tanh', 'bias_initializer': 'zeros', 'kernel_initializer': 'orthogonal', 'recurrent_initializer': 'orthogonal', 'return_sequences': False, 'stateful': False, 'batch_size': 32}. Best is trial 1 with value: 0.03586497890295359.




[I 2023-06-21 21:34:20,096] Trial 12 finished with value: 0.04641350210970463 and parameters: {'model_type': 'lstm', 'units': 64, 'dropout_rate_lstm': 0.2, 'optimizer_lstm': 'adam', 'activation_lstm': 'tanh', 'recurrent_activation': 'sigmoid', 'bias_initializer': 'ones', 'kernel_initializer': 'orthogonal', 'recurrent_initializer': 'orthogonal', 'return_sequences': False, 'stateful': False, 'batch_size': 64}. Best is trial 1 with value: 0.03586497890295359.




[I 2023-06-21 21:34:38,172] Trial 13 finished with value: 0.048523206751054815 and parameters: {'model_type': 'lstm', 'units': 128, 'dropout_rate_lstm': 0.2, 'optimizer_lstm': 'adam', 'activation_lstm': 'tanh', 'recurrent_activation': 'sigmoid', 'bias_initializer': 'zeros', 'kernel_initializer': 'orthogonal', 'recurrent_initializer': 'orthogonal', 'return_sequences': False, 'stateful': False, 'batch_size': 32}. Best is trial 1 with value: 0.03586497890295359.
[I 2023-06-21 21:34:38,196] Trial 14 finished with value: 1.0 and parameters: {'model_type': 'decision_tree', 'criterion': 'entropy', 'splitter': 'random', 'max_depth': 3, 'min_samples_split': 10, 'min_samples_leaf': 10, 'min_weight_fraction_leaf': 0.2, 'min_impurity_decrease': 0.2, 'ccp_alpha': 0.2}. Best is trial 1 with value: 0.03586497890295359.




[I 2023-06-21 21:34:53,705] Trial 15 finished with value: 0.044303797468354444 and parameters: {'model_type': 'lstm', 'units': 64, 'dropout_rate_lstm': 0.2, 'optimizer_lstm': 'adam', 'activation_lstm': 'tanh', 'recurrent_activation': 'tanh', 'bias_initializer': 'ones', 'kernel_initializer': 'orthogonal', 'recurrent_initializer': 'glorot_uniform', 'return_sequences': False, 'stateful': True, 'batch_size': 64}. Best is trial 1 with value: 0.03586497890295359.
