<a href="https://colab.research.google.com/github/Sheik-Abdul/Activity_Detection/blob/main/random_search.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Conv1D, MaxPooling1D, AveragePooling1D, Flatten, Bidirectional
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import SGD
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline

# Load the dataset
df = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/user01_.csv")
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values

# Preprocess the data
scaler = StandardScaler()
X = scaler.fit_transform(X)

encoder = OneHotEncoder(categories='auto')
y = encoder.fit_transform(y.reshape(-1, 1)).toarray()

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Reshape the input data to match the expected input shape of the CNN
X_train_cnn = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test_cnn = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

# Reshape the input data to match the expected input shape of the LSTM
X_train_lstm = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
X_test_lstm = X_test.reshape((X_test.shape[0], 1, X_test.shape[1]))

# Define the LSTM model with different activation functions

def create_lstm_model(units=64, dropout_rate=0.2, activation='relu', optimizer='adam',
                      recurrent_activation='sigmoid', bias_initializer='zeros',
                      kernel_initializer='orthogonal', return_sequences=False,
                      stateful=False, batch_size=32, recurrent_initializer='orthogonal'):
    model = Sequential()
    model.add(Bidirectional(LSTM(units=units, return_sequences=True, recurrent_activation=recurrent_activation,
                                 bias_initializer=bias_initializer, kernel_initializer=kernel_initializer,
                                 recurrent_initializer=recurrent_initializer),
                            input_shape=(X_train_lstm.shape[1], X_train_lstm.shape[2])))
    model.add(Dropout(dropout_rate))
    model.add(Bidirectional(LSTM(units=units, return_sequences=True, recurrent_activation=recurrent_activation,
                                 bias_initializer=bias_initializer, kernel_initializer=kernel_initializer,
                                recurrent_initializer=recurrent_initializer)))
    model.add(Dropout(dropout_rate))
    model.add(Bidirectional(LSTM(units=units, recurrent_activation=recurrent_activation,
                                 bias_initializer=bias_initializer, kernel_initializer=kernel_initializer,
                                recurrent_initializer=recurrent_initializer)))
    model.add(Dropout(dropout_rate))
    model.add(Dense(units=y.shape[1], activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model

# Define the other models (Decision Tree, Random Forest, CNN, etc.)
def create_cnn_model(filters=16, kernel_size=3, strides=1, padding='valid', pool_size=2, activation='relu',
                     pooling=None, dropout_rate=0.2, optimizer='adam', batch_size=32, epochs=10,
                     learning_rate=0.001, momentum=0.9, validation_split=0.1):
    model = Sequential()
    model.add(Conv1D(filters=filters, kernel_size=kernel_size, strides=strides, padding=padding,
                     activation=activation, input_shape=(X_train.shape[1], 1)))
    if pooling is not None:
        if pooling == 'max':
            model.add(MaxPooling1D(pool_size=pool_size))
        elif pooling == 'average':
            model.add(AveragePooling1D(pool_size=pool_size))
    model.add(Flatten())
    model.add(Dense(units=32, activation=activation))
    model.add(Dropout(dropout_rate))
    model.add(Dense(units=y.shape[1], activation='softmax'))
    optimizer = SGD(learning_rate=learning_rate, momentum=momentum)
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model

# Create a dictionary of models
models = {
    'LSTM': KerasClassifier(build_fn=create_lstm_model, epochs=100, batch_size=32, verbose=0),
    'Decision Tree': DecisionTreeClassifier(),
    'Random Forest': RandomForestClassifier(),
    'CNN': KerasClassifier(build_fn=create_cnn_model, epochs=100, batch_size=32, verbose=0)
}

# Define the hyperparameters for grid search for each model
hyperparameters = {
    'LSTM': {
        'units': [32, 64, 128],  # Number of units/neurons in the LSTM layer
        'activation': ['tanh', 'relu'],  # Activation function
        'recurrent_activation': ['sigmoid', 'tanh'],
        'kernel_initializer': ['glorot_uniform', 'orthogonal'],  # Weight initializer for the kernel weights matrix
        'recurrent_initializer': ['glorot_uniform', 'orthogonal'],  # Weight initializer for the recurrent_kernel weights matrix
        'bias_initializer': ['zeros', 'ones'],  # Initializer for the bias vector
        'dropout_rate': [0.0, 0.2, 0.5],  # Fraction of units to drop for input transformation
        'return_sequences': [True, False],  # Whether to return the full sequence or only the last output
        'stateful': [True, False],  # Whether to use the last state as the initial state for the next batch
        'batch_size': [32, 64]  # Number of samples per gradient update
    },
    'Decision Tree': {
        'criterion': ['gini', 'entropy', 'log_loss'],
        'splitter': ['best', 'random'],
        'max_depth': [None, 5, 10],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 5],
        'min_weight_fraction_leaf': [0.0, 0.1, 0.2],
        'max_features': [None, 'sqrt'],
        'max_leaf_nodes': [None, 5, 10],
        'min_impurity_decrease': [0.0, 0.1, 0.2],
        'ccp_alpha': [0.0, 0.1, 0.2]
    },
    'Random Forest': {
        'n_estimators': [100, 200, 300],
        'criterion': ['gini', 'entropy', 'log_loss'],
        'max_depth': [None, 5, 10],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 5],
        'min_weight_fraction_leaf': [0.0, 0.1, 0.2],
        'max_features': ['sqrt', 'log2', None],
        'max_leaf_nodes': [None, 10, 20],
        'min_impurity_decrease': [0.0, 0.1, 0.2],
        'ccp_alpha': [0.0, 0.1, 0.2],
        'max_samples': [None, 0.5, 0.8]
    },
    'CNN': {
        'filters': [16, 32, 64],
        'kernel_size': [3, 5],
        'pool_size': [2, 4, 8],
        'strides': [1, 2],
        'padding': ['same', 'valid'],
        'activation': ['relu', 'sigmoid'],
        'pooling': [None, 'max', 'average'],
        'dropout_rate': [0.0, 0.25, 0.5],
        'optimizer': ['adam', 'rmsprop'],
        'batch_size': [32, 64, 128],
        'epochs': [10, 20, 30],
        'learning_rate': [0.001, 0.01, 0.1],
        'momentum': [0.9, 0.95, 0.99],
        'validation_split': [0.1, 0.2, 0.3]
    }
}

# Perform grid search for each model
best_models = {}
for model_name, model in models.items():
    params = hyperparameters.get(model_name, {})
    random_search = RandomizedSearchCV(estimator=model, param_distributions=params, cv=3, n_iter=10)
    if model_name == 'LSTM':
        random_search.fit(X_train_lstm, y_train)
        best_batch_size_lstm = random_search.best_params_['batch_size']
    elif model_name == 'CNN':
        random_search.fit(X_train_cnn, y_train)
        best_batch_size_cnn = random_search.best_params_['batch_size']
    else:
        random_search.fit(X_train, y_train)
    best_model = random_search.best_estimator_
    best_models[model_name] = {
        'model': best_model,
        'best_params': random_search.best_params_,
    }


early_stopping = EarlyStopping(monitor='val_loss', patience=3)
# Evaluate the best models
for model_name, model_data in best_models.items():
    best_model = model_data['model']
    best_params = model_data['best_params']
    y_true_classes = np.argmax(y_test, axis=1)
    if model_name == 'LSTM':
        best_model.fit(X_train_lstm, y_train,
                       epochs=100,
                       batch_size=best_batch_size_lstm,
                       validation_data=(X_test_lstm, y_test),
                       callbacks=early_stopping
                      )
        y_pred_lstm = best_model.predict_proba(X_test_lstm)
        y_pred_classes_lstm = np.argmax(y_pred_lstm, axis=1)
        accuracy = accuracy_score(y_true_classes, y_pred_classes_lstm)
        precision = precision_score(y_true_classes, y_pred_classes_lstm, average='weighted',zero_division=1)
        recall = recall_score(y_true_classes, y_pred_classes_lstm, average='weighted')
        f1 = f1_score(y_true_classes, y_pred_classes_lstm, average='weighted')
        roc_auc = roc_auc_score(y_test, y_pred_lstm, average='weighted', multi_class='ovr')
        print("Evaluation Metrics LSTM:")
        print("Accuracy:", accuracy)
        print("Precision:", precision)
        print("Recall:", recall)
        print("F1 Score:", f1)
        print("ROC AUC:", roc_auc)
    elif model_name == 'CNN':
        best_model.fit(X_train_cnn, y_train,
                       epochs=100,
                       batch_size=best_batch_size_cnn,
                       validation_data=(X_test_cnn, y_test),
                       callbacks=early_stopping
                      )
        y_pred_cnn = best_model.predict_proba(X_test_cnn)
        y_pred_classes_cnn = np.argmax(y_pred_cnn, axis=1)
        accuracy = accuracy_score(y_true_classes, y_pred_classes_cnn)
        precision = precision_score(y_true_classes, y_pred_classes_cnn, average='weighted',zero_division=1)
        recall = recall_score(y_true_classes, y_pred_classes_cnn, average='weighted')
        f1 = f1_score(y_true_classes, y_pred_classes_cnn, average='weighted')
        roc_auc = roc_auc_score(y_test, y_pred_cnn, average='weighted', multi_class='ovr')
        print("Evaluation Metrics CNN:")
        print("Accuracy:", accuracy)
        print("Precision:", precision)
        print("Recall:", recall)
        print("F1 Score:", f1)
        print("ROC AUC:", roc_auc)
    else:
        best_model.fit(X_train, y_train)
        y_pred = best_model.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred, average='weighted',zero_division=1)
        recall = recall_score(y_test, y_pred, average='weighted')
        f1 = f1_score(y_test, y_pred, average='weighted')
        roc_auc = roc_auc_score(y_test, y_pred, average='weighted', multi_class='ovr')
        print(f"Evaluation Metrics {model_name}:")
        print("Accuracy:", accuracy)
        print("Precision:", precision)
        print("Recall:", recall)
        print("F1 Score:", f1)
        print("ROC AUC:", roc_auc)
    print(f"{model_name} - Best Parameters: {best_params}, Accuracy: {accuracy}")

  'LSTM': KerasClassifier(build_fn=create_lstm_model, epochs=100, batch_size=32, verbose=0),
  'CNN': KerasClassifier(build_fn=create_cnn_model, epochs=100, batch_size=32, verbose=0)


Evaluation Metrics LSTM:
Accuracy: 0.9409282700421941
Precision: 0.9447048557853209
Recall: 0.9409282700421941
F1 Score: 0.9419059404891366
ROC AUC: 0.9913460404618946
LSTM - Best Parameters: {'units': 32, 'stateful': True, 'return_sequences': False, 'recurrent_initializer': 'glorot_uniform', 'recurrent_activation': 'sigmoid', 'kernel_initializer': 'glorot_uniform', 'dropout_rate': 0.0, 'bias_initializer': 'ones', 'batch_size': 32, 'activation': 'relu'}, Accuracy: 0.9409282700421941
Evaluation Metrics Decision Tree:
Accuracy: 0.9113924050632911
Precision: 0.9205015647228085
Recall: 0.9113924050632911
F1 Score: 0.9134345395476474
ROC AUC: 0.9464622111545591
Decision Tree - Best Parameters: {'splitter': 'best', 'min_weight_fraction_leaf': 0.0, 'min_samples_split': 10, 'min_samples_leaf': 1, 'min_impurity_decrease': 0.0, 'max_leaf_nodes': 5, 'max_features': None, 'max_depth': 10, 'criterion': 'log_loss', 'ccp_alpha': 0.0}, Accuracy: 0.9113924050632911
Evaluation Metrics Random Forest:
Acc