In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import lightgbm as lgb
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV, KFold
from sklearn.metrics import classification_report
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from keras.models import Sequential
from keras.layers import Dense, LSTM, MaxPooling1D, Flatten, Conv1D, Dropout, Reshape

In [None]:
# Colab-only: mount Google Drive at /content/gdrive so the CSV read further down is reachable.
from google.colab import drive
drive.mount('/content/gdrive')

In [None]:
# mean_normal_feature_dataframe = pd.read_csv('/content/gdrive/MyDrive/New data Mar 2023/mean_normal_combined_extracted_features.csv')
# mean_stretch_pitch_feature_dataframe = pd.read_csv('/content/gdrive/MyDrive/New data Mar 2023/mean_stretch_pitch_combined_extracted_features.csv')
combined_feature_dataframe = pd.read_csv('/content/gdrive/MyDrive/New data Mar 2023/combined_mean_feature_dataframe.csv')

In [None]:
combined_feature_dataframe.head()

## Data Preparation

In [None]:
X = combined_feature_dataframe.drop(columns = ['Labels'])
Y_label = combined_feature_dataframe['Labels']

In [None]:
x_train_label, x_test_label, y_train_label, y_test_label = train_test_split(X, Y_label, random_state = 42, shuffle = True)

x_train_label.shape, x_test_label.shape, y_train_label.shape, y_test_label.shape

In [None]:
encoder = OneHotEncoder()
Y = encoder.fit_transform(np.array(Y_label).reshape(-1,1)).toarray()

In [None]:
x_train, x_test, y_train, y_test = train_test_split(X, Y, random_state = 42, shuffle = True)

x_train.shape, x_test.shape, y_train.shape, y_test.shape

In [None]:
# Learn mean/variance from the training rows only, then apply that same scaling to both splits.
standard_scale = StandardScaler().fit(x_train)
x_train, x_test = standard_scale.transform(x_train), standard_scale.transform(x_test)

tuple(part.shape for part in (x_train, x_test, y_train, y_test))

In [None]:
x_train_2d, x_test_2d, y_train_2d, y_test_2d = x_train, x_test, y_train, y_test

In [None]:
x_train = np.expand_dims(x_train, axis = 2) 
x_test = np.expand_dims(x_test, axis = 2)

x_train.shape, x_test.shape, y_train.shape, y_test.shape

## Data Modelling

### Sequential Model

In [None]:
sequential_model = Sequential()
sequential_model.add(Dense(10, activation = 'relu', input_shape = (x_train.shape[1],)))
sequential_model.add(Dense(10, activation = 'relu'))
sequential_model.add(Dense(7, activation = 'softmax'))

sequential_model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])

sequential_model.summary()

In [None]:
sequential_model_history = sequential_model.fit(x_train, y_train, epochs = 100, validation_split = 0.2)

In [None]:
# Learning curves for the dense model.
train_accuracy = sequential_model_history.history['accuracy']
train_loss = sequential_model_history.history['loss']
# The val_* series come from validation_split in fit(), i.e. held-out training rows,
# not from the test set. The variable names are kept; the plot labels say "Validation".
test_accuracy = sequential_model_history.history['val_accuracy']
test_loss = sequential_model_history.history['val_loss']

# One x position per recorded epoch, derived from the history so the plot stays
# correct if the epochs argument of fit() changes.
epoch = list(range(len(train_loss)))

figure, axis = plt.subplots(1, 2)
figure.set_size_inches(24, 5)

axis[0].plot(epoch, train_loss, label = 'Training Loss')
axis[0].plot(epoch, test_loss, label = 'Validation Loss')
axis[0].set_title('Training & Validation Loss')
axis[0].set_xlabel('Epochs')
axis[0].set_ylabel('Loss')
axis[0].legend()

axis[1].plot(epoch, train_accuracy, label = 'Training Accuracy')
axis[1].plot(epoch, test_accuracy, label = 'Validation Accuracy')
axis[1].set_title('Training & Validation Accuracy')
axis[1].set_xlabel('Epochs')
axis[1].set_ylabel('Accuracy')
axis[1].legend();

In [None]:
sequential_model_predict_test = sequential_model.predict(x_test)
sequential_model_y_predict = encoder.inverse_transform(sequential_model_predict_test)
sequential_model_y_test = encoder.inverse_transform(y_test)

In [None]:
sequential_model_prediction_dataframe = pd.DataFrame(columns = ['Predicted Label', 'Actual Label'])
sequential_model_prediction_dataframe['Predicted Label'] = sequential_model_y_predict.flatten()
sequential_model_prediction_dataframe['Actual Label'] = sequential_model_y_test.flatten()

sequential_model_prediction_dataframe.head(10)

In [None]:
print(classification_report(sequential_model_y_test, sequential_model_y_predict))

### Sequential Conv1D Model

In [None]:
sequential_conv1D_model = Sequential()

sequential_conv1D_model.add(Conv1D(256, kernel_size = 5, padding = 'same', activation = 'relu', input_shape = (x_train.shape[1], 1)))
sequential_conv1D_model.add(MaxPooling1D(pool_size = (3), padding = 'same'))
sequential_conv1D_model.add(Dropout(0.2))

sequential_conv1D_model.add(Conv1D(128, kernel_size = 5, padding = 'same', activation = 'relu'))
sequential_conv1D_model.add(MaxPooling1D(pool_size = (3), padding = 'same'))
sequential_conv1D_model.add(Dropout(0.2))

sequential_conv1D_model.add(Conv1D(64, kernel_size = 5, padding = 'same', activation = 'relu'))
sequential_conv1D_model.add(MaxPooling1D(pool_size = (3), padding = 'same'))
sequential_conv1D_model.add(Dropout(0.2))

sequential_conv1D_model.add(Flatten())
sequential_conv1D_model.add(Dense(units = 7, activation = 'softmax'))

sequential_conv1D_model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])

sequential_conv1D_model.summary()

In [None]:
sequential_conv1D_model_history = sequential_conv1D_model.fit(x_train, y_train, epochs = 150, validation_split = 0.2)

In [None]:
# Learning curves for the Conv1D model.
train_accuracy = sequential_conv1D_model_history.history['accuracy']
train_loss = sequential_conv1D_model_history.history['loss']
# The val_* series come from validation_split in fit(), i.e. held-out training rows,
# not from the test set. The variable names are kept; the plot labels say "Validation".
test_accuracy = sequential_conv1D_model_history.history['val_accuracy']
test_loss = sequential_conv1D_model_history.history['val_loss']

# One x position per recorded epoch, derived from the history so the plot stays
# correct if the epochs argument of fit() changes.
epoch = list(range(len(train_loss)))

figure, axis = plt.subplots(1, 2)
figure.set_size_inches(24, 5)

axis[0].plot(epoch, train_loss, label = 'Training Loss')
axis[0].plot(epoch, test_loss, label = 'Validation Loss')
axis[0].set_title('Training & Validation Loss')
axis[0].set_xlabel('Epochs')
axis[0].set_ylabel('Loss')
axis[0].legend()

axis[1].plot(epoch, train_accuracy, label = 'Training Accuracy')
axis[1].plot(epoch, test_accuracy, label = 'Validation Accuracy')
axis[1].set_title('Training & Validation Accuracy')
axis[1].set_xlabel('Epochs')
axis[1].set_ylabel('Accuracy')
axis[1].legend();

In [None]:
sequential_conv1D_model_predict_test = sequential_conv1D_model.predict(x_test)
sequential_conv1D_model_y_predict = encoder.inverse_transform(sequential_conv1D_model_predict_test)
sequential_conv1D_model_y_test = encoder.inverse_transform(y_test)

In [None]:
sequential_conv1D_model_prediction_dataframe = pd.DataFrame(columns = ['Predicted Label', 'Actual Label'])
sequential_conv1D_model_prediction_dataframe['Predicted Label'] = sequential_conv1D_model_y_predict.flatten()
sequential_conv1D_model_prediction_dataframe['Actual Label'] = sequential_conv1D_model_y_test.flatten()

sequential_conv1D_model_prediction_dataframe.head(10)

In [None]:
print(classification_report(sequential_conv1D_model_y_test, sequential_conv1D_model_y_predict))

### MLP Classifier

In [None]:
# Long execution time; 10 minutes
mlp_classifier_model = MLPClassifier(max_iter = 1000)

parameter_grid = {
    'hidden_layer_sizes': (250,),
    'activation': ['tanh', 'relu'],
    'solver': ['sgd', 'adam'],
    'batch_size': [250, 500],
    'learning_rate': ['adaptive']
}

mlp_classifier_grid_CV = GridSearchCV(mlp_classifier_model, parameter_grid, cv = 5, n_jobs = -1)
mlp_classifier_grid_CV.fit(x_train_2d, y_train_2d)

In [None]:
print('Best Parameter: ', mlp_classifier_grid_CV.best_params_)

In [None]:
mlp_classifier_model = MLPClassifier(activation = 'relu', hidden_layer_sizes = 250, learning_rate = 'adaptive', solver = 'adam', max_iter = 1000)

mlp_classifier_model_history = mlp_classifier_model.fit(x_train_2d, y_train_2d)

In [None]:
mlp_classifier_model_y_true, mlp_classifier_model_y_predict = y_test_2d, mlp_classifier_model.predict(x_test_2d)

In [None]:
mlp_classifier_model_prediction_dataframe = pd.DataFrame(columns = ['Predicted Label', 'Actual Label'])
mlp_classifier_model_prediction_dataframe['Predicted Label'] = mlp_classifier_model_y_predict.flatten()
mlp_classifier_model_prediction_dataframe['Actual Label'] = y_test_2d.flatten()

mlp_classifier_model_prediction_dataframe.head(10)

In [None]:
print(classification_report(y_test_2d, mlp_classifier_model_y_predict))

### Decision Tree Classifier

In [None]:
dt_classifier_model = DecisionTreeClassifier()

parameter_grid = {
    'max_features': ['auto', 'sqrt', 'log2'],
    'max_depth': [5, 6, 7, 8, 9, 10],
    'ccp_alpha': [0.1, 0.01, 0.001, 0.0001],
    'criterion': ['gini', 'entropy']
}

dt_classifier_grid_CV = GridSearchCV(dt_classifier_model, parameter_grid, cv = 5, n_jobs = -1)
dt_classifier_grid_CV.fit(x_train_2d, y_train_2d)

In [None]:
print('Best Parameter: ', dt_classifier_grid_CV.best_params_)

In [None]:
dt_classifier_model = DecisionTreeClassifier(ccp_alpha = 0.0001, criterion = 'entropy', max_depth = 9, max_features = 'sqrt')

dt_classifier_model_history = dt_classifier_model.fit(x_train_2d, y_train_2d)

In [None]:
dt_classifier_model_y_true, dt_classifier_model_y_predict = y_test_2d, dt_classifier_model.predict(x_test_2d)

In [None]:
print(classification_report(y_test_2d, dt_classifier_model_y_predict))

### Random Forest Classifier

In [None]:
# Long execution time; 14 minutes
random_forest_model = RandomForestClassifier()

parameter_grid = {
    'n_estimators': [100, 250, 500],
    'max_features': ['auto', 'log2', 'sqrt'],
    'max_depth': [4, 5, 6, 7, 8, 10],
    'criterion': ['gini', 'entropy']
}

random_forest_grid_CV = GridSearchCV(random_forest_model, parameter_grid, cv = 5, n_jobs = -1)
random_forest_grid_CV.fit(x_train_label, y_train_label)

In [None]:
print('Best Parameter: ', random_forest_grid_CV.best_params_)

In [None]:
random_forest_model = RandomForestClassifier(criterion = 'entropy', max_depth = 10, max_features = 'log2', n_estimators =  100)

random_forest_model.fit(x_train_2d, y_train_2d)

In [None]:
random_forest_model_y_true, random_forest_model_y_predict = y_test_2d, random_forest_model.predict(x_test_2d)

In [None]:
print(classification_report(y_test_2d, random_forest_model_y_predict))

### LightGBM Model

In [None]:
# Long Execution Time; 20 minutes
lightGBM_model = lgb.LGBMClassifier()

parameter_grid = {
    'max_bin': [300, 500],
    'learning_rate': [0.01, 0.1, 1, 10, 100],
    'n_estimators': [100, 250, 500],
    'num_leaves': [10, 50, 100, 250],
    'max_depth': [-1],
    'objective': ['softmax']
}

lightGBM_grid_CV = GridSearchCV(lightGBM_model, parameter_grid, cv = 5, n_jobs = -1)
lightGBM_grid_CV.fit(x_train_label, y_train_label)

In [None]:
print('Best Parameter: ', lightGBM_grid_CV.best_params_)

In [None]:
lightGBM_model = lgb.LGBMClassifier(learning_rate = 0.1, max_bin = 300, max_depth = -1, num_leaves = 100, objective = 'softmax', n_estimators = 100)

lightGBM_model.fit(x_train_label, y_train_label)

In [None]:
lightGBM_model_y_true, lightGBM_model_y_predict = y_test_label, lightGBM_model.predict(x_test_label)

In [None]:
print(classification_report(y_test_label, lightGBM_model_y_predict))

### LSTM Model

In [None]:
lstm_model = Sequential()

lstm_model.add(LSTM(128, input_shape = (11, 1), return_sequences = True))
lstm_model.add(Dropout(0.3))
lstm_model.add(Flatten())
lstm_model.add(Dense(64, activation = 'relu'))
lstm_model.add(Dense(7, activation = 'sigmoid'))

lstm_model.compile(loss = 'categorical_crossentropy', optimizer = 'Adam', metrics = ['accuracy'])

lstm_model.summary()

In [None]:
lstm_model_history = lstm_model.fit(x_train, y_train, epochs = 150, validation_split = 0.2)

In [None]:
# Learning curves for the LSTM model.
train_accuracy = lstm_model_history.history['accuracy']
train_loss = lstm_model_history.history['loss']
# The val_* series come from validation_split in fit(), i.e. held-out training rows,
# not from the test set. The variable names are kept; the plot labels say "Validation".
test_accuracy = lstm_model_history.history['val_accuracy']
test_loss = lstm_model_history.history['val_loss']

# One x position per recorded epoch, derived from the history so the plot stays
# correct if the epochs argument of fit() changes.
epoch = list(range(len(train_loss)))

figure, axis = plt.subplots(1, 2)
figure.set_size_inches(24, 5)

axis[0].plot(epoch, train_loss, label = 'Training Loss')
axis[0].plot(epoch, test_loss, label = 'Validation Loss')
axis[0].set_title('Training & Validation Loss')
axis[0].set_xlabel('Epochs')
axis[0].set_ylabel('Loss')
axis[0].legend()

axis[1].plot(epoch, train_accuracy, label = 'Training Accuracy')
axis[1].plot(epoch, test_accuracy, label = 'Validation Accuracy')
axis[1].set_title('Training & Validation Accuracy')
axis[1].set_xlabel('Epochs')
axis[1].set_ylabel('Accuracy')
axis[1].legend();

In [None]:
lstm_model_predict_test = lstm_model.predict(x_test)
lstm_model_y_predict = encoder.inverse_transform(lstm_model_predict_test)
lstm_model_y_test = encoder.inverse_transform(y_test)

In [None]:
lstm_model_prediction_dataframe = pd.DataFrame(columns = ['Predicted Label', 'Actual Label'])
lstm_model_prediction_dataframe['Predicted Label'] = lstm_model_y_predict.flatten()
lstm_model_prediction_dataframe['Actual Label'] = lstm_model_y_test.flatten()

lstm_model_prediction_dataframe.head(10)

In [None]:
print(classification_report(lstm_model_y_test, lstm_model_y_predict))

### Model - TPOT Analysis

In [None]:
k_fold = KFold(n_splits = 5, random_state = 42, shuffle = True)

for train_index, val_index in k_fold.split(X):
  x_train, x_val =  X.iloc[train_index], X.iloc[val_index]
  y_train, y_val =  Y_label.iloc[train_index], Y_label.iloc[val_index]

In [None]:
# Long execution time; 50 minutes
gradient_booster_model = GradientBoostingClassifier()

parameter_grid = {
    'criterion': ['friedman_mse', 'squared_error'],
    'learning_rate': [0.01, 0.025, 0.1, 0.25, 1, 10, 100],
    'max_depth': [3, 5, 7, 9],
    'n_estimators': [10, 100]
}

gradient_booster_grid_CV = GridSearchCV(gradient_booster_model, parameter_grid, cv = 5, n_jobs = -1)
gradient_booster_grid_CV.fit(x_train, y_train)

In [None]:
print('Best Parameter: ', gradient_booster_grid_CV.best_params_)

In [None]:
gradient_booster_model = GradientBoostingClassifier(criterion = 'friedman_mse', learning_rate = 0.1, max_depth = 9, n_estimators = 100)

gradient_booster_model.fit(x_train, y_train)

In [None]:
gradient_booster_model_y_true, gradient_booster_model_y_predict = y_test_label, gradient_booster_model.predict(x_test_label)

In [None]:
print(classification_report(y_test_label, gradient_booster_model_y_predict))

### Accuracy Check

In [None]:
# Test-set accuracy of every model, as a percentage.
# Keras models: evaluate() returns [loss, accuracy], so index 1 is the accuracy.
# scikit-learn / LightGBM models: accuracy_score or the estimator's own score().
# Each value is computed immediately before its print so the output order is unchanged.
sequential_accuracy = sequential_model.evaluate(x_test, y_test)[1] * 100
print('Accuracy of Sequential Model: ', sequential_accuracy, '%.')

lightGBM_accuracy = accuracy_score(y_test_label, lightGBM_model_y_predict) * 100
print('Accuracy of LGBMClassifier Model: ', lightGBM_accuracy, '%.')

random_forest_accuracy = accuracy_score(y_test_2d, random_forest_model_y_predict) * 100
print('Accuracy of RandomForestClassifier Model: ', random_forest_accuracy, '%.')

dt_classifier_accuracy = accuracy_score(y_test_2d, dt_classifier_model_y_predict) * 100
print('Accuracy of DecisionTreeClassifier Model: ', dt_classifier_accuracy, '%.')

mlp_classifier_accuracy = accuracy_score(mlp_classifier_model_y_true, mlp_classifier_model_y_predict) * 100
print('Accuracy of MLPClassifier Model: ', mlp_classifier_accuracy, '%.')

sequential_conv1D_accuracy = sequential_conv1D_model.evaluate(x_test, y_test)[1] * 100
print('Accuracy of Sequential Conv1D Model: ', sequential_conv1D_accuracy, '%.')

lstm_accuracy = lstm_model.evaluate(x_test, y_test)[1] * 100
print('Accuracy of LSTM Model: ', lstm_accuracy, '%.')

gradient_booster_accuracy = gradient_booster_model.score(x_test_label, y_test_label) * 100
print('Accuracy of Gradient Boosting Classifier Model: ', gradient_booster_accuracy, '%.')