## Importing Libraries

In [1]:
import numpy as np
import pandas as pd

from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from prettytable import PrettyTable
from sklearn.preprocessing import LabelEncoder, LabelBinarizer

from keras.models import Sequential
from keras.layers.core import Dense
from keras.layers import BatchNormalization, Dropout
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import classification_report

import warnings
warnings.filterwarnings("ignore")

## Loading Moment Invariants

In [2]:
# reading the generated training moment
train_df = pd.read_csv('C:/Users/noqui/Desktop/FYP\Work_Folder_Testing/output/moment/Cassava/100/Cassava_Train_512.csv')

# reading the generated testing moment
test_df = pd.read_csv('C:/Users/noqui/Desktop/FYP\Work_Folder_Testing/output/moment/Cassava/100/Cassava_Test_512.csv')

train_df.reset_index(drop = True, inplace = True)
train_df = train_df.dropna()

test_df.reset_index(drop = True, inplace = True)
test_df = test_df.dropna()

### Preparing the Datasets

In [3]:
X_train_df = train_df.sort_values(['label'])
X_train_df = X_train_df.drop(['Inm[1,1]'], axis = 1) # drop Inm[1,1]
X_test_df = test_df.sort_values(['label'])
X_test_df = X_test_df.drop(['Inm[1,1]'], axis = 1) # drop Inm[1,1]

# reset index
X_train_df.reset_index(inplace = True, drop = True)
X_test_df.reset_index(inplace = True, drop = True)

data = X_train_df.append([X_test_df])
y = data['class']

X_train_df = X_train_df.iloc[:,3:230].copy() # excluding (Inm[0,0], Inm[0,1], Inm[1,0])
X_test_df = X_test_df.iloc[:,3:230].copy() # excluding (Inm[0,0], Inm[0,1], Inm[1,0])

# apply normalization 
for column in X_train_df.columns:
    X_train_df[column] = (X_train_df[column] - X_train_df[column].min()) / (X_train_df[column].max() - X_train_df[column].min()) 

for column in X_test_df.columns:
    X_test_df[column] = (X_test_df[column] - X_test_df[column].min()) / (X_test_df[column].max() - X_test_df[column].min()) 
    
print(X_train_df.shape)
print(X_test_df.shape)

(14980, 227)
(6417, 227)


In [4]:
X = X_train_df.append([X_test_df])

label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)
X.shape, y.shape

((21397, 227), (21397,))

In [5]:
#Split the data in training set and test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 27)

print(X_train.shape, X_test.shape)
print(y_train.shape, y_test.shape)

(14977, 227) (6420, 227)
(14977,) (6420,)


In [6]:
def correlation(dataset, threshold):
    col_corr = set() # set will contain unique values.
    corr_matrix = dataset.corr() # finding the correlation between columns.
    for i in range(len(corr_matrix.columns)): 
        for j in range(i):
            if abs(corr_matrix.iloc[i, j]) > threshold: # checking the correlation between columns.
                colName = corr_matrix.columns[i]
                col_corr.add(colName) # adding the correlated column name heigher than threshold value.
    return col_corr

col = correlation(X_train, 0.7)

#remove correlated columns
X_train.drop(columns = col, axis = 1, inplace = True)
X_test.drop(columns = col, axis = 1, inplace = True)

# lets check the shape of training set and test set.
X_train.shape, X_test.shape

((14977, 19), (6420, 19))

In [7]:
model = SFS(KNeighborsClassifier(n_neighbors = 5), k_features = 10, forward = True, floating = False, 
            n_jobs = -1, verbose = 2, scoring = 'accuracy', cv = 5)

model.fit(X_train, y_train) 

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   6 out of  19 | elapsed:    2.9s remaining:    6.5s
[Parallel(n_jobs=-1)]: Done  16 out of  19 | elapsed:    3.8s remaining:    0.6s
[Parallel(n_jobs=-1)]: Done  19 out of  19 | elapsed:    3.9s finished

[2023-06-08 22:52:18] Features: 1/10 -- score: 0.5213334640957002[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   5 out of  18 | elapsed:    1.1s remaining:    3.1s
[Parallel(n_jobs=-1)]: Done  15 out of  18 | elapsed:    2.2s remaining:    0.4s
[Parallel(n_jobs=-1)]: Done  18 out of  18 | elapsed:    2.2s finished

[2023-06-08 22:52:21] Features: 2/10 -- score: 0.5248054724050543[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   3 out of  17 | elapsed:    1.4s remaining:    6.8s
[Parallel(n_jobs=-1)]: Done  12 out of  17 | elapsed:    2.8s remaining:    1.1s
[Parallel

SequentialFeatureSelector(estimator=KNeighborsClassifier(), k_features=10,
                          n_jobs=-1, scoring='accuracy', verbose=2)

In [8]:
X_train_Features = model.transform(X_train)
X_test_Features = model.transform(X_test)

X_train_Features_4 = X_train_Features[:, 0:4]
X_test_Features_4 = X_test_Features[:, 0:4]
print("Train", X_train_Features_4.shape)
print("Test", X_test_Features_4.shape)
print()
##########################################################

X_train_Features_6 = X_train_Features[:, 0:6]
X_test_Features_6 = X_test_Features[:, 0:6]
print("Train", X_train_Features_6.shape)
print("Test", X_test_Features_6.shape)
print()
##########################################################

X_train_Features_8 = X_train_Features[:, 0:8]
X_test_Features_8 = X_test_Features[:, 0:8]
print("Train", X_train_Features_8.shape)
print("Test", X_test_Features_8.shape)
print()
##########################################################

X_train_Features_10 = X_train_Features[:, 0:10]
X_test_Features_10 = X_test_Features[:, 0:10]
print("Train", X_train_Features_10.shape)
print("Test", X_test_Features_10.shape)

Train (14977, 4)
Test (6420, 4)

Train (14977, 6)
Test (6420, 6)

Train (14977, 8)
Test (6420, 8)

Train (14977, 10)
Test (6420, 10)


In [9]:
num_components = 2
pca = PCA(n_components=num_components)

principalComponents_train = pca.fit_transform(X_train_Features_10)
X_train_pca2 = pd.DataFrame(principalComponents_train)
print("Train", X_train_pca2.shape)

principalComponents_test = pca.fit_transform(X_test_Features_10)
X_test_pca2 = pd.DataFrame(principalComponents_test)
print("Test", X_test_pca2.shape)
print()
##########################################################

num_components = 4
pca = PCA(n_components = num_components)

principalComponents_train = pca.fit_transform(X_train_Features_10)
X_train_pca4 = pd.DataFrame(principalComponents_train)
print("Train", X_train_pca4.shape)

principalComponents_test = pca.fit_transform(X_test_Features_10)
X_test_pca4 = pd.DataFrame(principalComponents_test)
print("Test", X_test_pca4.shape)
print()
##########################################################

num_components = 6
pca = PCA(n_components = num_components)

principalComponents_train = pca.fit_transform(X_train_Features_10)
X_train_pca6 = pd.DataFrame(principalComponents_train)
print("Train", X_train_pca6.shape)

principalComponents_test = pca.fit_transform(X_test_Features_10)
X_test_pca6 = pd.DataFrame(principalComponents_test)
print("Test", X_test_pca6.shape)

Train (14977, 2)
Test (6420, 2)

Train (14977, 4)
Test (6420, 4)

Train (14977, 6)
Test (6420, 6)


In [10]:
trainsets = [
    X_train_Features_4, X_train_Features_6, X_train_Features_8, X_train_Features_10, X_train_pca2, X_train_pca4, X_train_pca6
]

testsets = [
    X_test_Features_4, X_test_Features_6, X_test_Features_8, X_test_Features_10, X_test_pca2, X_test_pca4, X_test_pca6
]

featureSelectionMethods = [
    "4 Features", "6 Features", "8 Features", "10 Features", "PCA 2", "PCA 4", "PCA 6"
]

## Classification

In [11]:
# Classification Model
def modelTraining(model):
    for train, test, fSMethod in zip(trainsets, testsets, featureSelectionMethods):
        print(f"Feature Selection with {fSMethod}")
        model = model.fit(train, y_train)

        summary = PrettyTable()
        summary.field_names = ["Score (%)"]
        summary.add_row([round(model.score(test, y_test) * 100, 2)])
        print(summary)

### Support Vector Classifier

In [12]:
model = SVC()
modelTraining(model)

Feature Selection with 4 Features
+-----------+
| Score (%) |
+-----------+
|   62.74   |
+-----------+
Feature Selection with 6 Features
+-----------+
| Score (%) |
+-----------+
|   62.74   |
+-----------+
Feature Selection with 8 Features
+-----------+
| Score (%) |
+-----------+
|   62.74   |
+-----------+
Feature Selection with 10 Features
+-----------+
| Score (%) |
+-----------+
|   62.74   |
+-----------+
Feature Selection with PCA 2
+-----------+
| Score (%) |
+-----------+
|   62.76   |
+-----------+
Feature Selection with PCA 4
+-----------+
| Score (%) |
+-----------+
|   62.76   |
+-----------+
Feature Selection with PCA 6
+-----------+
| Score (%) |
+-----------+
|   62.76   |
+-----------+


### K-Nearest Neighbors Classifier

In [13]:
model = KNeighborsClassifier(30)
modelTraining(model)

Feature Selection with 4 Features
+-----------+
| Score (%) |
+-----------+
|   62.73   |
+-----------+
Feature Selection with 6 Features
+-----------+
| Score (%) |
+-----------+
|   62.71   |
+-----------+
Feature Selection with 8 Features
+-----------+
| Score (%) |
+-----------+
|   62.76   |
+-----------+
Feature Selection with 10 Features
+-----------+
| Score (%) |
+-----------+
|   62.73   |
+-----------+
Feature Selection with PCA 2
+-----------+
| Score (%) |
+-----------+
|   62.69   |
+-----------+
Feature Selection with PCA 4
+-----------+
| Score (%) |
+-----------+
|   62.69   |
+-----------+
Feature Selection with PCA 6
+-----------+
| Score (%) |
+-----------+
|   62.73   |
+-----------+


### Decision Tree Classifier

In [14]:
model = DecisionTreeClassifier(criterion = 'entropy', max_depth = 3, splitter = 'best')
modelTraining(model)

Feature Selection with 4 Features
+-----------+
| Score (%) |
+-----------+
|   62.76   |
+-----------+
Feature Selection with 6 Features
+-----------+
| Score (%) |
+-----------+
|   62.69   |
+-----------+
Feature Selection with 8 Features
+-----------+
| Score (%) |
+-----------+
|   62.62   |
+-----------+
Feature Selection with 10 Features
+-----------+
| Score (%) |
+-----------+
|   62.62   |
+-----------+
Feature Selection with PCA 2
+-----------+
| Score (%) |
+-----------+
|   62.76   |
+-----------+
Feature Selection with PCA 4
+-----------+
| Score (%) |
+-----------+
|   62.76   |
+-----------+
Feature Selection with PCA 6
+-----------+
| Score (%) |
+-----------+
|   62.77   |
+-----------+


### Random Forest Classifier

In [15]:
model = RandomForestClassifier(max_depth = 6, random_state = 0)
modelTraining(model)

Feature Selection with 4 Features
+-----------+
| Score (%) |
+-----------+
|   62.76   |
+-----------+
Feature Selection with 6 Features
+-----------+
| Score (%) |
+-----------+
|   62.76   |
+-----------+
Feature Selection with 8 Features
+-----------+
| Score (%) |
+-----------+
|   62.74   |
+-----------+
Feature Selection with 10 Features
+-----------+
| Score (%) |
+-----------+
|   62.73   |
+-----------+
Feature Selection with PCA 2
+-----------+
| Score (%) |
+-----------+
|   62.74   |
+-----------+
Feature Selection with PCA 4
+-----------+
| Score (%) |
+-----------+
|   62.77   |
+-----------+
Feature Selection with PCA 6
+-----------+
| Score (%) |
+-----------+
|   62.76   |
+-----------+


### Neural Network

In [16]:
# convert the labels from integers to one-hot-encoded vectors
lb = LabelEncoder()
y_train = lb.fit_transform(y_train)
y_test = lb.transform(y_test)

print(y_train.shape)
print(y_test.shape)

(14977,)
(6420,)


In [17]:
y_train = tf.keras.utils.to_categorical(y_train, num_classes = 5)
y_test = tf.keras.utils.to_categorical(y_test, num_classes = 5)

print(y_train.shape)
print(y_test.shape)

(14977, 5)
(6420, 5)


In [18]:
# Reduce learning rate when there is a change lesser than <min_delta> in <val_accuracy> for more than <patience> epochs
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor = 'val_accuracy', mode = 'max', min_delta = 0.01,
                                                                                    patience = 3, factor = 0.25, verbose = 1, cooldown = 0, min_lr = 0.0001)

early_stopper = tf.keras.callbacks.EarlyStopping(monitor = 'val_accuracy', mode = 'max', min_delta = 0.005,
                                                                                 patience = 10, verbose = 1, restore_best_weights = True)

In [19]:
for i, train_set in enumerate(trainsets):
    input_shape = train_set.shape[1]
    test_set = testsets[i]
    fSMethod = featureSelectionMethods[i]

    print(f"Feature Selection with {fSMethod}")

    model = Sequential()
    model.add(Dense(1024, input_shape = (input_shape,), activation = "relu"))
    model.add(Dropout(0.5))
    model.add(Dense(5, activation = "softmax"))

    model.compile(optimizer = Adam(learning_rate = 0.001), loss = 'categorical_crossentropy', metrics = ['accuracy'])
    model.fit(train_set, y_train, validation_data = (test_set, y_test), epochs = 20, callbacks = [early_stopper, reduce_lr])

    score = model.evaluate(test_set, y_test, verbose = 1)
    print('Test accuracy:', score[1])
    print('\n')

Feature Selection with 4 Features
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 4: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 7: ReduceLROnPlateau reducing learning rate to 0.0001.
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 11: early stopping
Test accuracy: 0.6277258396148682


Feature Selection with 6 Features
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 4: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 7: ReduceLROnPlateau reducing learning rate to 0.0001.
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 11: early stopping
Test accuracy: 0.6275700926780701


Feature Selection with 8 Features
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 4: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 7: ReduceLROnPlateau reducing learning rate to 0.0001.
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11

Epoch 7/20
Epoch 7: ReduceLROnPlateau reducing learning rate to 0.0001.
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 11: early stopping
Test accuracy: 0.6275700926780701


Feature Selection with PCA 2
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 4: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 7: ReduceLROnPlateau reducing learning rate to 0.0001.
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 11: early stopping
Test accuracy: 0.6275700926780701


Feature Selection with PCA 4
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 4: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 7: ReduceLROnPlateau reducing learning rate to 0.0001.
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 11: early stopping
Test accuracy: 0.6275700926780701


Feature Selection with PCA 6
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 4: ReduceLROnPlateau reducing learning rat

Test accuracy: 0.6275700926780701


