## Importing Libraries

In [30]:
import numpy as np
import pandas as pd

from sklearn.preprocessing import LabelBinarizer, LabelEncoder
from prettytable import PrettyTable
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

from keras.models import Sequential
from keras.layers.core import Dense
from keras.layers import BatchNormalization, Dropout
import tensorflow as tf
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import classification_report

import warnings
warnings.filterwarnings("ignore")

## Loading Moment Invariants

In [31]:
# reading the generated training moment
train_df = pd.read_csv('C:/Users/noqui/Desktop/FYP\Work_Folder_Testing/output/moment/Cassava//100/Cassava_Train_512.csv')

# reading the generated testing moment
test_df = pd.read_csv('C:/Users/noqui/Desktop/FYP\Work_Folder_Testing/output/moment/Cassava/100/Cassava_Test_512.csv')

train_df.reset_index(drop = True, inplace = True)
train_df = train_df.dropna()

test_df.reset_index(drop = True, inplace = True)
test_df = test_df.dropna()

### Preparing the Datasets

In [32]:
X_train_df = train_df.sort_values(['label'])
X_train_df = X_train_df.drop(['Inm[1,1]'], axis = 1) # drop Inm[1,1]
X_test_df = test_df.sort_values(['label'])
X_test_df = X_test_df.drop(['Inm[1,1]'], axis = 1) # drop Inm[1,1]

# reset index
X_train_df.reset_index(inplace = True, drop = True)
X_test_df.reset_index(inplace = True, drop = True)

data = X_train_df.append([X_test_df])
y = data['class']

X_train_df = X_train_df.iloc[:,3:230].copy() # excluding (Inm[0,0], Inm[0,1], Inm[1,0])
X_test_df = X_test_df.iloc[:,3:230].copy() # excluding (Inm[0,0], Inm[0,1], Inm[1,0])

# apply normalization 
for column in X_train_df.columns:
    X_train_df[column] = (X_train_df[column] - X_train_df[column].min()) / (X_train_df[column].max() - X_train_df[column].min()) 

for column in X_test_df.columns:
    X_test_df[column] = (X_test_df[column] - X_test_df[column].min()) / (X_test_df[column].max() - X_test_df[column].min()) 

print(X_train_df.shape)
print(X_test_df.shape)
display(X_train_df)
display(X_train_df)

(14980, 227)
(6417, 227)


Unnamed: 0,"Inm[0,2]","Inm[2,0]","Inm[0,3]","Inm[1,2]","Inm[2,1]","Inm[3,0]","Inm[0,4]","Inm[1,3]","Inm[2,2]","Inm[3,1]",...,"Inm[11,9]","Inm[12,8]","Inm[13,7]","Inm[14,6]","Inm[15,5]","Inm[16,4]","Inm[17,3]","Inm[18,2]","Inm[19,1]","Inm[20,0]"
0,0.080198,0.097889,0.009538,0.600463,0.382801,0.560823,0.863587,0.471308,0.899099,0.382024,...,0.458812,0.483395,0.624666,0.394148,0.475241,0.436230,0.369309,0.631207,0.415657,0.709320
1,0.300600,0.371140,0.086635,0.685474,0.413572,0.727353,0.561264,0.680328,0.619227,0.136749,...,0.477605,0.513108,0.683432,0.549288,0.564549,0.478880,0.525941,0.491742,0.643794,0.546375
2,0.116834,0.123254,0.020849,0.609138,0.366738,0.565231,0.814772,0.433776,0.857001,0.420067,...,0.385411,0.445322,0.506545,0.573518,0.415377,0.606877,0.367130,0.545870,0.437492,0.491139
3,0.157244,0.164603,0.033927,0.638301,0.366185,0.545039,0.770974,0.492855,0.799868,0.347873,...,0.421950,0.522118,0.561011,0.370241,0.492356,0.297456,0.460087,0.426757,0.531547,0.489821
4,0.581968,0.690743,0.125258,0.809572,0.498387,0.844144,0.304441,0.341879,0.278852,0.650447,...,0.282702,0.684930,0.515188,0.539361,0.664117,0.375388,0.643017,0.419981,0.564220,0.445665
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14975,0.180040,0.182701,0.034091,0.653720,0.395388,0.618273,0.732266,0.552341,0.775782,0.287869,...,0.467597,0.568799,0.558748,0.483072,0.424700,0.414590,0.366945,0.462520,0.440918,0.492146
14976,0.246096,0.239353,0.006931,0.560176,0.411945,0.565190,0.624444,0.581166,0.710529,0.244915,...,0.511178,0.631692,0.704645,0.685684,0.592208,0.644088,0.545171,0.549210,0.653999,0.489206
14977,0.327762,0.321838,0.170592,0.586232,0.332471,0.592243,0.569727,0.471944,0.602428,0.402386,...,0.366034,0.758119,0.493416,0.506975,0.478493,0.479247,0.481144,0.651477,0.562304,0.681890
14978,0.109982,0.133604,0.024535,0.570839,0.387718,0.552084,0.820546,0.547006,0.858603,0.292849,...,0.396339,0.457801,0.487137,0.534057,0.531898,0.567873,0.601905,0.593012,0.704146,0.594058


Unnamed: 0,"Inm[0,2]","Inm[2,0]","Inm[0,3]","Inm[1,2]","Inm[2,1]","Inm[3,0]","Inm[0,4]","Inm[1,3]","Inm[2,2]","Inm[3,1]",...,"Inm[11,9]","Inm[12,8]","Inm[13,7]","Inm[14,6]","Inm[15,5]","Inm[16,4]","Inm[17,3]","Inm[18,2]","Inm[19,1]","Inm[20,0]"
0,0.080198,0.097889,0.009538,0.600463,0.382801,0.560823,0.863587,0.471308,0.899099,0.382024,...,0.458812,0.483395,0.624666,0.394148,0.475241,0.436230,0.369309,0.631207,0.415657,0.709320
1,0.300600,0.371140,0.086635,0.685474,0.413572,0.727353,0.561264,0.680328,0.619227,0.136749,...,0.477605,0.513108,0.683432,0.549288,0.564549,0.478880,0.525941,0.491742,0.643794,0.546375
2,0.116834,0.123254,0.020849,0.609138,0.366738,0.565231,0.814772,0.433776,0.857001,0.420067,...,0.385411,0.445322,0.506545,0.573518,0.415377,0.606877,0.367130,0.545870,0.437492,0.491139
3,0.157244,0.164603,0.033927,0.638301,0.366185,0.545039,0.770974,0.492855,0.799868,0.347873,...,0.421950,0.522118,0.561011,0.370241,0.492356,0.297456,0.460087,0.426757,0.531547,0.489821
4,0.581968,0.690743,0.125258,0.809572,0.498387,0.844144,0.304441,0.341879,0.278852,0.650447,...,0.282702,0.684930,0.515188,0.539361,0.664117,0.375388,0.643017,0.419981,0.564220,0.445665
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14975,0.180040,0.182701,0.034091,0.653720,0.395388,0.618273,0.732266,0.552341,0.775782,0.287869,...,0.467597,0.568799,0.558748,0.483072,0.424700,0.414590,0.366945,0.462520,0.440918,0.492146
14976,0.246096,0.239353,0.006931,0.560176,0.411945,0.565190,0.624444,0.581166,0.710529,0.244915,...,0.511178,0.631692,0.704645,0.685684,0.592208,0.644088,0.545171,0.549210,0.653999,0.489206
14977,0.327762,0.321838,0.170592,0.586232,0.332471,0.592243,0.569727,0.471944,0.602428,0.402386,...,0.366034,0.758119,0.493416,0.506975,0.478493,0.479247,0.481144,0.651477,0.562304,0.681890
14978,0.109982,0.133604,0.024535,0.570839,0.387718,0.552084,0.820546,0.547006,0.858603,0.292849,...,0.396339,0.457801,0.487137,0.534057,0.531898,0.567873,0.601905,0.593012,0.704146,0.594058


In [33]:
X = X_train_df.append([X_test_df])

label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)
X.shape, y.shape

((21397, 227), (21397,))

In [34]:
#Split the data in training set and test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 28)
    
print(X_train.shape, X_test.shape)
print(y_train.shape, y_test.shape)

(14977, 227) (6420, 227)
(14977,) (6420,)


### Selecting Moments up to nth Order 

In [35]:
# get the number of columns to slice up to n order
def getOrderIndex(n_order):
    return int((n_order + 1) * (n_order + 2) / 2)

# ordPQ
n_order = 5
number_of_index = getOrderIndex(n_order) - 4
print(number_of_index)

# slicing up to order 5
X_train_ord5 = X_train.iloc[:,:number_of_index]
X_test_ord5 = X_test.iloc[:,:number_of_index]

# print shape
print("Train", X_train_ord5.shape)
print("Test", X_test_ord5.shape)
print()
##########################################################
n_order = 10 
number_of_index = getOrderIndex(n_order) - 4
print(number_of_index)

# slicing up to order 10
X_train_ord10 = X_train.iloc[:,:number_of_index]
X_test_ord10 = X_test.iloc[:,:number_of_index]

# print shape
print("Train", X_train_ord10.shape)
print("Test", X_test_ord10.shape)
print()
##########################################################

n_order = 15
number_of_index = getOrderIndex(n_order) - 4
print(number_of_index)

# slicing up to order 15
X_train_ord15 = X_train.iloc[:,:number_of_index]
X_test_ord15 = X_test.iloc[:,:number_of_index]

# print shape
print("Train", X_train_ord15.shape)
print("Test", X_test_ord15.shape)
print()
##########################################################

n_order = 20
number_of_index = getOrderIndex(n_order) - 4
print(number_of_index)

# slicing up to order 20
X_train_ord20 = X_train.iloc[:,:number_of_index]
X_test_ord20 = X_test.iloc[:,:number_of_index]

# print shape
print("Train", X_train_ord20.shape)
print("Test", X_test_ord20.shape)

17
Train (14977, 17)
Test (6420, 17)

62
Train (14977, 62)
Test (6420, 62)

132
Train (14977, 132)
Test (6420, 132)

227
Train (14977, 227)
Test (6420, 227)


In [36]:
trainsets = [
    X_train_ord5, X_train_ord10, X_train_ord15, X_train_ord20
]

testsets = [
    X_test_ord5, X_test_ord10, X_test_ord15, X_test_ord20
] 

featureSelectionMethods = [
    "ord = 5", "ord = 10", "ord = 15", "ord = 20"
]

## Classification

In [37]:
# Classification Model
def modelTraining(model):
    for train, test, fSMethod in zip(trainsets, testsets, featureSelectionMethods):
        print(f"Feature Selection with {fSMethod}")
        model = model.fit(train, y_train)

        summary = PrettyTable()
        summary.field_names = ["Score (%)"]
        summary.add_row([round(model.score(test, y_test) * 100, 2)])
        print(summary)

### Support Vector Classifier

In [38]:
model = SVC()
modelTraining(model)

Feature Selection with ord = 5
+-----------+
| Score (%) |
+-----------+
|   60.79   |
+-----------+
Feature Selection with ord = 10
+-----------+
| Score (%) |
+-----------+
|   60.79   |
+-----------+
Feature Selection with ord = 15
+-----------+
| Score (%) |
+-----------+
|   60.79   |
+-----------+
Feature Selection with ord = 20
+-----------+
| Score (%) |
+-----------+
|   60.79   |
+-----------+


### K-Nearest Neighbors Classifier

In [39]:
model = KNeighborsClassifier(30)
modelTraining(model)

Feature Selection with ord = 5
+-----------+
| Score (%) |
+-----------+
|   60.75   |
+-----------+
Feature Selection with ord = 10
+-----------+
| Score (%) |
+-----------+
|   60.81   |
+-----------+
Feature Selection with ord = 15
+-----------+
| Score (%) |
+-----------+
|   60.81   |
+-----------+
Feature Selection with ord = 20
+-----------+
| Score (%) |
+-----------+
|   60.81   |
+-----------+


### Decision Tree Classifier

In [40]:
model = DecisionTreeClassifier(criterion = 'entropy', max_depth = 4, splitter = 'best')
modelTraining(model)

Feature Selection with ord = 5
+-----------+
| Score (%) |
+-----------+
|   60.62   |
+-----------+
Feature Selection with ord = 10
+-----------+
| Score (%) |
+-----------+
|   60.83   |
+-----------+
Feature Selection with ord = 15
+-----------+
| Score (%) |
+-----------+
|    60.9   |
+-----------+
Feature Selection with ord = 20
+-----------+
| Score (%) |
+-----------+
|   60.73   |
+-----------+


### Random Forest Classifier

In [41]:
model = RandomForestClassifier(max_depth = 6, random_state = 10)
modelTraining(model)

Feature Selection with ord = 5
+-----------+
| Score (%) |
+-----------+
|   60.81   |
+-----------+
Feature Selection with ord = 10
+-----------+
| Score (%) |
+-----------+
|   60.83   |
+-----------+
Feature Selection with ord = 15
+-----------+
| Score (%) |
+-----------+
|   60.83   |
+-----------+
Feature Selection with ord = 20
+-----------+
| Score (%) |
+-----------+
|   60.79   |
+-----------+


### Neural Network

In [42]:
# convert the labels from integers to one-hot-encoded vectors
lb = LabelEncoder()
y_train = lb.fit_transform(y_train)
y_test = lb.transform(y_test)

print(y_train.shape)
print(y_test.shape)

(14977,)
(6420,)


In [43]:
y_train = tf.keras.utils.to_categorical(y_train, num_classes = 5)
y_test = tf.keras.utils.to_categorical(y_test, num_classes = 5)

print(y_train.shape)
print(y_test.shape)

(14977, 5)
(6420, 5)


In [44]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor = 'val_accuracy', mode = 'max', min_delta = 0.01,
                                                                                    patience = 3, factor = 0.25, verbose = 1, cooldown = 0, min_lr = 0.0001)

early_stopper = tf.keras.callbacks.EarlyStopping(monitor = 'val_accuracy', mode = 'max', min_delta = 0.005,
                                                                                 patience = 10, verbose = 1, restore_best_weights = True)

In [45]:
for i, train_set in enumerate(trainsets):
    input_shape = train_set.shape[1]
    test_set = testsets[i]
    fSMethod = featureSelectionMethods[i]

    print(f"Feature Selection with {fSMethod}")

    model = Sequential()
    model.add(Dense(1024, input_shape = (input_shape,), activation = "relu"))
    model.add(Dropout(0.5))
    model.add(Dense(5, activation = "softmax"))

    model.compile(optimizer = Adam(learning_rate = 0.001), loss = 'categorical_crossentropy', metrics = ['accuracy'])
    model.fit(train_set, y_train, validation_data = (test_set, y_test), epochs = 20, callbacks = [early_stopper, reduce_lr])

    score = model.evaluate(test_set, y_test, verbose = 1)
    print('Test accuracy:', score[1])
    print('\n')

Feature Selection with ord = 5
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 4: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 7: ReduceLROnPlateau reducing learning rate to 0.0001.
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 11: early stopping
Test accuracy: 0.6079439520835876


Feature Selection with ord = 10
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 4: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 7: ReduceLROnPlateau reducing learning rate to 0.0001.
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 11: early stopping
Test accuracy: 0.6079439520835876


Feature Selection with ord = 15
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 4: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 7: ReduceLROnPlateau reducing learning rate to 0.0001.
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epo

Epoch 7/20
Epoch 7: ReduceLROnPlateau reducing learning rate to 0.0001.
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 11: early stopping
Test accuracy: 0.6079439520835876


