##### Imports

In [None]:
import numpy as np
import keras
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, confusion_matrix, ConfusionMatrixDisplay
from imblearn.over_sampling import RandomOverSampler, SMOTE
from imblearn.combine import SMOTETomek
from imblearn.under_sampling import RandomUnderSampler
from keras import backend as K
from keras.utils import to_categorical
from keras.models import Sequential, load_model
from keras import layers
from keras.layers import Dense, Conv2D, Flatten, Dropout, MaxPooling2D

#### Image visualization

In [None]:
def showImages(images, labels, offset=0):
    """Plot up to 16 images, starting at ``offset``, in a 4x4 grid with class captions.

    Args:
        images: Indexable collection of images accepted by ``imshow``.
        labels: Indexable collection of class indices (0 or 1) aligned with ``images``.
        offset: Index of the first image to display.
    """
    class_names = ['Spots', 'Eyespots']

    # Show fewer than 16 tiles when fewer samples are available instead of
    # raising IndexError part-way through the grid.
    available = min(len(images), len(labels)) - offset
    n_shown = max(0, min(16, available))

    fig = plt.figure(figsize=(10, 10))
    for i in range(n_shown):
        ax = fig.add_subplot(4, 4, i + 1)
        ax.set_xticks([])
        ax.set_yticks([])
        ax.grid(False)
        ax.imshow(images[i + offset])
        # Cast so float-valued labels can still index the Python list.
        ax.set_xlabel(class_names[int(labels[i + offset])])

    fig.patch.set_facecolor('w')
    plt.show()

def rgb2gray(rgb):
    """Convert RGB image data to grayscale with a weighted channel sum.

    Args:
        rgb: Array whose last axis holds at least three channels (R, G, B);
            any channels beyond the third are ignored.

    Returns:
        Array with the last axis removed, each value being
        0.299*R + 0.587*G + 0.114*B.
    """
    luma_weights = [0.299, 0.587, 0.114]
    colour_channels = rgb[..., :3]
    return np.dot(colour_channels, luma_weights)

def showImagesGray(images, labels, offset=0):
    """Plot up to 16 grayscale images, starting at ``offset``, in a 4x4 grid.

    Identical to the colour viewer except that images are rendered with the
    ``"gray"`` colormap.

    Args:
        images: Indexable collection of single-channel images accepted by ``imshow``.
        labels: Indexable collection of class indices (0 or 1) aligned with ``images``.
        offset: Index of the first image to display.
    """
    class_names = ['Spots', 'Eyespots']

    # Show fewer than 16 tiles when fewer samples are available instead of
    # raising IndexError part-way through the grid.
    available = min(len(images), len(labels)) - offset
    n_shown = max(0, min(16, available))

    fig = plt.figure(figsize=(10, 10))
    for i in range(n_shown):
        ax = fig.add_subplot(4, 4, i + 1)
        ax.set_xticks([])
        ax.set_yticks([])
        ax.grid(False)
        ax.imshow(images[i + offset], cmap="gray")
        # Cast so float-valued labels can still index the Python list.
        ax.set_xlabel(class_names[int(labels[i + offset])])

    fig.patch.set_facecolor('w')
    plt.show()

#### F1 score

In [None]:
def recall_m(y_true, y_pred):
    """Recall over all entries of the given tensors.

    Counts entries where the rounded, clipped product ``y_true * y_pred`` is 1
    and divides by the number of entries where the rounded, clipped ``y_true``
    is 1. ``K.epsilon()`` is added to the denominator to avoid division by zero.
    """
    hits = K.round(K.clip(y_true * y_pred, 0, 1))
    actual_positives = K.round(K.clip(y_true, 0, 1))
    return K.sum(hits) / (K.sum(actual_positives) + K.epsilon())

def precision_m(y_true, y_pred):
    """Precision over all entries of the given tensors.

    Counts entries where the rounded, clipped product ``y_true * y_pred`` is 1
    and divides by the number of entries where the rounded, clipped ``y_pred``
    is 1. ``K.epsilon()`` is added to the denominator to avoid division by zero.
    """
    hits = K.round(K.clip(y_true * y_pred, 0, 1))
    flagged_positives = K.round(K.clip(y_pred, 0, 1))
    return K.sum(hits) / (K.sum(flagged_positives) + K.epsilon())

def f1_m(y_true, y_pred):
    """F1 score: harmonic mean of ``precision_m`` and ``recall_m``.

    ``K.epsilon()`` in the denominator keeps the result finite when both
    precision and recall are zero.
    """
    p = precision_m(y_true, y_pred)
    r = recall_m(y_true, y_pred)
    ratio = (p * r) / (p + r + K.epsilon())
    return 2 * ratio

### Loading data

In [None]:
# Load the training images and their binary labels.
data_x = np.load('Xtrain_Classification1.npy')
data_y = np.load('ytrain_Classification1.npy')

print(data_y.shape)

print(f"Data X: {data_x.shape[0]} images of shape: {data_x.shape[1:]}")

# Label convention used by the plotting helpers (class_names) and by the
# one-hot summary later in this notebook: 0 = spots, 1 = eyespots.
# The label sum therefore counts eyespots, not spots.

# Number of spots (class 0)
print(f"Number of spots: {data_y.shape[0] - np.sum(data_y)}")

# Number of eyespots (class 1)
print(f"Number of eyespots: {np.sum(data_y)}")

# Split the data into training and validation sets (80/20, fixed seed)

x_train, x_validation, y_train, y_validation = train_test_split(data_x, data_y, test_size=0.2, random_state=42)

### Preprocessing

### Reshape data

In [None]:
# # Reshaping to 30x30x3 for 30x30 RGB images

# x_train_over = x_train_over.reshape(x_train_over.shape[0], 30, 30, 3)
# x_train_under = x_train_under.reshape(x_train_under.shape[0], 30, 30, 3)

# x_validation = x_validation.reshape(x_validation.shape[0], 30, 30, 3)

# print("\nOver-sampled data:")
# print(f"Training set: {x_train_over.shape[0]} images of shape: {x_train_over.shape[1:]}")

# print("\nUnder-sampled data:")
# print(f"Training set: {x_train_under.shape[0]} images of shape: {x_train_under.shape[1:]}")

# print(f"Validation set: {x_validation.shape[0]} images of shape: {x_validation.shape[1:]}")

# # Checking classification of images

# showImages(x_train_over, y_train_over, 0)

# Give every sample an explicit 30x30x3 (height, width, RGB) layout.
x_train = np.reshape(x_train, (x_train.shape[0], 30, 30, 3))
x_validation = np.reshape(x_validation, (x_validation.shape[0], 30, 30, 3))

### Turn images to grayscale

In [None]:
# Convert to grayscale, then flatten each 30x30 image to a 900-long vector.
# The resamplers in the next cell are fed these 2-D (samples, features) arrays.
x_train = rgb2gray(x_train).reshape(x_train.shape[0], 30*30*1)
x_validation = rgb2gray(x_validation).reshape(x_validation.shape[0], 30*30*1)

# # Turn images to grayscale

# x_train_over = rgb2gray(x_train_over)
# x_train_under = rgb2gray(x_train_under)

# x_validation = rgb2gray(x_validation)

# # Reshape the images to 3D

# x_train_over = x_train_over.reshape(x_train_over.shape[0], 30, 30, 1)
# x_train_under = x_train_under.reshape(x_train_under.shape[0], 30, 30, 1)

# x_validation = x_validation.reshape(x_validation.shape[0], 30, 30, 1)

### Dealing with imbalanced data

In [None]:
# Over-sampling with SMOTE (synthetic minority-class samples, not plain random duplication)

smote = SMOTE(sampling_strategy='minority', random_state=42)
x_train_over, y_train_over = smote.fit_resample(x_train, y_train)

print("\nOver-sampled data:")
print(f"Data X: {x_train_over.shape[0]} images of shape: {x_train_over.shape[1:]}")

# Label convention used by the plotting helpers and the one-hot summary:
# 0 = spots, 1 = eyespots, so the label sum is the eyespot count.

# Number of spots (class 0)
print(f"Number of spots: {y_train_over.shape[0] - np.sum(y_train_over)}")

# Number of eyespots (class 1)
print(f"Number of eyespots: {np.sum(y_train_over)}")

# Random undersampling of the majority class

rus = RandomUnderSampler(sampling_strategy='majority', random_state=42)
x_train_under, y_train_under = rus.fit_resample(x_train, y_train)

print("\nUnder-sampled data:")
print(f"Data X: {x_train_under.shape[0]} images of shape: {x_train_under.shape[1:]}")

# Number of spots (class 0)
print(f"Number of spots: {y_train_under.shape[0] - np.sum(y_train_under)}")

# Number of eyespots (class 1)
print(f"Number of eyespots: {np.sum(y_train_under)}")

# Reshaping to 30x30x1 for 30x30 grayscale images

x_train_over = x_train_over.reshape(x_train_over.shape[0], 30, 30, 1)
x_train_under = x_train_under.reshape(x_train_under.shape[0], 30, 30, 1)

x_validation = x_validation.reshape(x_validation.shape[0], 30, 30, 1)

# Show some images

showImagesGray(x_train_over, y_train_over, 0)


### Normalization

In [None]:
# Cast to float32 and scale the grayscale intensities by 1/255
# (the maximum value of an 8-bit channel).

x_train_over = x_train_over.astype('float32') / 255
x_train_under = x_train_under.astype('float32') / 255

x_validation = x_validation.astype('float32') / 255


### One-hot encoding

In [None]:
# Turn the integer class labels into one-hot rows (column 0 / column 1).

y_train_over = to_categorical(y_train_over)
y_train_under = to_categorical(y_train_under)

y_validation = to_categorical(y_validation)

# Per-class sample counts after resampling: summing a one-hot column
# gives the number of samples of that class.

for heading, encoded in (("\nOver-sampled data:", y_train_over),
                         ("\nUnder-sampled data:", y_train_under)):
    print(heading)
    print(f"Spots: {np.sum(encoded[:, 0])}")
    print(f"Eyespots: {np.sum(encoded[:, 1])}")

# Image analysis

## CNN

In [None]:
# Settings shared by both CNNs below.
num_classes = 2             # width of the final Dense layer / one-hot labels
input_shape = (30, 30, 1)   # 30x30 single-channel (grayscale) images
batch_size, epochs = 128, 200

### Model Construction

#### Image Augmentation layer

In [None]:
# Augmentation front-end placed as the first layer of both CNNs:
# random horizontal/vertical flips followed by a random rotation (factor 0.2).
data_augmentation = Sequential()
data_augmentation.add(layers.RandomFlip("horizontal_and_vertical", input_shape=input_shape))
data_augmentation.add(layers.RandomRotation(0.2))

### Model with oversampling

In [None]:
# CNN to be trained on the over-sampled training set.
#
# Three Conv -> Dropout -> MaxPool stages followed by two dense layers.
# The input shape comes from the augmentation block (first layer), so the
# convolution layers do not need their own `input_shape`.
model_over = Sequential([
    # Augmentation + convolutional and pooling layers
    data_augmentation,

    Conv2D(32, kernel_size=(3, 3), activation='relu'),
    Dropout(0.1),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(64, kernel_size=(3, 3), activation='relu'),
    Dropout(0.2),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(128, kernel_size=(3, 3), activation='relu'),
    Dropout(0.3),
    MaxPooling2D(pool_size=(2, 2)),

    # Fully connected layers
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.1),
    Dense(64, activation='relu'),
    Dropout(0.25),

    # The two classes are mutually exclusive and the model is trained with
    # categorical cross-entropy on one-hot labels, so the output must be a
    # probability distribution: softmax, not independent sigmoids.
    Dense(num_classes, activation='softmax'),
])

model_over.summary()

### Model with undersampling

In [None]:
# CNN to be trained on the under-sampled training set.
#
# Three Conv -> Dropout -> MaxPool stages followed by two dense layers.
# The input shape comes from the augmentation block (first layer), so the
# convolution layers do not need their own `input_shape`.
model_under = Sequential([
    # Augmentation + convolutional and pooling layers
    data_augmentation,

    Conv2D(32, kernel_size=(3, 3), activation='relu'),
    Dropout(0.1),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(64, kernel_size=(3, 3), activation='relu'),
    Dropout(0.2),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(128, kernel_size=(3, 3), activation='relu'),
    Dropout(0.3),
    MaxPooling2D(pool_size=(2, 2)),

    # Fully connected layers
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.1),
    Dense(64, activation='relu'),
    Dropout(0.25),

    # The two classes are mutually exclusive and the model is trained with
    # categorical cross-entropy on one-hot labels, so the output must be a
    # probability distribution: softmax, not independent sigmoids.
    Dense(num_classes, activation='softmax'),
])

model_under.summary()

### Compile and train the models

In [None]:
# Compile with Adam + categorical cross-entropy; track accuracy and the custom F1 metric.
model_over.compile(
    loss=keras.losses.categorical_crossentropy,
    optimizer='adam',
    metrics=['accuracy', f1_m],
)

# Stop once validation loss has not improved for 10 epochs and roll back
# to the best weights seen.
early_stopping_over = keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=10, restore_best_weights=True
)

history_over = model_over.fit(
    x_train_over,
    y_train_over,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    validation_data=(x_validation, y_validation),
    callbacks=[early_stopping_over],
)

In [None]:
# Compile with Adam + categorical cross-entropy; track accuracy and the custom F1 metric.
model_under.compile(
    loss=keras.losses.categorical_crossentropy,
    optimizer='adam',
    metrics=['accuracy', f1_m],
)

# Stop once validation loss has not improved for 10 epochs and roll back
# to the best weights seen.
early_stopping_under = keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=10, restore_best_weights=True
)

history_under = model_under.fit(
    x_train_under,
    y_train_under,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    validation_data=(x_validation, y_validation),
    callbacks=[early_stopping_under],
)

## Visualize the Results

In [None]:
# Hard class predictions (argmax over the two output units) on the validation set

y_pred_over = np.argmax(model_over.predict(x_validation), axis=1)
y_pred_under = np.argmax(model_under.predict(x_validation), axis=1)

y_true = np.argmax(y_validation, axis=1)

# Training curves: one panel per model, the same six series in each

curve_labels = [
    ('accuracy', 'Training accuracy'),
    ('loss', 'Training loss'),
    ('val_accuracy', 'Validation accuracy'),
    ('val_loss', 'Validation loss'),
    ('f1_m', 'Training F1 score'),
    ('val_f1_m', 'Validation F1 score'),
]

plt.figure(figsize=(10, 10))

for panel, (panel_title, run) in enumerate(
        [('Over-sampled data', history_over), ('Under-sampled data', history_under)], start=1):
    plt.subplot(2, 1, panel)
    plt.title(panel_title)
    for history_key, curve_label in curve_labels:
        plt.plot(run.history[history_key], label=curve_label)
    plt.xlabel('Epoch')
    plt.legend(loc='best')

# Show the curves before the numeric summary is printed.
plt.show()

# Keras metrics (loss, accuracy, custom f1_m) on the validation set

validation_loss_over, validation_accuracy_over, f1_score_over = model_over.evaluate(x_validation, y_validation, verbose=0)
validation_loss_under, validation_accuracy_under, f1_score_under = model_under.evaluate(x_validation, y_validation, verbose=0)

print("Over-sampled data: Validation loss: {:.2f}, Validation accuracy: {:.2f}, F1 Score: {:.2f}".format(validation_loss_over, validation_accuracy_over, f1_score_over))
print("Under-sampled data: Validation loss: {:.2f}, Validation accuracy: {:.2f}, F1 Score: {:.2f}".format(validation_loss_under, validation_accuracy_under, f1_score_under))

# scikit-learn macro F1 from the hard predictions, reported for BOTH models
# so they can be compared on the same metric.

print("Over-sampled data: macro F1 (sklearn): {:.4f}".format(f1_score(y_true, y_pred_over, average='macro')))
print("Under-sampled data: macro F1 (sklearn): {:.4f}".format(f1_score(y_true, y_pred_under, average='macro')))

# plot confusion matrices (titled so the two figures can be told apart)

cm_over = confusion_matrix(y_true, y_pred_over)
disp = ConfusionMatrixDisplay(confusion_matrix=cm_over)
disp.plot()
disp.ax_.set_title('Over-sampled model (validation set)')
plt.show()

cm_under = confusion_matrix(y_true, y_pred_under)
disp = ConfusionMatrixDisplay(confusion_matrix=cm_under)
disp.plot()
disp.ax_.set_title('Under-sampled model (validation set)')
plt.show()

In [None]:
# Put the complete labelled dataset through the same pipeline as the training
# images: 30x30x3 -> grayscale -> 30x30x1 -> float32 scaled by 1/255.
# NOTE: this cell overwrites data_x / data_y, so it must be run exactly once
# after loading the data.
data_y = to_categorical(data_y, num_classes)

data_x = rgb2gray(data_x.reshape(data_x.shape[0], 30, 30, 3))
data_x = data_x.reshape(data_x.shape[0], 30, 30, 1).astype('float32') / 255

In [None]:
# test entire dataset
# NOTE: data_x contains the images the models were trained on as well as the
# validation images, so these scores are optimistic compared to validation-only scores.

loss_over, accuracy_over, f1_score_over = model_over.evaluate(data_x, data_y, verbose=0)
loss_under, accuracy_under, f1_score_under = model_under.evaluate(data_x, data_y, verbose=0)

print("Over-sampled model: Loss: {:.2f}, Accuracy: {:.2f}, F1 Score: {:.2f}".format(loss_over, accuracy_over, f1_score_over))
print("Under-sampled model: Loss: {:.2f}, Accuracy: {:.2f}, F1 Score: {:.2f}".format(loss_under, accuracy_under, f1_score_under))

y_true = np.argmax(data_y, axis=1)

# Hard class predictions for both models

y_pred_over = np.argmax(model_over.predict(data_x), axis=1)
y_pred_under = np.argmax(model_under.predict(data_x), axis=1)

# scikit-learn macro F1, reported for BOTH models so they can be compared
# on the same metric.

print("Over-sampled model: macro F1 (sklearn): {:.4f}".format(f1_score(y_true, y_pred_over, average='macro')))
print("Under-sampled model: macro F1 (sklearn): {:.4f}".format(f1_score(y_true, y_pred_under, average='macro')))

# plot confusion matrices (titled so the two figures can be told apart)

cm_over = confusion_matrix(y_true, y_pred_over)
disp = ConfusionMatrixDisplay(confusion_matrix=cm_over)
disp.plot()
disp.ax_.set_title('Over-sampled model (entire dataset)')
plt.show()

cm_under = confusion_matrix(y_true, y_pred_under)
disp = ConfusionMatrixDisplay(confusion_matrix=cm_under)
disp.plot()
disp.ax_.set_title('Under-sampled model (entire dataset)')
plt.show()

In [None]:
# model_over.save('model_over_90.h5')
# model_under.save('model_under_88.h5')

# Reload a previously saved model from disk. The custom f1_m metric has to be
# passed in by name so the saved compile configuration can be restored.
model_91 = load_model(
    'model_over_91.h5',
    custom_objects={'f1_m': f1_m},
)

In [None]:
# --- Keras metrics for the reloaded model: validation split, then the full dataset ---
validation_loss_over, validation_accuracy_over, f1_score_over = model_91.evaluate(
    x_validation, y_validation, verbose=0
)

print(
    "Over-sampled data: Validation loss: {:.2f}, Validation accuracy: {:.2f}, F1 Score: {:.2f}".format(
        validation_loss_over, validation_accuracy_over, f1_score_over
    )
)

loss_over, accuracy_over, f1_score_over = model_91.evaluate(data_x, data_y, verbose=0)

print(
    "Over-sampled model: Loss: {:.2f}, Accuracy: {:.2f}, F1 Score: {:.2f}".format(
        loss_over, accuracy_over, f1_score_over
    )
)

# --- scikit-learn macro F1 on the validation split ---
y_true = np.argmax(y_validation, axis=1)
y_pred_over = np.argmax(model_91.predict(x_validation), axis=1)

print(f1_score(y_true, y_pred_over, average='macro'))

# --- scikit-learn macro F1 on the full dataset ---
y_true = np.argmax(data_y, axis=1)
y_pred_over = np.argmax(model_91.predict(data_x), axis=1)

print(f1_score(y_true, y_pred_over, average='macro'))

# Confusion matrix of the full-dataset predictions computed just above
cm_over = confusion_matrix(y_true, y_pred_over)
disp = ConfusionMatrixDisplay(confusion_matrix=cm_over)
disp.plot()
plt.show()

In [None]:
# Load the unlabelled test images and apply the same preprocessing as the
# training data: 30x30x3 -> grayscale -> 30x30x1 -> float32 scaled by 1/255.
X_test = np.load('Xtest_Classification1.npy')

X_test = rgb2gray(X_test.reshape(X_test.shape[0], 30, 30, 3))
X_test = X_test.reshape(X_test.shape[0], 30, 30, 1).astype('float32') / 255

# Predicted class index = output unit with the highest score.
y_pred = np.argmax(model_91.predict(X_test), axis=1)

np.save('YPredict_Classification1.npy', y_pred)