##### Imports

In [None]:
import numpy as np
import keras
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from imblearn.over_sampling import RandomOverSampler, SMOTE
from imblearn.under_sampling import RandomUnderSampler
from keras.utils import to_categorical
from keras.models import Sequential
from keras import layers
from keras.layers import Dense, Conv2D, Flatten, Dropout, MaxPooling2D

#### Image visualization

In [None]:
def showImages(images, labels, offset=0):
    """Show up to 30 images in a 6x5 grid, each captioned with its class name.

    Parameters
    ----------
    images : sequence of array-like
        Images accepted by ``plt.imshow``; ``images[k]`` is one picture.
    labels : sequence of int-like
        Class index per image: 0 is captioned 'Spots', 1 is 'Eyespots'.
    offset : int, optional
        Index of the first image to display (default 0).

    Notes
    -----
    When fewer than 30 images are available from ``offset`` onwards, only
    the available ones are drawn instead of raising ``IndexError``.
    """
    class_names = ['Spots', 'Eyespots']
    n_rows, n_cols = 6, 5

    # Clamp the tile count to what both sequences can actually provide.
    n_shown = max(0, min(n_rows * n_cols, len(images) - offset, len(labels) - offset))

    fig = plt.figure(figsize=(10, 10))
    for i in range(n_shown):
        plt.subplot(n_rows, n_cols, i + 1)
        plt.xticks([])
        plt.yticks([])
        plt.grid(False)
        plt.imshow(images[i + offset])
        # int() lets float-typed label arrays (e.g. 0.0 / 1.0) index the list.
        plt.xlabel(class_names[int(labels[i + offset])])

    fig.patch.set_facecolor('w')
    plt.show()

### Loading data

In [None]:
# Load the image array and the matching class labels from disk.
X_train = np.load('Xtrain_Classification1.npy')
y_train = np.load('ytrain_Classification1.npy')

print(f"Data X: {X_train.shape[0]} images of shape: {X_train.shape[1:]}")

# Label convention used by class_names in showImages: 0 = spot, 1 = eyespot.
# The labels therefore sum to the number of eyespots, and the remainder of
# the samples are spots.

# Number of spots
print(f"Number of spots: {y_train.shape[0] - np.sum(y_train)}")

# Number of eyespots
print(f"Number of eyespots: {np.sum(y_train)}")

# Split the data into training and validation sets.
# stratify=y_train keeps the class proportions identical in both subsets,
# which matters because the classes are imbalanced.
x_train, x_validation, y_train, y_validation = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42, stratify=y_train
)

### Dealing with imbalanced data

In [None]:
# SMOTE oversampling: synthesise new minority-class samples until both
# classes are the same size. (The sampler is SMOTE, not RandomOverSampler;
# the variable keeps its original name `ros`.)

ros = SMOTE(sampling_strategy='minority', random_state=42)
x_train_over, y_train_over = ros.fit_resample(x_train, y_train)

print("\nOver-sampled data:")
print(f"Data X: {x_train_over.shape[0]} images of shape: {x_train_over.shape[1:]}")

# Label convention used by class_names in showImages: 0 = spot, 1 = eyespot,
# so the label sum counts eyespots and the remainder are spots.

# Number of spots
print(f"Number of spots: {y_train_over.shape[0] - np.sum(y_train_over)}")

# Number of eyespots
print(f"Number of eyespots: {np.sum(y_train_over)}")

# Random undersampling: drop majority-class samples until both classes are
# the same size.

rus = RandomUnderSampler(sampling_strategy='majority', random_state=42)
x_train_under, y_train_under = rus.fit_resample(x_train, y_train)

print("\nUnder-sampled data:")
print(f"Data X: {x_train_under.shape[0]} images of shape: {x_train_under.shape[1:]}")

# Number of spots
print(f"Number of spots: {y_train_under.shape[0] - np.sum(y_train_under)}")

# Number of eyespots
print(f"Number of eyespots: {np.sum(y_train_under)}")


### Preprocessing

In [None]:
# Restore the 30x30 RGB layout (height, width, channels) of every sample.
# NOTE(review): this cell overwrites its own inputs, so running it twice
# divides the pixel values by 255 a second time. Re-run the resampling cell
# first if this cell has to be executed again.

x_train_over = x_train_over.reshape((len(x_train_over), 30, 30, 3))
x_train_under = x_train_under.reshape((len(x_train_under), 30, 30, 3))
x_validation = x_validation.reshape((len(x_validation), 30, 30, 3))

print("\nOver-sampled data:")
print(f"Training set: {x_train_over.shape[0]} images of shape: {x_train_over.shape[1:]}")

print("\nUnder-sampled data:")
print(f"Training set: {x_train_under.shape[0]} images of shape: {x_train_under.shape[1:]}")

print(f"Validation set: {x_validation.shape[0]} images of shape: {x_validation.shape[1:]}")

# Visual sanity check of the classification labels, done before the pixel
# values are rescaled.

showImages(x_train_over, y_train_over, offset=0)
showImages(x_train_under, y_train_under, offset=0)

# Cast to float32 and scale by the maximum RGB value (255) in one step.

x_train_over = x_train_over.astype('float32') / 255
x_train_under = x_train_under.astype('float32') / 255
x_validation = x_validation.astype('float32') / 255


In [None]:
# Convert every label vector to its one-hot form; the printout below reads
# column 0 as spots and column 1 as eyespots.
# NOTE(review): the label variables are overwritten, so this cell should be
# executed only once per run of the preceding cells.

y_train_over, y_train_under, y_validation = map(
    to_categorical, (y_train_over, y_train_under, y_validation)
)

# Per-class sample counts of the two resampled training sets.

for heading, one_hot in (
    ("\nOver-sampled data:", y_train_over),
    ("\nUnder-sampled data:", y_train_under),
):
    print(heading)
    print(f"Spots: {np.sum(one_hot[:, 0])}")
    print(f"Eyespots: {np.sum(one_hot[:, 1])}")

# Image analysis

## CNN

In [None]:
# Shape of the problem: 30x30 RGB inputs, two output classes.
input_shape = (30, 30, 3)
num_classes = 2

# Training schedule shared by both CNNs.
epochs = 15
batch_size = 256

### Model Construction

#### Image Augmentation layer

In [None]:
# Augmentation pipeline: random horizontal/vertical flips followed by random
# rotations with factor 0.2.
# NOTE(review): the calls that would attach this pipeline to the models are
# commented out in the model-construction cell, so it is currently unused.
data_augmentation = Sequential()
data_augmentation.add(layers.RandomFlip("horizontal_and_vertical", input_shape=input_shape))
data_augmentation.add(layers.RandomRotation(0.2))

#### Convolutional and Pooling Layers

In [None]:
# Two CNNs with the same architecture: one is trained on the over-sampled
# data, the other on the under-sampled data.
model_over = Sequential()
model_under = Sequential()

# The data_augmentation pipeline is not attached here; the corresponding
# add() calls were left disabled.

# Each entry is (number of filters, extra Conv2D kwargs). Only the first
# convolution declares the input shape. Layers are added alternately to the
# two models, in the same order as a hand-written sequence would.
for n_filters, extra_kwargs in ((32, {'input_shape': input_shape}), (64, {})):
    model_over.add(Conv2D(n_filters, kernel_size=(3, 3), activation='relu', **extra_kwargs))
    model_under.add(Conv2D(n_filters, kernel_size=(3, 3), activation='relu', **extra_kwargs))
    model_over.add(MaxPooling2D(pool_size=(2, 2)))
    model_under.add(MaxPooling2D(pool_size=(2, 2)))

#### Fully Connected Layers

In [None]:
# Flatten the convolutional feature maps into one vector per image.
model_over.add(Flatten())
model_under.add(Flatten())

# Three hidden Dense+Dropout stages given as (units, dropout rate); the
# dropout rate grows towards the output. Layers are added alternately to the
# two models so that both end up with the same architecture.
for n_units, drop_rate in ((64, 0.1), (64, 0.3), (32, 0.5)):
    model_over.add(Dense(n_units, activation='relu'))
    model_under.add(Dense(n_units, activation='relu'))
    model_over.add(Dropout(drop_rate))
    model_under.add(Dropout(drop_rate))

# Softmax head with one output per class, matching the one-hot labels.
model_over.add(Dense(num_classes, activation='softmax'))
model_under.add(Dense(num_classes, activation='softmax'))

for cnn in (model_over, model_under):
    cnn.summary()

#### Compile the Model

In [None]:
# Identical training setup for both networks: Adam optimiser, categorical
# cross-entropy on the one-hot labels, accuracy reported per epoch.
for cnn in (model_over, model_under):
    cnn.compile(
        optimizer='adam',
        loss=keras.losses.categorical_crossentropy,
        metrics=['accuracy'],
    )

#### Train the Model

In [None]:
# Fit each network on its own resampled training set; both are monitored on
# the same (untouched) validation split.
history_over = model_over.fit(
    x_train_over,
    y_train_over,
    validation_data=(x_validation, y_validation),
    epochs=epochs,
    batch_size=batch_size,
    verbose=1,
)
history_under = model_under.fit(
    x_train_under,
    y_train_under,
    validation_data=(x_validation, y_validation),
    epochs=epochs,
    batch_size=batch_size,
    verbose=1,
)

## Visualize the Results

In [None]:
# F1 score on the validation split: turn softmax outputs and one-hot labels
# back into class indices before scoring.

y_pred_over = np.argmax(model_over.predict(x_validation), axis=1)
y_true_over = np.argmax(y_validation, axis=1)

y_pred_under = np.argmax(model_under.predict(x_validation), axis=1)
y_true_under = np.argmax(y_validation, axis=1)

print("F1 score over-sampled data: ", f1_score(y_true_over, y_pred_over))
print("F1 score under-sampled data: ", f1_score(y_true_under, y_pred_under))

# Learning curves: one panel per model, each showing the four recorded
# series (training/validation accuracy and loss) per epoch.
plt.figure(figsize=(10, 10))

for panel, (panel_title, run) in enumerate(
    (('Over-sampled data', history_over), ('Under-sampled data', history_under)),
    start=1,
):
    plt.subplot(1, 2, panel)
    plt.title(panel_title)
    for key, legend_label in (
        ('accuracy', 'Training accuracy'),
        ('loss', 'Training loss'),
        ('val_accuracy', 'Validation accuracy'),
        ('val_loss', 'Validation loss'),
    ):
        plt.plot(run.history[key], label=legend_label)
    plt.xlabel('Epoch')
    plt.legend(loc='best')

# Final loss/accuracy of each trained model on the validation split.
validation_loss_over, validation_accuracy_over = model_over.evaluate(x_validation, y_validation, verbose=0)
validation_loss_under, validation_accuracy_under = model_under.evaluate(x_validation, y_validation, verbose=0)

print(f"Over-sampled data: Validation loss: {validation_loss_over:.2f}, Validation accuracy: {validation_accuracy_over:.2f}")
print(f"Under-sampled data: Validation loss: {validation_loss_under:.2f}, Validation accuracy: {validation_accuracy_under:.2f}")

## Evaluate the Model

In [None]:
# Loss and accuracy of the over-sampled model on the validation split.
# These are validation figures, not test figures: no separate test set is
# held out in this notebook, and the same split was used for monitoring
# during training.
score = model_over.evaluate(x_validation, y_validation, verbose=0)
print('Validation loss:', score[0])
print('Validation accuracy:', score[1])

In [None]:
# Class probabilities predicted by the over-sampled model for every
# validation image.
predictions = model_over.predict(x_validation)

# Preview the first validation images, captioned with the predicted class
# (the arg-max of each probability row) rather than the true label.
showImages(x_validation, np.argmax(predictions, axis=1), 0)