In [1]:
import gdown
#!gdown --id "1NdsNtg7RpTESOKojFbG5vlJMyxbbnNfx" # images from gdrive
!wget files.brainfriz.com/train_images.zip      # secondary link for images
!wget files.brainfriz.com/augmented_images.zip  # augmented images
!wget files.brainfriz.com/aug_train.csv         # augmented train.csv
!wget files.brainfriz.com/full_train.csv        # full train -> augmented + original
!gdown --id "1xbEVK_NigW_5ngwKMHvuOTehYhT2v2WF" # labels
!gdown --id "1SvI9dN2_25c2OlevwK4TjmzBNysjE_PO" # label mapping

--2021-03-18 11:29:18--  http://files.brainfriz.com/train_images.zip
Resolving files.brainfriz.com (files.brainfriz.com)... 138.201.201.196
Connecting to files.brainfriz.com (files.brainfriz.com)|138.201.201.196|:80... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://files.brainfriz.com/train_images.zip [following]
--2021-03-18 11:29:19--  https://files.brainfriz.com/train_images.zip
Connecting to files.brainfriz.com (files.brainfriz.com)|138.201.201.196|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2569658627 (2.4G) [application/zip]
Saving to: ‘train_images.zip’


2021-03-18 11:31:22 (20.0 MB/s) - ‘train_images.zip’ saved [2569658627/2569658627]

--2021-03-18 11:31:22--  http://files.brainfriz.com/augmented_images.zip
Resolving files.brainfriz.com (files.brainfriz.com)... 138.201.201.196
Connecting to files.brainfriz.com (files.brainfriz.com)|138.201.201.196|:80... connected.
HTTP request sent, awaiting response...

In [2]:
import os
os.chdir('/content/')
!unzip -qq -o train_images.zip
!unzip -qq -o augmented_images.zip

In [3]:
import keras
import io
import json
import pandas as pd
import tensorflow as tf
import sklearn
import matplotlib.pyplot as plt
import itertools
import numpy as np
import os, shutil
from datetime import datetime
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras import backend as K
from collections import Counter

In [4]:
# Folder for the per-epoch confusion matrix images. exist_ok makes the cell
# safe to re-run (a plain `mkdir` errors once the folder exists).
os.makedirs('images', exist_ok=True)

# Move the training images (aug + original) into the same folder.
# The destination is spelled out per file: with only the directory name as the
# target, shutil.move raises shutil.Error when a file of that name is already
# there (e.g. after re-extracting the archive and re-running this cell), and
# would rename the first file to 'train_images' if that folder were missing.
# NOTE(review): an existing file with the same name is replaced — this assumes
# augmented and original images never share a file name; confirm.
aug_dir = 'orig_and_aug_train_images'
for file_name in os.listdir(aug_dir):
    shutil.move(os.path.join(aug_dir, file_name),
                os.path.join('train_images', file_name))

In [8]:
# --- Labels ------------------------------------------------------------------
data = pd.read_csv('full_train.csv')

# Context manager so the JSON file handle is closed once it has been parsed.
with open('label_num_to_disease_map.json') as f:
    real_labels = json.load(f)
# JSON object keys are always strings; the `label` column is mapped by int key.
real_labels = {int(k): v for k, v in real_labels.items()}
data['class_name'] = data.label.map(real_labels)
# Series.map yields NaN for labels missing from the mapping; fail early instead
# of handing NaN class names to the generators.
assert data['class_name'].notna().all(), \
    "full_train.csv contains labels that are missing from label_num_to_disease_map.json"

# --- Train / validation split --------------------------------------------------
train_path = 'train_images/'
# Stratified 80/20 split with a fixed seed so the split is reproducible.
train, val = train_test_split(data, test_size=0.20, random_state=20,
                              shuffle=True, stratify=data['class_name'])

print("Training:", Counter(train['class_name']))
print("Validation:", Counter(val['class_name']))

# --- Configuration -------------------------------------------------------------
IMG_SIZE = 224 #smaller size -> faster training. We have to see if this plays a role though for accuracy
SIZE = (IMG_SIZE, IMG_SIZE)
N_CLASSES = 5
BATCH_SIZE = 128 # Mobilenet 128, resnet: 64(?) Watch out for OOM
INPUT_SHAPE = (IMG_SIZE, IMG_SIZE, 3)

# The model head is built with N_CLASSES outputs, so the data must agree.
assert data['class_name'].nunique() == N_CLASSES, \
    "N_CLASSES does not match the number of distinct classes in full_train.csv"

# --- Generators ----------------------------------------------------------------
# No augmentation or rescaling is configured on either generator.
train_datagen = ImageDataGenerator()
test_datagen = ImageDataGenerator()

# Options shared by both generators, kept in one place so they cannot drift.
flow_kwargs = dict(directory=train_path,
                   x_col='image_id',
                   y_col='class_name',
                   color_mode='rgb',
                   class_mode='categorical',
                   target_size=SIZE,
                   batch_size=BATCH_SIZE)

train_set = train_datagen.flow_from_dataframe(train, shuffle=True, **flow_kwargs)

# shuffle=False is important: val_set.classes is later compared element-wise
# with the predictions made on val_set, so the sample order must stay fixed.
val_set = test_datagen.flow_from_dataframe(val, shuffle=False, **flow_kwargs)


Training: Counter({'Cassava Brown Streak Disease (CBSD)': 10527, 'Healthy': 10526, 'Cassava Bacterial Blight (CBB)': 10526, 'Cassava Green Mottle (CGM)': 10526, 'Cassava Mosaic Disease (CMD)': 10526})
Validation: Counter({'Cassava Mosaic Disease (CMD)': 2632, 'Cassava Green Mottle (CGM)': 2632, 'Healthy': 2632, 'Cassava Bacterial Blight (CBB)': 2631, 'Cassava Brown Streak Disease (CBSD)': 2631})
Found 52631 validated image filenames belonging to 5 classes.
Found 13158 validated image filenames belonging to 5 classes.


In [9]:
class ConfusionMatrix(keras.callbacks.Callback):
  """Keras callback that saves a row-normalised confusion matrix image per epoch.

  At the end of every epoch the model predicts on ``val_set``; the arg-max of
  each prediction is compared with ``val_y`` and the matrix is written to
  ``images/conf<N>.png``, where N is the number of images written so far.

  Args:
    val_set: batches handed to ``model.predict``. The sample order must match
      ``val_y`` (NOTE: this assumes the generator was created with
      shuffle=False — confirm at the call site).
    val_y: integer class index of every sample in ``val_set``, in order.
  """

  def __init__(self, val_set, val_y):
    # Let the base class set up its own attributes before adding ours.
    super().__init__()
    self.val_set = val_set
    self.val_y = val_y
    self.counter = 0  # suffix of the next image file

  def on_epoch_end(self, epoch, logs=None):
    """Write one confusion matrix image and advance the file counter."""
    self.plot()
    self.counter += 1

  def plot(self):
    """Predict on the validation data and plot the confusion matrix."""
    # Imported explicitly: `import sklearn` alone does not promise that the
    # `sklearn.metrics` submodule has been loaded.
    from sklearn.metrics import confusion_matrix

    test_pred_raw = self.model.predict(self.val_set)
    test_pred = np.argmax(test_pred_raw, axis=1)

    # One class per model output. Passing `labels` keeps the matrix at full
    # size even if some class shows up in neither the labels nor the predictions.
    class_ids = list(range(test_pred_raw.shape[1]))
    cm = confusion_matrix(self.val_y, test_pred, labels=class_ids)
    self.plot_confusion_matrix(cm, class_names=class_ids)

  def plot_confusion_matrix(self, cm, class_names):
    """Draw ``cm`` as a heat map and save it under ``images/``.

    Args:
      cm: square array of counts; rows are true classes, columns predictions.
      class_names: tick labels, one per row/column of ``cm``.

    Cell colour follows the raw counts, while the text in each cell is the
    count divided by its row total (fraction of that true class), rounded to
    two decimals.
    """
    figure, ax = plt.subplots(figsize=(8, 8))
    try:
      image = ax.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
      ax.set_title("Confusion matrix")
      figure.colorbar(image, ax=ax)
      tick_marks = np.arange(len(class_names))
      ax.set_xticks(tick_marks)
      ax.set_xticklabels(class_names, rotation=45)
      ax.set_yticks(tick_marks)
      ax.set_yticklabels(class_names)

      # A class with no true samples has a zero row total; divide by 1 there
      # so the row shows 0.0 instead of NaN plus a runtime warning.
      row_totals = cm.sum(axis=1)[:, np.newaxis]
      safe_totals = np.where(row_totals == 0, 1, row_totals)
      labels = np.around(cm.astype('float') / safe_totals, decimals=2)

      # White text on dark cells, black text on light cells.
      threshold = cm.max() / 2.
      for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        color = "white" if cm[i, j] > threshold else "black"
        ax.text(j, i, labels[i, j], horizontalalignment="center", color=color)

      ax.set_ylabel('True label')
      ax.set_xlabel('Predicted label')
      # Run after the axis labels exist so the layout reserves room for them.
      figure.tight_layout()

      os.makedirs('images', exist_ok=True)  # the save must not depend on an earlier cell
      figure.savefig('images/conf{0}.png'.format(self.counter)) #save file
    finally:
      # Close this specific figure even if drawing/saving failed, so figures
      # do not pile up in memory over the epochs.
      plt.close(figure)

plotter = ConfusionMatrix(val_set, val_set.classes)

In [10]:
# MobileNetV3-Small trained from scratch (weights=None) with its own
# N_CLASSES-way softmax head (include_top=True).
# NOTE(review): per the Keras docs `pooling` only applies when
# include_top=False, so 'avg' presumably has no effect here — confirm.
mobileNetV3Small = tf.keras.applications.MobileNetV3Small(
    input_shape=INPUT_SHAPE, alpha=1.0, minimalistic=True, include_top=True,
    weights=None, input_tensor=None, classes=N_CLASSES, pooling='avg',
    dropout_rate=0.2, classifier_activation='softmax'
)

# Keep the symbolic input and the preprocessed tensor under separate names.
# Overwriting `inputs` with the result of preprocess_input and then using it
# as the model input only works while that function returns its argument
# unchanged; it breaks as soon as a real preprocessing step is swapped in.
inputs = tf.keras.layers.Input(shape=INPUT_SHAPE)
preprocessed = tf.keras.applications.mobilenet_v3.preprocess_input(inputs) #only for mobilenet
outputs = mobileNetV3Small(preprocessed)

model = tf.keras.Model(inputs=inputs, outputs=outputs)

# NOTE(review): with a categorical cross-entropy loss, 'accuracy' appears to
# resolve to categorical accuracy, so both metrics report the same value (as
# in the recorded log). Both are kept so the logged metric names do not change.
model.compile(loss='categorical_crossentropy',
                optimizer='adam',
                metrics=['accuracy','categorical_accuracy'])

# Keep the History object so the per-epoch metrics remain available afterwards.
# steps_per_epoch uses floor division, i.e. only full batches are counted.
history = model.fit(
        train_set,
        steps_per_epoch=train_set.n // BATCH_SIZE,
        epochs=40,
        callbacks = [plotter]) #callback for confusion matrix per epoch


Epoch 1/40
  1/411 [..............................] - ETA: 1:32:18 - loss: 1.6527 - accuracy: 0.1953 - categorical_accuracy: 0.1953

KeyboardInterrupt: ignored

In [None]:
from google.colab import files
model.save("mobilenet")

!zip -r mobilenet.zip mobilenet
!zip -r conf.zip images

files.download('mobilenet.zip') 
files.download('images.zip') 