In [None]:
#import necessary libraries
import os

from PIL import Image
import matplotlib.pyplot as plt
import json

import numpy as np
import pandas as pd

from sklearn.utils import class_weight

import keras
import tensorflow as tf

from keras_preprocessing.image import ImageDataGenerator, load_img
from keras.layers import Dense, Activation, Flatten, Dropout, BatchNormalization, GlobalAveragePooling2D
from keras.models import Sequential, load_model
from keras.layers import Conv2D, MaxPooling2D, AveragePooling2D, Activation
from keras import optimizers
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from keras.utils.vis_utils import plot_model
from keras.regularizers import L1L2

from keras.applications.vgg16 import VGG16
from keras.applications.resnet50 import ResNet50
from keras.applications import MobileNetV2, InceptionV3
from tensorflow.keras.applications import EfficientNetB3
from tensorflow.keras.applications.efficientnet import preprocess_input


In [None]:
# Dataset locations (Kaggle input mount); sub-directories are derived from DIR.
DIR = "/kaggle/input/cassava-leaf-disease-classification"
TEST_DIR = DIR + "/test_images/"
IMAGE_DIR = DIR + "/train_images/"

# Training hyper-parameters.
# VALID_STEPS is not referenced by the cells visible in this notebook.
EPOCHS, BATCH_SIZE, VALID_STEPS = 30, 64, 30

# Image dimensions. IMG_SIZE is the target size fed to the data generators;
# IMAGE_SIZE is not referenced by the cells visible in this notebook.
IMAGE_SIZE = (512, 512)
IMG_SIZE = (224, 224)

In [None]:
# Read the training manifest (one row per image) and display it.
train_csv_path = os.path.join(DIR, 'train.csv')
train_df = pd.read_csv(train_csv_path)
train_df

In [None]:
# Preview the first training image and report its PIL size.
first_image_id = train_df['image_id'][0]
first_image_path = os.path.join(DIR, 'train_images', first_image_id)

sample_image = Image.open(first_image_path)
img_array = np.array(sample_image)

plt.imshow(img_array)
print(sample_image.size)

In [None]:
# Load the label mapping from JSON.
# A context manager guarantees the file handle is closed even if parsing fails.
with open(os.path.join(DIR, 'label_num_to_disease_map.json')) as f:
    data = json.load(f)

# Print every key of the mapping (presumably the class indices -- verify
# against the JSON file).
for i in data:
    print(i)

# Concatenate all mapped values into a single string and print it.
# ''.join(...) is equivalent to appending the value piece by piece.
label0 = ''
for i in data:
    label0 += ''.join(data[str(i)])

print(label0)

In [None]:
# Print the raw label column, then draw its distribution as a 5-bin histogram.
label_column = train_df["label"]
print(label_column)

label_list = label_column.tolist()
plt.hist(label_list, bins=5)

In [None]:
# Cast the label column to strings (the generators below are created with
# class_mode="categorical" on this column), then print the frame.
train_df = train_df.astype({'label': str})
print(train_df)

In [None]:
# Loads images using generators.
#
# Training generator: rescaling plus random augmentation.
# NOTE(review): tf.keras EfficientNet models are documented to expect raw
# [0, 255] pixel inputs and their preprocess_input is a pass-through, so
# rescale=1/255 may be normalising twice. It is kept here because
# process_image() applies the same scaling at inference time -- change both
# together if this is revisited.
datagen = ImageDataGenerator(
    rescale=1./255.,
    validation_split=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    preprocessing_function=tf.keras.applications.efficientnet.preprocess_input,
)

# Validation generator: same rescaling and the SAME validation_split (so both
# generators slice the dataframe at the same boundary), but no random
# augmentation, so validation metrics are measured on unmodified images.
val_datagen = ImageDataGenerator(
    rescale=1./255.,
    validation_split=0.2,
    preprocessing_function=tf.keras.applications.efficientnet.preprocess_input,
)

# Arguments shared by the training and validation iterators.
_flow_kwargs = dict(
    dataframe=train_df,
    directory=IMAGE_DIR,
    x_col="image_id",
    y_col="label",
    batch_size=BATCH_SIZE,
    seed=42,
    class_mode="categorical",
    color_mode='rgb',
    target_size=IMG_SIZE,
)

train_generator = datagen.flow_from_dataframe(
    subset="training",
    shuffle=True,
    **_flow_kwargs
)

val_generator = val_datagen.flow_from_dataframe(
    subset="validation",
    shuffle=False,
    **_flow_kwargs
)


In [None]:
#loads pre-trained weights
def load_EfficeientNet_model(
        input_shape=(224, 224, 3),
        weights='../input/keras-pretrained-models/EfficientNetB3_NoTop_ImageNet.h5',
        drop_connect_rate=0.4):
    """Build the EfficientNetB3 backbone without its classification head.

    Args:
        input_shape: shape passed to EfficientNetB3 as ``input_shape``.
            Defaults to (224, 224, 3), the value previously hard-coded here.
        weights: value passed to EfficientNetB3 as ``weights``; defaults to
            the weights file path previously hard-coded here.
        drop_connect_rate: forwarded unchanged to EfficientNetB3.

    Returns:
        The model returned by ``EfficientNetB3(include_top=False, ...)``.
    """
    return EfficientNetB3(
        include_top=False,
        input_shape=input_shape,
        weights=weights,
        drop_connect_rate=drop_connect_rate,
    )
    

In [None]:
#builds trainable portion of model
#takes base model from load_EfficeientNet_model()
#returns model, optimizer, loss, and metrics
def build_EfficientNetModel(base_model, num_classes=5, learning_rate=1e-3):
    """Stack a small classification head on top of ``base_model``.

    Args:
        base_model: backbone added as the first layer of the Sequential model.
        num_classes: width of the final softmax layer (default 5, the value
            previously hard-coded).
        learning_rate: Adam learning rate (default 1e-3, as before).

    Returns:
        Tuple ``(model, optimizer, loss, metrics)``. The model is NOT
        compiled here; the caller passes the other three items to
        ``model.compile``.
    """
    new_model = Sequential([
        base_model,
        GlobalAveragePooling2D(),
        # Retained from the original architecture so the layer stack is
        # unchanged; the pooled output above is already flat per sample.
        Flatten(),
        # NOTE(review): only the bias is regularised here; confirm that
        # kernel_regularizer was not the intended argument.
        Dense(256, activation='relu', bias_regularizer=L1L2(l1=0.01, l2=0.001)),
        Dropout(0.5),
        Dense(num_classes, activation='softmax'),
    ])

    # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
    optimizer = optimizers.Adam(learning_rate=learning_rate)
    loss = keras.losses.CategoricalCrossentropy(label_smoothing=0.0001)
    metrics = ['accuracy']
    return new_model, optimizer, loss, metrics

In [None]:
# Load the backbone, then wrap it with the classification head.
# The head builder also hands back the optimizer, loss and metrics
# that the compile step uses.
EfficientNet_model = load_EfficeientNet_model()
model, optimizer, loss, metrics = build_EfficientNetModel(base_model=EfficientNet_model)

In [None]:
# Compile with the optimizer / loss / metrics returned by build_EfficientNetModel.
model.compile(
    optimizer=optimizer,
    loss=loss,
    metrics=metrics,
)

In [None]:
# Print the layer-by-layer summary of the compiled model.
model.summary()

In [None]:
# Render the model architecture (with tensor shapes and layer names)
# to model_plot.png.
plot_model(
    model,
    to_file='model_plot.png',
    show_shapes=True,
    show_layer_names=True,
)

In [None]:
# Training callbacks, all driven by validation loss:
#  - stop after 3 epochs without improvement,
#  - keep the best weights seen so far in best_model.h5,
#  - multiply the learning rate by 0.4 after 2 stagnant epochs (floor 1e-6).
early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', mode='min', patience=3)
checkpoint = ModelCheckpoint("best_model.h5", monitor='val_loss', save_best_only=True)
reducelr = ReduceLROnPlateau(monitor='val_loss', patience=2, verbose=1, mode='min', factor=0.4, min_lr=1e-6)

# model.fit accepts generators directly; fit_generator is deprecated.
# The checkpoint callback was previously created but never passed in, so
# best_model.h5 was never written -- it is now part of the callback list.
history = model.fit(
    train_generator,
    epochs=EPOCHS,
    validation_data=val_generator,
    callbacks=[early_stopping, checkpoint, reducelr],
)


In [None]:
# Plot the per-epoch curves recorded during training: one figure for
# loss vs. validation loss, one for accuracy vs. validation accuracy.
history_dataframe = pd.DataFrame(history.history)
for metric_columns in (['loss', 'val_loss'], ['accuracy', 'val_accuracy']):
    history_dataframe[metric_columns].plot()

In [None]:

# Persist the model in its current (end-of-training) state to modelv2.h5.
model.save('modelv2.h5')

In [None]:
#model = load_model('../input/saved-cassavaa-ef/model_EF_86.h5')

In [None]:
#takes image: resizes, standardizes, and converts to array
def process_image(image, target_size=(224, 224)):
    """Resize an image and scale its pixel values for inference.

    Args:
        image: image data accepted by ``tf.image.resize`` (the caller in this
            notebook passes a NumPy array built from a PIL image).
        target_size: ``(height, width)`` passed to ``tf.image.resize``.
            Defaults to (224, 224), the value previously hard-coded.

    Returns:
        NumPy array holding the resized image divided by 255.
    """
    resized = tf.image.resize(image, target_size)
    # Divide by 255 (not 225) so inference matches the 1/255 rescale used by
    # the training data generator.
    scaled = resized / 255.0
    # Actually return the array; the original discarded the .numpy() result
    # and handed back a tensor.
    return scaled.numpy()
    

In [None]:

#creates final submission.csv
# Each test image is processed using process_image and given to the model;
# the predicted class index is recorded per image. Rows are collected in a
# list and turned into a DataFrame once, instead of growing the frame with
# DataFrame.append (removed in pandas 2.0, and quadratic).
submission_rows = []

# sorted() makes the row order reproducible across runs.
for image_id in sorted(os.listdir(TEST_DIR)):
    img_path = os.path.join(TEST_DIR, image_id)

    # Close the file promptly and force 3 channels, which is what the
    # model's input layer is built for.
    with Image.open(img_path) as im:
        img = np.asarray(im.convert('RGB'))

    processed_img = process_image(img)

    # Add the batch dimension expected by model.predict.
    np_image = np.expand_dims(processed_img, axis=0)

    # predict_classes no longer exists on recent Keras models; taking the
    # arg-max over the softmax output is the equivalent.
    probabilities = model.predict(np_image)
    pred = int(np.argmax(probabilities, axis=-1)[0])

    submission_rows.append({'image_id': image_id, 'label': pred})

# Explicit columns keep the header intact even if the test folder is empty.
submission_pd = pd.DataFrame(submission_rows, columns=['image_id', 'label'])

submission_pd

In [None]:
# Write the predictions to submission.csv without the DataFrame index column.
submission_pd.to_csv('submission.csv', index = False)