In [1]:
#importing required libraries
import numpy as np
import pandas as pd
import tensorflow as tf
import cv2
import matplotlib.pyplot as plt

#Training data
img_path = "./train_images"
train_dataframe = pd.read_csv("./train.csv")

#getting different output classes
no_labels = train_dataframe.labels.value_counts()
output_classes = list(no_labels.index)
classes = list(no_labels.values)
print("Number of ouput classes: ",len(output_classes))
print(output_classes)
print(classes)

Number of ouput classes:  12
['scab', 'healthy', 'frog_eye_leaf_spot', 'rust', 'complex', 'powdery_mildew', 'scab frog_eye_leaf_spot', 'scab frog_eye_leaf_spot complex', 'frog_eye_leaf_spot complex', 'rust frog_eye_leaf_spot', 'rust complex', 'powdery_mildew complex']
[4826, 4624, 3181, 1860, 1602, 1184, 686, 200, 165, 120, 97, 87]


In [2]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
train_datagen = ImageDataGenerator(
    rescale = 1/255.,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    validation_split = 0.2,
    zoom_range = 0.2,
    shear_range = 0.2,
    vertical_flip = False
)
size = 124;
#selecting 80% of the data as training data
train_dataset  = train_datagen.flow_from_dataframe(
    train_dataframe,
    directory = img_path,
    x_col = "image",
    y_col = "labels",
    target_size = (size,size),
    class_mode='categorical',
    batch_size = 32,
    validation_split = 0.2,
    subset = "training",
    shuffle = True,
    seed = 143,
    validate_filenames = False
)
#Remaining 20% as validation data
validation_dataset  = train_datagen.flow_from_dataframe(
    train_dataframe,
    directory = img_path,
    x_col = "image",
    y_col = "labels",
    target_size = (size,size),
    class_mode='categorical',
    batch_size = 32,
    validation_split = 0.2,
    subset = "validation",
    shuffle = True,
    seed = 143,
    validate_filenames = False
)

Found 14906 non-validated image filenames belonging to 12 classes.
Found 3726 non-validated image filenames belonging to 12 classes.


In [3]:
#CNN Model
model = tf.keras.Sequential()
model.add(tf.keras.layers.Conv2D(64,(3,3),activation='relu',padding='same',input_shape=(size,size,3)))
model.add(tf.keras.layers.MaxPooling2D(2,2))
model.add(tf.keras.layers.Conv2D(64,(3,3),activation='relu',padding='same'))
model.add(tf.keras.layers.MaxPooling2D(2,2))
model.add(tf.keras.layers.Conv2D(64,(3,3),activation='relu',padding='same'))
model.add(tf.keras.layers.MaxPooling2D(2,2))
model.add(tf.keras.layers.Conv2D(128,(3,3),activation='relu',padding='same'))
model.add(tf.keras.layers.MaxPooling2D(2,2))
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(12,activation='softmax'))

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy'])

In [4]:

chkpt=tf.keras.callbacks.ModelCheckpoint('./models',
                          monitor='val_loss',
                          mode='min',
                          save_best_only=True,
                          verbose=1)
stop=tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                       min_delta=0,
                       patience=10,
                       verbose=1,
                       restore_best_weights=True)

callbacks=[chkpt,stop]

model_history=model.fit(train_dataset,validation_data=validation_dataset,
                                 epochs=10,
                                 steps_per_epoch=train_dataset.samples//128,
                                 validation_steps=validation_dataset.samples//128,
                                 callbacks=callbacks)

Epoch 1/10

Epoch 00001: val_loss improved from inf to 1.82173, saving model to .\models
INFO:tensorflow:Assets written to: .\models\assets
Epoch 2/10

Epoch 00002: val_loss improved from 1.82173 to 1.78592, saving model to .\models
INFO:tensorflow:Assets written to: .\models\assets
Epoch 3/10

Epoch 00003: val_loss improved from 1.78592 to 1.70791, saving model to .\models
INFO:tensorflow:Assets written to: .\models\assets
Epoch 4/10

Epoch 00004: val_loss improved from 1.70791 to 1.62622, saving model to .\models
INFO:tensorflow:Assets written to: .\models\assets
Epoch 5/10

Epoch 00005: val_loss did not improve from 1.62622
Epoch 6/10

Epoch 00006: val_loss did not improve from 1.62622
Epoch 7/10

Epoch 00007: val_loss improved from 1.62622 to 1.54524, saving model to .\models
INFO:tensorflow:Assets written to: .\models\assets
Epoch 8/10

Epoch 00008: val_loss improved from 1.54524 to 1.43818, saving model to .\models
INFO:tensorflow:Assets written to: .\models\assets
Epoch 9/10

Ep

In [6]:
test_dataframe = pd.read_csv("./sample_submission.csv")
test_datagen = ImageDataGenerator(rescale = 1./255)
sz = (124,124,3)
test_data =  test_datagen.flow_from_dataframe(
    test_dataframe,
    directory="./test_images",
    x_col='image',
    y_col=None,
    class_mode=None,
    target_size=sz[:2]
)
predictions = model.predict(test_data)

Found 3 validated image filenames.


In [7]:
def get_index(x):
    for a,y in train_dataset.class_indices.items():
        if x == y:
            return a

In [8]:
def predict_labels(predictions):
    pred_labels = []
    for p in predictions:
        pred_label = []
        z = (p>=0.4)
        
        if z.sum() == 0:
            pred_label.append('healthy')
        elif p[2] >= 0.5:
            pred_label.append('healthy')
            
        else:
            for i,j in enumerate(p):
                if j >= 0.28:
                    pred_label.append(get_index(i))
        
        pred_labels.append(' '.join(pred_label))
    return pred_labels

In [9]:
test_dataframe['labels'] = predict_labels(predictions)
test_dataframe.head()

Unnamed: 0,image,labels
0,85f8cb619c66b863.jpg,healthy
1,ad8770db05586b59.jpg,frog_eye_leaf_spot
2,c7b03e718489f3ca.jpg,rust


In [10]:
test_dataframe.to_csv('submission.csv')