# Paddy Doctor - Paddy Disease Classification

> "A Multi Class Classification problem, for classifying the diseases in the Paddy Plant"

- toc: true
- branch: master
- badges: true
- comments: true
- categories: [paddy, plant, disease, classification, multi, class, kaggle, convolutional, neural, network]
- hide: false

In [1]:
# Required modules

import os
import cv2
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf

from zipfile import ZipFile
from matplotlib import pyplot as plt

In [2]:
# Config

%matplotlib inline
plt.rcParams['figure.figsize'] = (12, 12)

Before running the below cell, upload your kaggle token, to make sure an error doesn't popup.

In [8]:
# Create kaggle folder

!mkdir ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

mkdir: cannot create directory ‘/root/.kaggle’: File exists


In [9]:
# Test the command

!kaggle competitions download -c paddy-disease-classification

Downloading paddy-disease-classification.zip to /content
 99% 1.00G/1.02G [00:07<00:00, 161MB/s]
100% 1.02G/1.02G [00:07<00:00, 147MB/s]


In [10]:
# Extract the zip file

with ZipFile('/content/paddy-disease-classification.zip', 'r') as zf:
    zf.extractall('./')

## Loading the data

In [11]:
# Load Constants

img_dim = 256
batch_size = 32
training_dir = './train_images'
testing_dir = './test_images'

In [12]:
# Load the data

train = pd.read_csv('train.csv')
train.head()

Unnamed: 0,image_id,label,variety,age
0,100330.jpg,bacterial_leaf_blight,ADT45,45
1,100365.jpg,bacterial_leaf_blight,ADT45,45
2,100382.jpg,bacterial_leaf_blight,ADT45,45
3,100632.jpg,bacterial_leaf_blight,ADT45,45
4,101918.jpg,bacterial_leaf_blight,ADT45,45


In [13]:
# Load the image data

datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1/255.,
    horizontal_flip=True,
    vertical_flip=True,
    validation_split=0.2
)

train_generator = datagen.flow_from_directory(
    training_dir,
    subset='training',
    seed=88,
    target_size=(img_dim, img_dim),
    batch_size=batch_size,
    class_mode='categorical'
)

valid_generator = datagen.flow_from_directory(
    training_dir,
    subset='validation',
    seed=88,
    target_size=(img_dim, img_dim),
    batch_size=batch_size,
    class_mode='categorical'
)

Found 8330 images belonging to 10 classes.
Found 2077 images belonging to 10 classes.


In [14]:
# Class Indices

train_generator.class_indices

{'bacterial_leaf_blight': 0,
 'bacterial_leaf_streak': 1,
 'bacterial_panicle_blight': 2,
 'blast': 3,
 'brown_spot': 4,
 'dead_heart': 5,
 'downy_mildew': 6,
 'hispa': 7,
 'normal': 8,
 'tungro': 9}

## Modelling

### Approach-1



Use of a Hand picked model, in modelling the problem

In [15]:
# Defining the model

model = tf.keras.models.Sequential([
        tf.keras.layers.Input(shape=(256, 256, 3)),
        tf.keras.layers.Conv2D(filters=64, kernel_size=3, activation='relu'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.MaxPooling2D(),
        tf.keras.layers.Conv2D(filters=32, kernel_size=3, activation='relu'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.MaxPooling2D(),
        tf.keras.layers.Conv2D(filters=32, kernel_size=3, activation='relu'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.MaxPooling2D(),
        tf.keras.layers.Conv2D(filters=32, kernel_size=3, activation='relu'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.MaxPooling2D(),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(32, activation='relu'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(10, activation='softmax'),
])

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 254, 254, 64)      1792      
                                                                 
 batch_normalization (BatchN  (None, 254, 254, 64)     256       
 ormalization)                                                   
                                                                 
 max_pooling2d (MaxPooling2D  (None, 127, 127, 64)     0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 125, 125, 32)      18464     
                                                                 
 batch_normalization_1 (Batc  (None, 125, 125, 32)     128       
 hNormalization)                                                 
                                                        

### Approach-2

In [None]:
# Loading the pre-trained model

input_layer = tf.keras.layers.Input(shape=(img_dim, img_dim, 3))
pretrained = tf.keras.applications.mobilenet_v2.MobileNetV2(include_top=False, weights='imagenet', input_tensor=input_layer, classes=10)

for layer in pretrained.layers:
    layer.trainable = False

custom_network = tf.keras.layers.Flatten()(pretrained.output)
custom_network = tf.keras.layers.Dense(32, activation='relu')(custom_network)
custom_network = tf.keras.layers.Dense(16, activation='relu')(custom_network)
custom_network = tf.keras.layers.Dense(10, activation='relu')(custom_network)

model = tf.keras.models.Model(inputs=input_layer, outputs=custom_network)
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 256, 256, 3  0           []                               
                                )]                                                                
                                                                                                  
 Conv1 (Conv2D)                 (None, 128, 128, 32  864         ['input_1[0][0]']                
                                )                                                                 
                                                                                                  
 bn_Conv1 (BatchNormalization)  (None, 128, 128, 32  128         ['Conv1[0][0]']                  
                                )                                                             

In [16]:
# Callbacks

model_checkpoint = tf.keras.callbacks.ModelCheckpoint('custom_model_checkpoint.hdf5', save_best_only=True)

In [17]:
# Compile the model

optim = tf.keras.optimizers.Adam(lr=0.001)
loss = tf.keras.losses.categorical_crossentropy
metrics = ['acc']

model.compile(optimizer=optim, loss=loss, metrics=metrics)

  super(Adam, self).__init__(name, **kwargs)


In [18]:
# Fitting the model

epochs = 15
batch_size = 64

model.fit(train_generator, validation_data=valid_generator, epochs=epochs, batch_size=batch_size, shuffle=True, callbacks=[model_checkpoint])

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x7f32425c28d0>

In [20]:
# Loading the best model

model = tf.keras.models.load_model('custom_model_checkpoint.hdf5')

In [21]:
# Loading the test data

test_generator = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255).flow_from_directory(    
    directory=testing_dir,
    target_size=(256, 256),
    classes=['.'],
    shuffle=False,
    seed=88
)

Found 3469 images belonging to 1 classes.


In [22]:
# Getting test predictions

test_preds = model.predict(test_generator, verbose=1)
test_preds = np.argmax(test_preds, axis=1)



In [23]:
# Mapping the labels to disease names

inverse_map = {v: k for k, v in train_generator.class_indices.items()}
test_preds = [inverse_map[idx] for idx in test_preds]

In [24]:
# Saving the output

filenames = test_generator.filenames

output = pd.DataFrame({"image_id": filenames, "label": test_preds})
output.image_id = output.image_id.str.replace('./', '')
output.to_csv("submission.csv",index=False)

  


In [25]:
# Submission

!kaggle competitions submit -c paddy-disease-classification -f submission.csv -m "Custom Model with best saved V2 Network, epochs=25"

  0% 0.00/67.7k [00:00<?, ?B/s]100% 67.7k/67.7k [00:00<00:00, 371kB/s]
Successfully submitted to Paddy Doctor: Paddy Disease Classification

Accuracy: 0.71