In [None]:
import tensorflow as tf
tf.__version__

In [None]:
import pandas as pd
import numpy as np
import os

import cv2

import albumentations as albu
from albumentations import Compose, ShiftScaleRotate, Resize
from albumentations.pytorch import ToTensor


from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

from sklearn.metrics import confusion_matrix
import itertools
from sklearn.metrics import classification_report

import shutil

import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
IMAGE_HEIGHT = 224
IMAGE_WIDTH = 224
IMAGE_CHANNELS = 3

In [None]:
os.listdir("../input/rice-diseases-image-dataset/LabelledRice/Labelled")

In [None]:
# Collect the image file names found in each class folder of the dataset.
labelled_root = '../input/rice-diseases-image-dataset/LabelledRice/Labelled'

brownspot_list = os.listdir(labelled_root + '/BrownSpot')
healthy_list = os.listdir(labelled_root + '/Healthy')
hispa_list = os.listdir(labelled_root + '/Hispa')
LeafBlast_list = os.listdir(labelled_root + '/LeafBlast')

# Print the number of files per class, in the order the lists were built.
for class_files in (brownspot_list, healthy_list, hispa_list, LeafBlast_list):
    print(len(class_files))

In [None]:
def _class_frame(file_names, label):
    """Build a frame with an 'image' column and a constant 'target' column."""
    frame = pd.DataFrame(file_names, columns=['image'])
    frame['target'] = label
    return frame


# One frame per class; 'target' holds the class label as a string.
df_brownspot = _class_frame(brownspot_list, 'brownspot')
df_healthy = _class_frame(healthy_list, 'healthy')
df_hispa = _class_frame(hispa_list, 'hispa')
df_LeafBlast = _class_frame(LeafBlast_list, 'LeafBlast')


# Create a val set for each class.
# The sample size is fixed per class and random_state pins which rows
# are drawn, so the split is repeatable.
df_brownspot_val = df_brownspot.sample(n=105, random_state=500)
df_healthy_val = df_healthy.sample(n=297, random_state=500)
df_hispa_val = df_hispa.sample(n=113, random_state=500)
df_LeafBlast_val = df_LeafBlast.sample(n=155, random_state=500)


# Print the validation set size of each class.
for val_frame in (df_brownspot_val, df_healthy_val, df_hispa_val, df_LeafBlast_val):
    print(len(val_frame))

In [None]:
# The training rows of a class are the rows whose file name was NOT drawn
# into that class's validation sample (~ negates the isin mask).

# brownspot
df_brownspot_train = df_brownspot.loc[
    ~df_brownspot['image'].isin(df_brownspot_val['image'])
]

# healthy
df_healthy_train = df_healthy.loc[
    ~df_healthy['image'].isin(df_healthy_val['image'])
]

# hispa
df_hispa_train = df_hispa.loc[
    ~df_hispa['image'].isin(df_hispa_val['image'])
]

# LeafBlast
df_LeafBlast_train = df_LeafBlast.loc[
    ~df_LeafBlast['image'].isin(df_LeafBlast_val['image'])
]

# Print the training set size of each class.
for train_frame in (df_brownspot_train, df_healthy_train, df_hispa_train, df_LeafBlast_train):
    print(len(train_frame))



In [None]:

# Stack the per-class frames into one frame for all data, one for training
# and one for validation.
df_data = pd.concat([df_brownspot, df_healthy, df_hispa,df_LeafBlast], axis=0).reset_index(drop=True)

df_train = \
pd.concat([df_brownspot_train, df_healthy_train, df_hispa_train,df_LeafBlast_train], axis=0).reset_index(drop=True)

df_val = \
pd.concat([df_brownspot_val, df_healthy_val, df_hispa_val,df_LeafBlast_val], axis=0).reset_index(drop=True)

# Shuffle with a fixed seed (same value as the sampling above) so the row
# order, and therefore the CSV files written later, is identical on every
# Restart & Run All.
df_data = shuffle(df_data, random_state=500)
df_train = shuffle(df_train, random_state=500)
df_val = shuffle(df_val, random_state=500)

print(df_data.shape)
print(df_train.shape)
print(df_val.shape)

In [None]:
df_data['target'].value_counts()

In [None]:
df_train['target'].value_counts()

In [None]:
df_val['target'].value_counts()

In [None]:
# One-hot encode 'target' on the combined frame so that train and val get
# exactly the same set of dummy columns, then split them apart again.
val_len = len(df_val)
train_len = len(df_train)
df_combined =  pd.concat(objs=[df_val, df_train], axis=0).reset_index(drop=True)

# create the dummy variables
# dtype is pinned because the default differs between pandas versions
# (bool since pandas 2.0); 0/1 integers keep the CSV files and the label
# arrays numeric regardless of the installed version.
df_combined = pd.get_dummies(df_combined, columns=['target'], dtype='uint8')

# separate the train and val sets (val rows come first in df_combined)
df_val = df_combined[:val_len]
df_train = df_combined[val_len:]


print(df_train.shape)
print(df_val.shape)

In [None]:
# The one-hot encoding and the train/val split are done in the previous cell.
# Repeating them here would fail because the 'target' column no longer exists
# after get_dummies, so this cell only inspects the result.
print(df_train.shape)
print(df_val.shape)

df_combined.head()

In [None]:
df_train.head()

In [None]:
df_val.head()

In [None]:
# Write the encoded frames to gzip-compressed CSV files; the generators
# below read df_train.csv.gz and df_val.csv.gz back in chunks.
for frame, csv_name in (
    (df_combined, 'df_combined.csv.gz'),
    (df_train, 'df_train.csv.gz'),
    (df_val, 'df_val.csv.gz'),
):
    frame.to_csv(csv_name, compression='gzip', index=False)

In [None]:
!ls

In [None]:
# Create a new directory
image_dir = 'image_dir'
os.mkdir(image_dir)

!ls

In [None]:
len(os.listdir('image_dir'))

In [None]:
import albumentations as albu


def augment_image(augmentation, image):
    """
    Run one Albumentations transform on a single image.

    Inputs:
    1. augmentation - the transform (or composed pipeline) to apply. It is
    called as augmentation(image=image) and must return a dict that has
    an 'image' entry.

    2. image - the image to transform.

    Output:
    Whatever the transform stored under the 'image' key of its result.
    """
    return augmentation(image=image)['image']

In [None]:
# Define the transforms


# Augmentation pipeline for training images; train_generator applies it to
# every image through augment_image(aug_types, image).
# Each OneOf group lists alternative transforms and carries its own p
# (probability that the group is executed).
# NOTE(review): RandomContrast, RandomBrightness and the alpha_affine argument
# appear to be deprecated or removed in newer Albumentations releases -
# confirm against the installed version.
aug_types = albu.Compose([
            # flip step outside any group
            albu.HorizontalFlip(),
            # flip group: horizontal or vertical
             albu.OneOf([
                albu.HorizontalFlip(),
                albu.VerticalFlip(),
                ], p=0.8),
            # colour group: contrast, gamma or brightness
            albu.OneOf([
                albu.RandomContrast(),
                albu.RandomGamma(),
                albu.RandomBrightness(),
                ], p=0.3),
            # geometric distortion group
            albu.OneOf([
                albu.ElasticTransform(alpha=120, sigma=120 * 0.05, alpha_affine=120 * 0.03),
                albu.GridDistortion(),
                albu.OpticalDistortion(distort_limit=2, shift_limit=0.5),
                ], p=0.3),
            # final shift / scale / rotate step
            albu.ShiftScaleRotate()
            ])

In [None]:
# Get an image to test transformations

# get a list of train png images
path = 'image_dir/'
image_list = os.listdir('image_dir')

fname = image_list[1]
image_path = path + fname

image = plt.imread(image_path)
plt.imshow(image)

In [None]:
# Test the transformation setup.
# The image will be different each time this cell is run.

aug_image = augment_image(aug_types, image)

plt.imshow(aug_image)

In [None]:
df_train.head()

In [None]:
def train_generator(batch_size=8):
    """
    Endlessly yield augmented training batches.

    Reads 'df_train.csv.gz' in chunks of `batch_size` rows. Every image named
    in the 'image' column is loaded from 'image_dir/', converted from BGR to
    RGB, resized to IMAGE_HEIGHT x IMAGE_WIDTH and passed through `aug_types`.
    After the last chunk the file is read again from the start.

    Input:
    batch_size - number of CSV rows (images) per batch.

    Yields:
    (X_train, y_train) where X_train has shape
    (n, IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNELS) with values scaled to
    [0, 1], and y_train holds the four one-hot target columns, shape (n, 4).
    n equals batch_size except possibly for the last chunk of the file.

    Raises:
    FileNotFoundError if an image cannot be read from 'image_dir/'.
    """
    # one-hot label columns; this order defines the class index order
    cols = ['target_brownspot', 'target_healthy', 'target_hispa','target_LeafBlast']

    while True:

        # load the data in chunks (batches)
        for df in pd.read_csv('df_train.csv.gz', chunksize=batch_size):

            # get the list of images
            image_id_list = list(df['image'])

            # Create empty X matrix - 3 channels
            X_train = np.zeros((len(df), IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNELS), dtype=np.uint8)

            # Create y_train once per batch (it does not depend on the image loop)
            y_train = np.asarray(df[cols])

            # Create X_train
            for i, image_id in enumerate(image_id_list):

                # set the path to the image
                path = 'image_dir/' + image_id

                # read the image; cv2.imread returns None instead of raising
                image = cv2.imread(path)
                if image is None:
                    raise FileNotFoundError('Could not read image: ' + path)

                # convert from BGR to RGB
                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

                # resize the image; cv2.resize takes dsize as (width, height)
                image = cv2.resize(image, (IMAGE_WIDTH, IMAGE_HEIGHT))

                # augment the image and insert it into X_train
                X_train[i] = augment_image(aug_types, image)

            # Normalize the images
            X_train = X_train/255

            yield X_train, y_train

In [None]:
# Test the generator

# initialize
train_gen = train_generator(batch_size=8)

# run the generator
X_train, y_train = next(train_gen)

print(X_train.shape)
print(y_train.shape)

In [None]:
y_train

In [None]:
# Print the first image in X_train
# Remember that train images have been augmented.

image = X_train[0,:,:,:]
plt.imshow(image)

In [None]:
def val_generator(batch_size=5):
    """
    Endlessly yield validation batches (no augmentation).

    Reads 'df_val.csv.gz' in chunks of `batch_size` rows. Every image named
    in the 'image' column is loaded from 'image_dir/', converted from BGR to
    RGB and resized to IMAGE_HEIGHT x IMAGE_WIDTH. After the last chunk the
    file is read again from the start.

    Input:
    batch_size - number of CSV rows (images) per batch.

    Yields:
    (X_val, y_val) where X_val has shape
    (n, IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNELS) with values scaled to
    [0, 1], and y_val holds the four one-hot target columns, shape (n, 4).
    n equals batch_size except possibly for the last chunk of the file.

    Raises:
    FileNotFoundError if an image cannot be read from 'image_dir/'.
    """
    # one-hot label columns; this order defines the class index order
    cols = ['target_brownspot', 'target_healthy', 'target_hispa','target_LeafBlast']

    while True:

        # load the data in chunks (batches)
        for df in pd.read_csv('df_val.csv.gz', chunksize=batch_size):

            # get the list of images
            image_id_list = list(df['image'])

            # Create empty X matrix - 3 channels
            X_val = np.zeros((len(df), IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNELS), dtype=np.uint8)

            # Create y_val once per batch (it does not depend on the image loop)
            y_val = np.asarray(df[cols])

            # Create X_val
            for i, image_id in enumerate(image_id_list):

                # set the path to the image
                path = 'image_dir/' + image_id

                # read the image; cv2.imread returns None instead of raising
                image = cv2.imread(path)
                if image is None:
                    raise FileNotFoundError('Could not read image: ' + path)

                # convert from BGR to RGB
                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

                # resize the image; cv2.resize takes dsize as (width, height)
                image = cv2.resize(image, (IMAGE_WIDTH, IMAGE_HEIGHT))

                # insert the image into X_val
                X_val[i] = image

            # Normalize the images
            X_val = X_val/255

            yield X_val, y_val

In [None]:
# Test the generator

# initialize
val_gen = val_generator(batch_size=5)

# run the generator
X_val, y_val = next(val_gen)

print(X_val.shape)
print(y_val.shape)

In [None]:
y_val

In [None]:
# print the image from X_val
image = X_val[0,:,:,:]
plt.imshow(image)

In [None]:
def test_generator(batch_size=1):
    """
    Endlessly yield image-only batches for prediction.

    Reads 'df_val.csv.gz' (the validation rows double as the test set) in
    chunks of `batch_size` rows. Every image named in the 'image' column is
    loaded from 'image_dir/', converted from BGR to RGB and resized to
    IMAGE_HEIGHT x IMAGE_WIDTH. After the last chunk the file is read again
    from the start.

    Input:
    batch_size - number of CSV rows (images) per batch.

    Yields:
    X_test with shape (n, IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNELS) and
    values scaled to [0, 1]. No labels are yielded. n equals batch_size
    except possibly for the last chunk of the file.

    Raises:
    FileNotFoundError if an image cannot be read from 'image_dir/'.
    """
    while True:

        # load the data in chunks (batches)
        for df in pd.read_csv('df_val.csv.gz', chunksize=batch_size):

            # get the list of images
            image_id_list = list(df['image'])

            # Create empty X matrix - 3 channels
            X_test = np.zeros((len(df), IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNELS), dtype=np.uint8)

            # Create X_test
            for i, image_id in enumerate(image_id_list):

                # set the path to the image
                path = 'image_dir/' + image_id

                # read the image; cv2.imread returns None instead of raising
                image = cv2.imread(path)
                if image is None:
                    raise FileNotFoundError('Could not read image: ' + path)

                # convert from BGR to RGB
                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

                # resize the image; cv2.resize takes dsize as (width, height)
                image = cv2.resize(image, (IMAGE_WIDTH, IMAGE_HEIGHT))

                # insert the image into X_test
                X_test[i] = image

            # Normalize the images
            X_test = X_test/255

            yield X_test

In [None]:
# Test the generator

# initialize
test_gen = test_generator(batch_size=1)

# run the generator
X_test = next(test_gen)

print(X_test.shape)

In [None]:
# print the image from X_test

image = X_test[0,:,:,:]
plt.imshow(image)

In [None]:
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam

from tensorflow.keras.metrics import categorical_accuracy

from tensorflow.keras.callbacks import (EarlyStopping, ReduceLROnPlateau, 
                                        ModelCheckpoint, CSVLogger, LearningRateScheduler)

In [None]:
from tensorflow.keras.applications.mobilenet import MobileNet

# Number of target classes: brownspot, healthy, hispa, LeafBlast.
NUM_CLASSES = 4

# MobileNet with ImageNet weights, loaded including its original top layers.
base_model = MobileNet(weights='imagenet')

# Take the output of the second-to-last layer, i.e. leave out only the final
# layer of the pre-trained model.
# NOTE(review): which layer this is depends on the installed Keras MobileNet
# definition - check base_model.summary() to confirm it is the intended one.
x = base_model.layers[-2].output

# New dense softmax layer with one output per class.
predictions = Dense(NUM_CLASSES, activation='softmax')(x)

# inputs=base_model.input selects the input layer, outputs=predictions refers
# to the dense layer we created above.
model = Model(inputs=base_model.input, outputs=predictions)

model.summary()


In [None]:
# Batch sizes handed to the train and validation generators.
TRAIN_BATCH_SIZE = 8
VAL_BATCH_SIZE = 5

num_train_samples = len(df_train)
num_val_samples = len(df_val)
train_batch_size = TRAIN_BATCH_SIZE
val_batch_size = VAL_BATCH_SIZE

# Number of batches needed to see every row once. np.ceil returns a float,
# so the result is cast to int because step counts are whole numbers.
# determine num train steps
train_steps = int(np.ceil(num_train_samples / train_batch_size))
# determine num val steps
val_steps = int(np.ceil(num_val_samples / val_batch_size))

In [None]:
# Initialize the generators
train_gen = train_generator(batch_size=TRAIN_BATCH_SIZE)
val_gen = val_generator(batch_size=VAL_BATCH_SIZE)

# `learning_rate` is the current name of the optimizer argument
# (`lr` is a deprecated alias).
model.compile(
    Adam(learning_rate=0.0001),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Keep only the weights of the epoch with the best validation accuracy.
filepath = "model.h5"

checkpoint = ModelCheckpoint(filepath, monitor='val_accuracy', verbose=1,
                             save_best_only=True, mode='max')

# Write the per-epoch metrics to a CSV file (overwritten on every run).
log_fname = 'training_log.csv'
csv_logger = CSVLogger(filename=log_fname,
                       separator=',',
                       append=False)

callbacks_list = [checkpoint, csv_logger]

# Model.fit accepts Python generators directly; fit_generator is deprecated.
# The generators loop forever, so the step counts define the epoch length;
# int() guards against float step counts.
history = model.fit(train_gen, steps_per_epoch=int(train_steps), epochs=50,
                    validation_data=val_gen, validation_steps=int(val_steps),
                    verbose=1, callbacks=callbacks_list)

In [None]:
# Display the training log

train_log = pd.read_csv('training_log.csv')

train_log.head()

In [None]:
# get the metric names so we know the order of the values returned by evaluate_generator
model.metrics_names

In [None]:
# Restore the best checkpoint saved during training.
model.load_weights('model.h5')

# batch_size=1 with steps=len(df_val) evaluates every validation row once.
val_gen = val_generator(batch_size=1)

# Model.evaluate accepts generators directly; evaluate_generator is deprecated.
# verbose=0 keeps the cell output limited to the two prints below.
# The returned values follow the order of model.metrics_names.
val_loss, val_acc = \
model.evaluate(val_gen,
               steps=len(df_val),
               verbose=0)

print('val_loss:', val_loss)
print('val_acc:', val_acc)

In [None]:
# batch_size=1 with steps=len(df_val) predicts every row of df_val.csv.gz once,
# in file order.
test_gen = test_generator(batch_size=1)

# Model.predict accepts generators directly; predict_generator is deprecated.
preds = model.predict(test_gen, steps=len(df_val), verbose=1)

In [None]:
# get y_pred as index values

y_pred = np.argmax(preds, axis=1)

y_pred

In [None]:
# get y_true as index values

# Same column order as in the generators, so the argmax index means the
# same class for y_true and y_pred.
cols = ['target_brownspot', 'target_healthy', 'target_hispa','target_LeafBlast']

# Position of the 1 in each one-hot row = class index of that row.
y_true = np.argmax(np.asarray(df_val[cols]), axis=1)

y_true

In [None]:
# Compare y_true and y_pred

print(y_pred)
print(y_true)


In [None]:
import matplotlib.pyplot as plt

# Pie chart of the number of training images per class.
labels ='brown spot','healthy','hispa','leafblast'
size=[len(df_brownspot_train ),len(df_healthy_train ),len(df_hispa_train),len(df_LeafBlast_train)]
explode=(0,0.1,0,0)  # pull the 'healthy' slice out slightly
fig1,ax1=plt.subplots()
ax1.pie(size,explode=explode,labels=labels,autopct='%1.1f%%',shadow=True,startangle=90)
ax1.axis('equal')  # equal aspect ratio so the pie is drawn as a circle
# Title tells this chart apart from the validation-set pie chart.
ax1.set_title('Training images per class')
plt.show()

In [None]:
from sklearn.metrics import confusion_matrix
import itertools

cm = confusion_matrix(y_true, y_pred)

In [None]:
def plot_confusion_matrix(cm, classes, normalize=False,
                          title='Confusion matrix', cmap=plt.cm.Blues):
    """
    Draw a confusion matrix as an annotated heat map on the current figure.

    Inputs:
    cm - square confusion matrix (rows = true class, columns = predicted class).
    classes - class names, ordered like the rows/columns of cm.
    normalize - if True, show each row as fractions of that row's total.
    title - figure title.
    cmap - matplotlib colour map for the cells.
    """
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Write each cell value, in white on dark cells and black on light ones.
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.tight_layout()


# Class indices follow the one-hot column order used for y_true / y_pred:
# brownspot = 0
# healthy = 1
# hispa = 2
# LeafBlast = 3

cm_plot_labels = ['brownspot', 'healthy', 'hispa','LeafBlast']

plot_confusion_matrix(cm, cm_plot_labels, title='Confusion Matrix')

In [None]:
from sklearn.metrics import classification_report

# Generate a classification report
report = classification_report(y_true, y_pred, target_names=['brownspot', 'healthy', 'hispa','LeafBlast'])

print(report)

In [None]:
# --ignore-installed is added to fix an error.

# https://stackoverflow.com/questions/49932759/pip-10-and-apt-how-to-avoid-cannot-uninstall
# -x-errors-for-distutils-packages

!pip install tensorflowjs --ignore-installed

In [None]:
# Use the command line conversion tool to convert the model

!tensorflowjs_converter --input_format keras model.h5 tfjs/model

In [None]:
!ls

In [None]:
# Delete the image_dir directory we created to prevent a Kaggle error.
# Kaggle allows a max of 500 files to be saved.

shutil.rmtree('image_dir')

In [None]:
!ls

In [None]:
import matplotlib.pyplot as plt

# Pie chart of the number of validation images per class.
labels ='brown spot','healthy','hispa','leafblast'
size=[len(df_brownspot_val ),len(df_healthy_val ),len(df_hispa_val),len(df_LeafBlast_val)]
explode=(0,0.1,0,0)  # pull the 'healthy' slice out slightly
fig1,ax1=plt.subplots()
ax1.pie(size,explode=explode,labels=labels,autopct='%1.1f%%',shadow=True,startangle=90)
ax1.axis('equal')  # equal aspect ratio so the pie is drawn as a circle
# Title tells this chart apart from the training-set pie chart.
ax1.set_title('Validation images per class')
plt.show()

In [None]:
import matplotlib.pyplot as plt

# Per-epoch curves recorded by Keras during training.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

# Two stacked panels: accuracy on top, loss below.
fig, (ax_acc, ax_loss) = plt.subplots(2, 1, figsize=(8, 8))

ax_acc.plot(acc, label='Training Accuracy')
ax_acc.plot(val_acc, label='Validation Accuracy')
ax_acc.legend(loc='lower right')
ax_acc.set_ylabel('Accuracy')
# keep the automatic lower bound, cap the upper bound at 1
ax_acc.set_ylim([min(ax_acc.get_ylim()), 1])
ax_acc.set_title('Training and Validation Accuracy')

ax_loss.plot(loss, label='Training Loss')
ax_loss.plot(val_loss, label='Validation Loss')
ax_loss.legend(loc='upper right')
ax_loss.set_ylabel('Cross Entropy')
# loss axis is fixed to [0, 1]; larger values are clipped from view
ax_loss.set_ylim([0, 1.0])
ax_loss.set_title('Training and Validation Loss')
ax_loss.set_xlabel('epoch')
plt.show()
