In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt

In [2]:
import os
import cv2

In [3]:
# Show the current working directory; the dataset paths defined in this notebook are relative ("../input/...").
os.getcwd()

## Exploring Dataset

In [4]:
# Dataset locations, relative to the notebook's working directory.
# The augmented dataset root is shared by the train and validation splits, so it
# is spelled out once and reused; the test images live in a separate folder.
_augmented_root = "../input/new-plant-diseases-dataset/New Plant Diseases Dataset(Augmented)/New Plant Diseases Dataset(Augmented)"
train_dir = _augmented_root + "/train"
validation_dir = _augmented_root + "/valid"

test_dir = "../input/new-plant-diseases-dataset/test/test"

In [5]:
# Total no. of object classes / plant diseases
# (counts the entries of train_dir; assumes each entry is one class folder - TODO confirm)
len(os.listdir(train_dir))

In [6]:
# Names of the object classes / plant diseases (the entries of train_dir, in the order os.listdir returns them)
os.listdir(train_dir)

In [7]:
# Unique plants in the dataset, and the number of class folders that are
# not a "healthy" class.
# Folder names are split on '___': the first part is treated as the plant,
# the second part as the condition.

plantNames = []
num_disease = 0

for class_dir in os.listdir(train_dir):
    name_parts = class_dir.split('___')
    plant, diseaseClass = name_parts[0], name_parts[1]

    # Every non-healthy folder counts as one disease
    if diseaseClass != "healthy":
        num_disease += 1

    # Record each plant only the first time it is seen
    if plant in plantNames:
        continue
    plantNames.append(plant)

In [8]:
# Unique plants: how many distinct plant names were collected, followed by the names themselves
print("No. of plant categories: ", len(plantNames))
print("Unique plants are: ", plantNames)

In [9]:
# Disease count
# Note: num_disease counts every class folder whose condition part is not "healthy",
# so a disease that appears under two plants is counted once per plant.
print("Total no. of diseases are:", num_disease)

In [10]:
# No. of training images for each disease
# Total no. of training images

total_training_images = 0
train_images_per_disease = {'disease names': [], 'no of images':[]}

for diseaseName in os.listdir(train_dir):
    path = train_dir + '/' + diseaseName

    # List the class folder once and reuse the count for both the per-class
    # record and the running total (avoids scanning the directory twice).
    image_count = len(os.listdir(path))

    train_images_per_disease['disease names'].append(diseaseName)
    train_images_per_disease['no of images'].append(image_count)

    total_training_images += image_count

In [11]:
# Converting the per-class counts into a DataFrame
# (columns: 'disease names' and 'no of images', one row per class folder)
train_img_per_disease_df = pd.DataFrame(train_images_per_disease)
train_img_per_disease_df

In [12]:
# Visualising the class distribution as a bar chart
x = train_img_per_disease_df['disease names']
y = train_img_per_disease_df['no of images']

fig, ax = plt.subplots(figsize=(20, 7))
ax.bar(x, y, width=0.6)
ax.set_xlabel('Plants/Diseases', fontsize=10)
ax.set_ylabel('No of training images available', fontsize=10)
# Class names are long, so the tick labels are drawn vertically
ax.tick_params(axis='x', labelrotation=90)
ax.set_title('Training images per each class of plant disease')
plt.show()

In [13]:
# Total no. of images available (sum of the per-class file counts under train_dir)
print("Total no. of images available for training:", total_training_images)

## Displaying Images

In [14]:
# plotting subplot class-wise

def plotImages(diseaseName):
    """Show up to five sample images of one class in a single row.

    Parameters
    ----------
    diseaseName : str
        Name of a class folder inside ``train_dir``; it is also used as the
        figure title.

    Notes
    -----
    ``cv2.imread`` returns pixels in BGR order while ``plt.imshow`` interprets
    arrays as RGB, so every image is converted before display.  Folders with
    fewer than five files simply yield fewer panels, and files OpenCV cannot
    decode are skipped (their panel is left empty).
    """
    path = train_dir + '/' + diseaseName
    imageNames = os.listdir(path)

    plt.figure(figsize=(12,3))
    plt.suptitle(diseaseName, fontsize = 18)

    for i, imageName in enumerate(imageNames[:5]):
        img = cv2.imread(path + '/' + imageName)
        if img is None:
            # cv2.imread signals an unreadable file by returning None
            continue

        plt.subplot(1,5,i+1)
        plt.xticks([])
        plt.yticks([])
        # BGR -> RGB so the colours are rendered correctly
        plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

In [15]:
# Displaying sample images for the first six classes that were recorded

sample_class_names = train_images_per_disease['disease names']
for idx in range(6):
    disease = sample_class_names[idx]
    plotImages(disease)

In [16]:
# Size of image: print the array shape of the first listed file
# in each of the first four recorded classes

recorded_classes = train_images_per_disease['disease names']
for idx in range(4):
    images_path = train_dir + '/' + recorded_classes[idx]
    first_file = os.listdir(images_path)[0]
    sample_img = cv2.imread(images_path + '/' + first_file)
    print(sample_img.shape)

So, from the sampled images, we can infer that the images have the same dimensions in each class, i.e. (256, 256, 3).

## Loading Images

In [17]:
import keras
from keras.preprocessing import image

In [18]:
# Compare the number of entries (class folders) in the training and validation directories
print("Unique classes in training dataset: ", len(os.listdir(train_dir))) 
print("Unique Classes in validation dataset: ",len(os.listdir(validation_dir)))

Hence, the same number of classes is available in both the training and validation datasets.

In [19]:
# Image generators for the training and validation data.
# Both are configured identically: pixel values are multiplied by 1/255; no other options are set.
train_datagen = image.ImageDataGenerator( rescale=1./255 )
valid_datagen = image.ImageDataGenerator( rescale=1./255 )

In [20]:
# Batches of training images read from train_dir:
# 224x224 RGB images, 64 per batch, class_mode='categorical',
# shuffled with a fixed seed.
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(224, 224),
    batch_size=64,
    class_mode='categorical',
    color_mode='rgb',
    shuffle=True,
    seed=42,
)

In [21]:
# Batches of validation images read from validation_dir, using the same
# settings as the training generator (224x224 RGB, 64 per batch,
# class_mode='categorical', shuffled with a fixed seed).
validation_generator = valid_datagen.flow_from_directory(
    validation_dir,
    target_size=(224, 224),
    batch_size=64,
    class_mode='categorical',
    color_mode='rgb',
    shuffle=True,
    seed=42,
)

## Building Model
Using **Pre-trained Models**

In [22]:
import tensorflow as tf
from tensorflow.keras.applications import ResNet50, MobileNetV2
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten

### ResNet50

In [64]:
# Load ResNet50 as a headless feature extractor.
# The default ResNet50() keeps the 1000-way ImageNet softmax layer, so any
# layers stacked on base_model.output would be fed class probabilities instead
# of image features.  include_top=False drops that classifier, and
# pooling="avg" makes base_model.output a flat feature vector that Dense
# layers can consume directly.  input_shape matches the 224x224 RGB images
# produced by the data generators.
base_model = ResNet50(include_top=False,
                      weights="imagenet",
                      input_shape=(224, 224, 3),
                      pooling="avg")

In [None]:
# Print the layer-by-layer architecture of the loaded base model
base_model.summary()

In [65]:
# Classification head stacked on the base model's output tensor:
# a 512-unit ReLU layer followed by a 38-way softmax
# (38 is presumably the number of class folders - verify against train_dir).
hidden = Dense(512, activation='relu')(base_model.output)
output = Dense(38, activation='softmax')(hidden)

# Full network: base model input -> new softmax output
resnet_model = Model(inputs=base_model.input, outputs=output)

In [None]:
# Print the layer-by-layer architecture of the ResNet50-based classifier
resnet_model.summary()

In [59]:
# Configure training: Adam optimizer, categorical cross-entropy loss, accuracy metric.
# NOTE(review): the execution counts show this cell was last run (In [59]) before the
# cells that build resnet_model (In [64]/[65]); re-run the notebook top to bottom so the
# compile step applies to the model that is actually trained.
resnet_model.compile(optimizer='adam', loss=tf.keras.losses.CategoricalCrossentropy(), metrics=['accuracy'] )

In [None]:
# Train for 2 epochs on the training generator, passing the validation generator as validation data
resnet_model.fit(train_generator, epochs = 2, validation_data = validation_generator)

### MobileNet

In [23]:
# Loading the model: MobileNetV2 with ImageNet weights and without its
# classification top, for 224x224 RGB inputs.
# (No `classes` argument: it only applies when include_top=True, so passing
# classes=38 here had no effect and was misleading.)
base_model1 = MobileNetV2(include_top = False, input_shape = (224,224,3), weights="imagenet")

# Freeze the pre-trained weights so only the layers added on top are trained
base_model1.trainable = False

In [24]:
# Input tensor for 224x224 RGB images.
# Created through tf.keras (not the standalone `keras` package) so that the
# input, the layers and the Model below all come from the same Keras
# implementation.
inputs = tf.keras.Input(shape=(224,224,3))

# Pass the input through the base model, called with training=False
x = base_model1(inputs, training = False)

# Classification head: global average pooling -> dropout (rate 0.2) -> 38-way softmax
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tf.keras.layers.Dropout(0.2)(x)
x = tf.keras.layers.Dense(38, activation="softmax")(x)

# Combine into the final model
model = Model(inputs=inputs, outputs=x)

# Summary
model.summary()

In [25]:
# Compile: Adam optimizer, categorical cross-entropy loss, accuracy metric
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

# Callback: stop training once val_loss has not improved for 3 consecutive
# epochs.  restore_best_weights=True rolls the model back to the epoch with
# the lowest val_loss; without it the model would keep the weights from the
# last (worse) epoch before stopping.
early_stopping_cb = tf.keras.callbacks.EarlyStopping(monitor="val_loss",
                                                     patience=3,
                                                     restore_best_weights=True)

In [26]:
# Upper bound on the number of training epochs (early stopping may end training sooner)
epochs = 15

# Train the model.
# steps_per_epoch=200 means each epoch consumes 200 batches from the
# training generator rather than a full pass over it.
model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=epochs,
    steps_per_epoch=200,
    callbacks=[early_stopping_cb],
)

In [27]:
# Plotting training vs. validation accuracy per epoch, using the history
# recorded on the model by the last fit() call.
training_history = model.history.history

plt.plot(training_history['accuracy'], label='train accuracy')
plt.plot(training_history['val_accuracy'], label='val accuracy')
# Label the axes and title the figure so it can be read on its own
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Training vs. validation accuracy')
plt.legend()
plt.show()

### Making Predictions
**MobileNetV2 model**

In [28]:
# Display the path of the test image directory
test_dir

In [29]:
# List the entries of the test directory
os.listdir(test_dir)

In [30]:
# No. of test images (number of entries in test_dir)
len(os.listdir(test_dir))

In [49]:
# Class names in sorted order, used to map the index of the highest
# predicted probability back to a class folder name.
# NOTE(review): this assumes the training generator assigned its class indices
# in the same sorted order - confirm against train_generator.class_indices.
train_classes = sorted(os.listdir(train_dir))
predictions = {'actual class': [], 'predicted class': []}

for fileName in os.listdir(test_dir):
    img = cv2.imread(test_dir + '/' + fileName)
    if img is None:
        # cv2.imread returns None for files it cannot decode; skip them
        # instead of crashing inside cv2.resize.
        print("Skipping unreadable file:", fileName)
        continue

    # cv2.imread yields BGR pixels, but the model was trained on images loaded
    # with color_mode='rgb'; convert so the channel order matches training.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (224, 224))
    # Same 1/255 scaling that the data generators apply
    img = img / 255.0

    # The label is taken from the file name, up to the first '.'
    img_class = fileName.split('.')[0]
    predictions['actual class'].append(img_class)

    # The model expects a batch dimension: input of shape (1,224,224,3)
    test_img = np.reshape(img, (1,224,224,3))
    pred = model.predict(test_img)
    class_maxProb = np.argmax(pred)

    predictions['predicted class'].append(train_classes[class_maxProb])

In [50]:
# Tabulate the results, one row per test image.
# 'actual class' holds the test file name up to the first '.', while
# 'predicted class' holds the name of a training class folder.
prediction_df = pd.DataFrame(predictions)
prediction_df

In [54]:
# Show one test image (the second entry returned by os.listdir), titled with
# its file name up to the first '.'.
# The directory is listed once so the image and its title are guaranteed to
# refer to the same file.
sample_test_name = os.listdir(test_dir)[1]
sample_test_img = cv2.imread(test_dir + '/' + sample_test_name)

# cv2.imread returns BGR pixels while plt.imshow expects RGB; convert before
# display so the colours are rendered correctly.
plt.imshow(cv2.cvtColor(sample_test_img, cv2.COLOR_BGR2RGB))
plt.title(sample_test_name.split('.')[0])
plt.show()