Import necessary packages

In [0]:
import numpy as np
import pickle
import cv2
import tensorflow as tf
from os import listdir
from sklearn.preprocessing import LabelBinarizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Activation, Flatten, Dropout, Dense
from tensorflow.keras import backend as K
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import img_to_array
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import shutil
from shutil import copyfile
import os
from tensorflow.keras.models import load_model

**Mount Drive (if needed)**


In [0]:
from google.colab import drive
# Mount Google Drive under /content/drive, forcing a remount if it is already mounted.
drive.mount(
    '/content/drive',
    force_remount=True,
)

**Include repository link if needed**

In [0]:
# Replace <repositoryname> with the URL of the repository to clone before running this cell.
!git clone <repositoryname>

**Use this cell for importing Kaggle Datasets (if needed)**
**BE SURE TO UPLOAD 'kaggle.json' BEFORE RUNNING THIS CELL**

In [0]:
# Install the Kaggle command-line client (quiet output).
!pip install -q kaggle

# Create the Kaggle config directory and copy the uploaded API token (kaggle.json) into it.
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/


# Restrict the token file to owner read/write only.
!chmod 600 ~/.kaggle/kaggle.json

**Use below script ONLY if dataset is not split into train and test**

In [0]:
#USE BELOW CELL IFF YOU HAVE DATA NOT SPLIT INTO TRAIN AND TEST SETS.


import os
import random
import shutil
from shutil import copyfile

def mk_dir_comeback(name):
  """Create the class sub-folder `name` under both `train/` and `test/`.

  The folders are created relative to the current working directory.
  Missing `train/` / `test/` parents are created on demand and existing
  folders are left untouched, so the call is safe to repeat. The working
  directory is never changed, so a failure cannot leave the kernel
  stranded inside `train/` or `test/`.

  Args:
    name: Name of the class folder to create inside each split folder.
  """
  for split_root in ('train', 'test'):
    os.makedirs(os.path.join(split_root, name), exist_ok=True)


crop = 'Rice'  # <--- Change to the name (or common substring) of the dataset folder(s); the script handles the rest
TRAIN_FRACTION = 0.9  # share of each class folder copied to train/; the remainder goes to test/
SPLIT_SEED = 230      # fixed seed so the same split is produced on every run
SPLIT_ROOTS = ('train', 'test')

# Make sure the destination roots exist before any class folder is created in them.
for split_root in SPLIT_ROOTS:
  os.makedirs(split_root, exist_ok=True)

# A class folder is a *directory* in the current folder whose name contains `crop`.
# Plain files (e.g. a downloaded "Rice.zip") and the train/test output folders are skipped.
dir_list = [
    entry for entry in os.listdir('./')
    if crop in entry and os.path.isdir(entry) and entry not in SPLIT_ROOTS
]

print(dir_list)

for class_dir in dir_list:
  mk_dir_comeback(class_dir)

  # Sort before shuffling so the seeded shuffle does not depend on the
  # arbitrary order returned by os.listdir. Only regular files are split.
  filenames = sorted(
      name for name in os.listdir(class_dir)
      if os.path.isfile(os.path.join(class_dir, name))
  )
  random.seed(SPLIT_SEED)
  random.shuffle(filenames)

  split = int(TRAIN_FRACTION * len(filenames))
  train_filenames = filenames[:split]
  test_filenames = filenames[split:]

  for split_root, names in zip(SPLIT_ROOTS, (train_filenames, test_filenames)):
    for name in names:
      copyfile(os.path.join(class_dir, name),
               os.path.join(split_root, class_dir, name))

In [0]:
# Sanity check: list train/ and test/ to confirm the split cell above populated them.
!ls train/
!ls test/

**Set up Image Generators for loading data**
Sometimes the dataset is too big to load into RAM all at once, so the image generators below read it from storage in batches instead.

---



In [0]:
# Dataset locations (fill these in) and network input geometry.
TRAIN_DIR = ""
TEST_DIR = ""
HEIGHT = 256  # <--- INPUT SIZE
WIDTH = 256   # <--- INPUT SIZE
DEPTH = 3
BATCH_SIZE = 32

# Pixel values are multiplied by this factor in both generators.
_PIXEL_SCALE = 1./255

# Training images are rescaled and randomly augmented.
train_datagen = ImageDataGenerator(
    rescale=_PIXEL_SCALE,
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest',
)
# Validation images are only rescaled, never augmented.
validation_datagen = ImageDataGenerator(rescale=_PIXEL_SCALE)

# Both directory iterators share the same target size and batch size.
_flow_options = dict(target_size=(HEIGHT, WIDTH), batch_size=BATCH_SIZE)
train_generator = train_datagen.flow_from_directory(TRAIN_DIR, **_flow_options)
validation_generator = validation_datagen.flow_from_directory(TEST_DIR, **_flow_options)


# Show the class-name -> index mapping found in TRAIN_DIR.
print(train_generator.class_indices)

**Run this cell once if the training has to be restarted in the current session**

In [0]:
# Call reset() on the training generator first, then on the validation generator.
for _generator in (train_generator, validation_generator):
    _generator.reset()

In [0]:
# Training hyper-parameters.
INIT_LR = 0.001
NUM_EPOCHS = 10

# Take the class list from the training generator rather than from a
# hand-edited path: this keeps the size of the softmax layer in step with
# the labels the generator actually produces. Names are ordered by the
# index the generator assigned to them.
# (To override manually, set `num_classes` to an integer instead.)
classes = sorted(train_generator.class_indices, key=train_generator.class_indices.get)
num_classes = len(classes)
print(num_classes)

**Following is the architecture**

---

Please note that the architecture consists of one or more blocks of Conv2D, BatchNorm, MaxPooling and Dropout layers.

Feel free to change the architecture as per requirements.

In [0]:
# Default architecture: two Conv2D -> BatchNorm -> MaxPooling -> Dropout blocks
# followed by a dense classifier head with a softmax over `num_classes`.
inputShape = (HEIGHT, WIDTH, DEPTH)
chanDim = -1  # BatchNormalization runs over the last axis, matching the (H, W, C) input shape above
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(32, (3, 3), padding="same",input_shape=inputShape,activation='relu'),
    tf.keras.layers.BatchNormalization(axis=chanDim),
    tf.keras.layers.MaxPooling2D(pool_size=(3, 3)),
    tf.keras.layers.Dropout(0.4),
    tf.keras.layers.Conv2D(64, (3, 3), padding="same",activation='relu'),
    tf.keras.layers.BatchNormalization(axis=chanDim),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Dropout(0.4),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(512,activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(num_classes,activation='softmax'),
])


def build_deeper_model(num_classes):
    """Build the alternative, deeper architecture (not used by default).

    This is the architecture that was previously kept in a bare string
    literal in this cell. It is now real code, so it can be selected with
    `model = build_deeper_model(num_classes)`. The channels-first branch
    uses the HEIGHT / WIDTH / DEPTH constants (the string version
    referenced undefined lowercase names).

    Args:
        num_classes: Number of units in the final softmax layer.

    Returns:
        An uncompiled `Sequential` model.
    """
    input_shape = (HEIGHT, WIDTH, DEPTH)
    chan_dim = -1
    if K.image_data_format() == "channels_first":
        input_shape = (DEPTH, HEIGHT, WIDTH)
        chan_dim = 1

    deeper = Sequential()
    # Block 1: one 32-filter convolution, 3x3 pooling.
    deeper.add(Conv2D(32, (3, 3), padding="same", input_shape=input_shape))
    deeper.add(Activation("relu"))
    deeper.add(BatchNormalization(axis=chan_dim))
    deeper.add(MaxPooling2D(pool_size=(3, 3)))
    deeper.add(Dropout(0.25))
    # Blocks 2 and 3: two convolutions each (64, then 128 filters), 2x2 pooling.
    for filters in (64, 128):
        for _ in range(2):
            deeper.add(Conv2D(filters, (3, 3), padding="same"))
            deeper.add(Activation("relu"))
            deeper.add(BatchNormalization(axis=chan_dim))
        deeper.add(MaxPooling2D(pool_size=(2, 2)))
        deeper.add(Dropout(0.25))
    # Classifier head.
    deeper.add(Flatten())
    deeper.add(Dense(1024))
    deeper.add(Activation("relu"))
    deeper.add(BatchNormalization())
    deeper.add(Dropout(0.5))
    deeper.add(Dense(num_classes))
    deeper.add(Activation("softmax"))
    return deeper

Model Summary

In [0]:
# Show the summary of the model defined in the previous cell.
model.summary()

In [0]:
# Adam optimizer. `learning_rate` is the supported argument name; `lr` is a
# deprecated alias.
# NOTE(review): `decay` is a legacy argument that newer Keras optimizers
# reject — confirm against the installed TensorFlow version.
opt = Adam(learning_rate=INIT_LR, decay=INIT_LR / NUM_EPOCHS)
# The metric is deliberately named 'acc': the checkpoint callback monitors
# 'val_acc' and the plotting cell reads history.history['acc'] / ['val_acc'].
model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=['acc'])

In [0]:
from tensorflow.keras.callbacks import ModelCheckpoint

#logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
# Write 'best_model.h5' only when the monitored 'val_acc' improves.
checkpoint = ModelCheckpoint(
    filepath='best_model.h5',
    monitor='val_acc',
    mode='auto',
    save_best_only=True,
    verbose=1,
)
callbacks_list = [checkpoint]

**start training**
By default, the model with best validation accuracy will be saved.

In [0]:

# Train the network.
# - `fit` accepts generators directly; `fit_generator` is deprecated.
# - len(generator) is the number of batches needed to cover the data once
#   (last partial batch included), so every epoch sees the whole training
#   set and validation covers the whole validation set instead of a
#   hard-coded 7 batches. It also avoids a 0-step epoch when the dataset is
#   smaller than one batch.
history = model.fit(
    train_generator,
    epochs=NUM_EPOCHS,
    steps_per_epoch=len(train_generator),
    validation_data=validation_generator,
    validation_steps=len(validation_generator),
    workers=8,
    shuffle=True,
    callbacks=callbacks_list,
)


**RUN THIS CELL ONLY TO CONTINUE TRAINING OF A SAVED MODEL (helpful when model isn't improving, so reduce the learning rate and continue training)**

In [0]:
# Continue training from the best checkpoint with a lower learning rate.
# NOTE: the result lives in `new_model`; cells that reference `model`
# (save / evaluate) still see the original in-memory model.
new_model = load_model('best_model.h5')
tf.keras.backend.set_value(new_model.optimizer.lr, 0.0001)
# `fit` replaces the deprecated `fit_generator`; len(generator) covers each
# dataset once per epoch instead of validating on a hard-coded 7 batches.
history = new_model.fit(
    train_generator,
    epochs=5,
    steps_per_epoch=len(train_generator),
    validation_data=validation_generator,
    validation_steps=len(validation_generator),
    workers=8,
    shuffle=True,
    callbacks=callbacks_list,
)

Save model

In [0]:
# Change the working directory to /content so the file below is written there.
%cd /content
# Save the in-memory `model` to 'model_name.h5' (rename the file as needed).
model.save('model_name.h5')

Plot metrics (Can also use Tensorboard for this purpose)

In [0]:
# Per-epoch metrics recorded by the most recent training call.
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(acc) + 1)

# Train and validation accuracy (on its own explicit figure).
fig_acc, ax_acc = plt.subplots()
ax_acc.plot(epochs, acc, 'b', label='Training accuracy')
ax_acc.plot(epochs, val_acc, 'r', label='Validation accuracy')
ax_acc.set(title='Training and Validation accuracy', xlabel='Epoch', ylabel='Accuracy')
ax_acc.legend()

# Train and validation loss.
fig_loss, ax_loss = plt.subplots()
ax_loss.plot(epochs, loss, 'b', label='Training loss')
ax_loss.plot(epochs, val_loss, 'r', label='Validation loss')
ax_loss.set(title='Training and Validation loss', xlabel='Epoch', ylabel='Loss')
ax_loss.legend()
plt.show()

Model Accuracy

In [0]:
print("[INFO] Calculating model accuracy")
# This notebook never defines x_test / y_test; the held-out data is served
# by `validation_generator` (built from TEST_DIR). Evaluate on one full pass
# over it. `scores` is [loss, acc] because the model is compiled with
# metrics=['acc'].
scores = model.evaluate(validation_generator, steps=len(validation_generator))
print(f"Test Accuracy: {scores[1]*100}")

Use below code to test model in the notebook itself

---



In [0]:
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import img_to_array, load_img
import numpy as np
from matplotlib.pyplot import imread

MODEL_PATH = 'tomato1.h5'  # <--- change to the model file you saved (e.g. 'best_model.h5')
INPUT_SIZE = (256, 256)    # (height, width) expected by the network

model = load_model(MODEL_PATH)
my_images = ['/content/PlantVillage-Dataset/raw/color/Tomato___Bacterial_spot/0045ba29-ed1b-43b4-afde-719cc7adefdb___GCREC_Bact.Sp 6254.JPG']
for fname in my_images:
  # load_img decodes to RGB and actually rescales the picture to INPUT_SIZE.
  # np.resize must not be used here: it only truncates or repeats the raw
  # pixel buffer to fit the requested shape, which scrambles the image.
  im = load_img(fname, target_size=INPUT_SIZE)
  # Same 1/255 scaling that the data generators apply.
  img_array = img_to_array(im) * (1/255.)
  # Add the batch dimension: (H, W, 3) -> (1, H, W, 3).
  img_array = np.expand_dims(img_array, axis=0)
  print(img_array.shape)
  prediction = model.predict(img_array)
  print(prediction)

Run this cell if you want to save the model to Google Drive.

In [0]:
# Copy the best checkpoint to Google Drive (requires Drive to be mounted at /content/drive by the mount cell).
!cp best_model.h5 /content/drive/My\ Drive/