In [21]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import glob # identify the specific file based on name
import cv2 # computer vision
from pathlib import Path # to read the path
from skimage.io import imread, imsave
from skimage.transform import resize


import tensorflow as tf
from tensorflow import keras
from keras.models import Model
from keras import layers as L
from keras.utils import to_categorical
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.optimizers import RMSprop,Adam
from keras.applications import vgg16

In [22]:
# Loading the data

!unzip '/content/drive/MyDrive/Colab Notebooks/tomato_leaf_images.zip'

Archive:  /content/drive/MyDrive/Colab Notebooks/tomato_leaf_images.zip
replace __MACOSX/._tomato_leaf_images? [y]es, [n]o, [A]ll, [N]one, [r]ename: 

In [23]:
# prepare the dataset for training and validation

training_data = Path('/content/tomato_leaf_images/train')

validation_data = Path('/content/tomato_leaf_images/val')

#Zip file takes less time compared to unzip files to read

In [24]:
# Labels the files name by using integer

labels_dict ={'AmericanLeafMiner':0,'Healthy':1,
              'MagnesiumDeficiency':2,'SerpentineLeafMiner':3}

In [25]:
# Creating a dataframe for the training dataset

train_df =[]

for folder in os.listdir(training_data):
  imgs_path =training_data /folder
  #get the list of all the images stored in that directory
  imgs = sorted(imgs_path.glob('*.jpg'))  # this will store all formats into jpg only

  # store each image path and corresponding label
  for img_name in imgs:
    train_df.append((str(img_name),labels_dict[folder]))

train_df = pd.DataFrame(train_df, columns =['image','label'], index =None)

  # shuffle the dataset
train_df = train_df.sample(frac =1.).reset_index(drop =True)


In [26]:
train_df

Unnamed: 0,image,label
0,/content/tomato_leaf_images/train/MagnesiumDef...,2
1,/content/tomato_leaf_images/train/MagnesiumDef...,2
2,/content/tomato_leaf_images/train/SerpentineLe...,3
3,/content/tomato_leaf_images/train/MagnesiumDef...,2
4,/content/tomato_leaf_images/train/AmericanLeaf...,0
...,...,...
4249,/content/tomato_leaf_images/train/MagnesiumDef...,2
4250,/content/tomato_leaf_images/train/Healthy/IMG2...,1
4251,/content/tomato_leaf_images/train/SerpentineLe...,3
4252,/content/tomato_leaf_images/train/MagnesiumDef...,2


In [27]:
# Creating a dataframe for the validation dataset

valid_df =[]

for folder in os.listdir(validation_data):
  imgs_path =validation_data /folder
  #get the list of all the images stored in that directory
  imgs = sorted(imgs_path.glob('*.jpg'))  # this will store all formats into jpg only

  # store each image path and corresponding label
  for img_name in imgs:
    valid_df.append((str(img_name),labels_dict[folder]))

valid_df = pd.DataFrame(valid_df, columns =['image','label'], index =None)

  # shuffle the dataset
valid_df = valid_df.sample(frac =1.).reset_index(drop =True)


In [28]:
valid_df

Unnamed: 0,image,label
0,/content/tomato_leaf_images/val/MagnesiumDefic...,2
1,/content/tomato_leaf_images/val/SerpentineLeaf...,3
2,/content/tomato_leaf_images/val/MagnesiumDefic...,2
3,/content/tomato_leaf_images/val/SerpentineLeaf...,3
4,/content/tomato_leaf_images/val/SerpentineLeaf...,3
...,...,...
466,/content/tomato_leaf_images/val/SerpentineLeaf...,3
467,/content/tomato_leaf_images/val/SerpentineLeaf...,3
468,/content/tomato_leaf_images/val/AmericanLeafMi...,0
469,/content/tomato_leaf_images/val/AmericanLeafMi...,0


In [29]:
# Configuration

# dimensions to consider for the image
img_rows, img_cols, img_channels = 224,224,3 # to standardize the each image to the given value

#batch_size
batch_size =8

# total no of classes
no_of_classes =4

In [30]:
# Data Augmentation

import imgaug as ia
from imgaug import augmenters as iaa
seed =1234
ia.seed(seed)

# Augmentation sequence

seq = iaa.OneOf([
    iaa.Fliplr(),
    iaa.Affine(rotate =20),
    iaa.Multiply((1.2,1.5))])

In [52]:
# Data Generator

def data_generator(data, batch_size, preprocessing_fn =None, is_validation_data =False):
  n = len(data)
  no_of_batches =int(np.ceil(n/batch_size))
  indices = np.arange(n)

  while True:
    if not is_validation_data:
      np.random.shuffle(indices)

    for i in range(no_of_batches):
      next_batch_indices = indices[i*batch_size:(i+i)*batch_size]
      no_of_examples = len(next_batch_indices)

      # Define two numpy array for containing batch data and labels

      batch_data =np.zeros((no_of_examples, img_rows,img_cols,img_channels),dtype=np.float32)
      batch_labels =np.zeros((no_of_examples,no_of_classes),dtype = np.float32)

      # process the next batch

      for j, idx in enumerate(next_batch_indices):
        img = cv2.imread(data.iloc[idx]['image'])
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        label = data.iloc[idx]['label']

        if not is_validation_data:
          img = seq.augment_image(img)

        img = cv2.resize(img, (img_rows, img_cols)).astype(np.float32)
        batch_data[j] = img
        batch_labels[j] = to_categorical(label, num_classes =no_of_classes)

        if preprocessing_fn is not None:
          batch_data = preprocessing_fn(batch_data)

        yield batch_data, batch_labels



In [53]:
# VGG16 model

preprocessing_fn = vgg16.preprocess_input

train_data_gen = data_generator(train_df, batch_size, preprocessing_fn)
valid_data_gen = data_generator(valid_df, batch_size,preprocessing_fn, is_validation_data =True)

# Transfer Learning with Fine Tuning

In [54]:
def get_base_model():
  base_model = vgg16.VGG16(input_shape =(img_rows,img_cols, img_channels),
                           weights ='imagenet',include_top =True)
  return base_model

In [55]:
# get the base model
base_model =get_base_model()

#get the output of the second last dense layer
base_model_output = base_model.layers[-2].output   # till 4th block we are freezing, we are modifying 5th block

# add new layers
x = L.Dropout(0.5, name= 'drop2')(base_model_output) # we are taking 5th block as x with 50% dropout
output =L.Dense(no_of_classes, activation ='softmax',name ='fc3')(x)

# Define a new model
model = Model(base_model.input, output)

In [56]:
# Freeze  all the base model layers
"""
for layer in base_model.layers[:-1]:
  layer.trainable = False
  """

'\nfor layer in base_model.layers[:-1]:\n  layer.trainable = False\n  '

In [57]:
# Compile the model and check it

optimizer = RMSprop(0.001) #0.001 =learning rate(eta)
model.compile(optimizer = optimizer, loss ='categorical_crossentropy', metrics =['accuracy'])
model.summary()

Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0   

In [61]:
# Model Training

# Use Early Stopping
es = EarlyStopping(patience = 100,restore_best_weights =True)

#Checkpoint to save model

chkpt =ModelCheckpoint(filepath ='model_checkpoint.h5',save_best_only =True)

# number of training and validation steps for training and validation
no_of_train_steps = int(np.ceil(len(train_df)/batch_size))
no_of_valid_steps = int(np.ceil(len(valid_df)/batch_size))

# number of epochs
no_of_epochs =5

In [62]:
# train the model

history =model.fit_generator(train_data_gen, epochs =no_of_epochs,
                             steps_per_epoch =no_of_train_steps,validation_data=valid_data_gen,
                             validation_steps =no_of_valid_steps,callbacks =[es,chkpt])

Epoch 1/5


  history =model.fit_generator(train_data_gen, epochs =no_of_epochs,




  saving_api.save_model(


Epoch 2/5

ResourceExhaustedError: ignored

In [None]:
# Try the above one with EfficientNEtB0

base_model = keras.applications.efficientNetB0.EfficientNet50(include_top =True)