In [None]:
import tensorflow as tf
import numpy as np
import sklearn
import tensorflow_datasets as tfds
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from PIL import Image
from keras.models import Model
import pandas as pd

In [None]:
# Mount Google Drive so the project folder (data and saved models) is reachable.
from google.colab import drive
drive.mount('/content/Drive', force_remount=True)
# NOTE: when this cell runs you will need to open a link to authorise the mount.
import os
# TODO: this folder is specific to one Drive layout; point it at your own project folder.
# Every relative path used later ("data/...", "saved_models/...") is resolved against it.
os.chdir('/content/Drive/My Drive/DataMining_Project')

Mounted at /content/Drive


### Model Definition

#### Downblock

In [None]:
def downsample(stride = 2, filters = 32, dropout = 0):
  """
  Build one downsampling block as a `tf.keras.Sequential`.

  The block applies, in order:
    - Conv2D with a 2x2 kernel, the given stride and an L2 kernel regularizer
    - Batch normalization
    - Dropout with rate `dropout`
    - ReLU activation (this can be experimented with as necessary)

  Args:
    stride: Stride of the convolution; it sets how strongly height/width shrink.
    filters: Number of output channels of the convolution.
    dropout: Rate passed to `tf.keras.layers.Dropout`.

  Returns:
    A `tf.keras.Sequential` holding the four layers above.
  """
  block = tf.keras.Sequential()
  block.add(tf.keras.layers.Conv2D(kernel_size = 2, strides = stride,  filters = filters,  kernel_regularizer='l2'))
  block.add(tf.keras.layers.BatchNormalization())
  # Dropout sits between normalization and the activation, as in the original design.
  block.add(tf.keras.layers.Dropout(dropout))
  block.add(tf.keras.layers.ReLU())
  return block
    

#### Models

##### Shallow

In [None]:
def shallow_CNN(model_type = None, dropout = 0,  in_shape = (256,256,3)):
  """
  Build the "shallow" classifier: a single large convolution followed by a dense head.

  Args:
    model_type: "plant" for an 11-way output, "disease" for a 38-way output.
    dropout: Accepted for signature parity with the other builders; not used here.
    in_shape: Shape of one input image, (height, width, channels).

  Returns:
    A `tf.keras.Model` whose output is a softmax over the classes.

  Raises:
    AssertionError: if `model_type` is neither "disease" nor "plant".
  """
  assert model_type in ["disease", "plant"]

  image_in = tf.keras.Input(shape =  in_shape )
  # One 256x256 kernel at stride 1: on a 256x256 image this yields a 1x1x256 map.
  features = tf.keras.layers.Conv2D(kernel_size = 256, strides = 1,  filters = 256,  kernel_regularizer='l2')(image_in)
  features = tf.keras.layers.BatchNormalization()(features)
  features = tf.keras.layers.ReLU()(features)

  # 100-unit dense layer, intended as the feature vector for the random forest and SVM models.
  features = tf.keras.layers.Flatten()(features)
  embedding = tf.keras.layers.Dense(100,  kernel_regularizer='l2')(features)

  # Project to the number of classes, then turn the scores into probabilities.
  n_classes = 11 if model_type == "plant" else 38
  logits = tf.keras.layers.Dense(n_classes)(embedding)
  probabilities = tf.keras.layers.Softmax()(logits)
  return tf.keras.Model(inputs = image_in, outputs = probabilities)

##### Large

In [None]:
def large_CNN(model_type = None, dropout = 0, in_shape = (256,256,3)):
  """
  Build the "large" classifier: eight stride-2 downsample blocks plus a dense head.

  The first four blocks have 128 filters and the last four have 256. Each block
  halves height and width, so a 256x256 input is reduced to 1x1x256.

  Args:
    model_type: "plant" for an 11-way output, "disease" for a 38-way output.
    dropout: Dropout rate forwarded to every downsample block.
    in_shape: Shape of one input image, (height, width, channels).

  Returns:
    A `tf.keras.Model` whose output is a softmax over the classes.

  Raises:
    AssertionError: if `model_type` is neither "plant" nor "disease".
  """
  assert model_type in ["plant", "disease"]
  block_filters = [128, 128, 128, 128, 256, 256, 256, 256]
  conv_stack = [downsample(stride = 2, filters = width, dropout = dropout) for width in block_filters]

  image_in = tf.keras.Input(shape = in_shape)
  features = image_in
  for block in conv_stack:
    features = block(features)

  # 100-unit dense layer, intended as the feature vector for the random forest and SVM models.
  features = tf.keras.layers.Flatten()(features)
  embedding = tf.keras.layers.Dense(100,  kernel_regularizer='l2')(features)

  # Project to the number of classes, then turn the scores into probabilities.
  n_classes = 11 if model_type == "plant" else 38
  logits = tf.keras.layers.Dense(n_classes)(embedding)
  probabilities = tf.keras.layers.Softmax()(logits)
  return tf.keras.Model(inputs = image_in, outputs = probabilities)

##### Basic

In [None]:
def basic_CNN(model_type = None, dropout = 0,  in_shape = (256,256,3)):
  """
  Build the "basic" classifier: eight 128-filter stride-2 downsample blocks plus a dense head.

  Unlike the other builders in this notebook, this one divides the input by 255
  before the first block.

  Args:
    model_type: "plant" for an 11-way output, "disease" for a 38-way output.
    dropout: Dropout rate forwarded to every downsample block.
    in_shape: Shape of one input image, (height, width, channels); it is printed when the model is built.

  Returns:
    A `tf.keras.Model` whose output is a softmax over the classes.

  Raises:
    AssertionError: if `model_type` is neither "disease" nor "plant".
  """
  assert model_type in ["disease", "plant"]
  print(in_shape)

  # One block per halving needed to take a 256-pixel side down to 1 (eight blocks).
  conv_stack = []
  side = 256
  while side > 1:
    conv_stack.append(downsample(stride = 2, filters = 128, dropout = dropout))
    side //= 2

  image_in = tf.keras.Input(shape =  in_shape )
  features = image_in/255
  for block in conv_stack:
    features = block(features)

  # 100-unit dense layer, intended as the feature vector for the random forest and SVM models.
  features = tf.keras.layers.Flatten()(features)
  embedding = tf.keras.layers.Dense(100,  kernel_regularizer='l2')(features)

  # Project to the number of classes, then turn the scores into probabilities.
  n_classes = 11 if model_type == "plant" else 38
  logits = tf.keras.layers.Dense(n_classes)(embedding)
  probabilities = tf.keras.layers.Softmax()(logits)
  return tf.keras.Model(inputs = image_in, outputs = probabilities)


##### Small

In [None]:
def small_CNN(model_type = None, dropout = 0,  in_shape = (256,256,3)):
  """
  Build the "small" classifier: eight stride-2 downsample blocks of growing width plus a dense head.

  Block widths are 8, 16, 32, 64 and then 128 four times. Each block halves
  height and width, so a 256x256 input is reduced to 1x1x128.

  Args:
    model_type: "plant" for an 11-way output, "disease" for a 38-way output.
    dropout: Dropout rate forwarded to every downsample block.
    in_shape: Shape of one input image, (height, width, channels).

  Returns:
    A `tf.keras.Model` whose output is a softmax over the classes.

  Raises:
    AssertionError: if `model_type` is neither "disease" nor "plant".
  """
  assert model_type in ["disease", "plant"]
  block_filters = [8, 16, 32, 64, 128, 128, 128, 128]
  conv_stack = [downsample(stride = 2, filters = width, dropout = dropout) for width in block_filters]

  image_in = tf.keras.Input(shape =  in_shape )
  features = image_in
  for block in conv_stack:
    features = block(features)

  # 100-unit dense layer, intended as the feature vector for the random forest and SVM models.
  features = tf.keras.layers.Flatten()(features)
  embedding = tf.keras.layers.Dense(100,  kernel_regularizer='l2')(features)

  n_classes = 11 if model_type == "plant" else 38
  logits = tf.keras.layers.Dense(n_classes)(embedding)
  probabilities = tf.keras.layers.Softmax()(logits)
  return tf.keras.Model(inputs = image_in, outputs = probabilities)

##### Tiny

In [None]:
def tiny_CNN(model_type = None, dropout = 0,  in_shape = (256,256,3)):
  """
  Build the "tiny" classifier: four stride-4 downsample blocks plus a dense head.

  Block widths are 4, 8, 16 and 32. With the 2x2 kernels used by `downsample`,
  a 256x256 input shrinks 256 -> 64 -> 16 -> 4 -> 1, ending at 1x1x32.

  Args:
    model_type: "plant" gives an 11-way output. Any other value, including the
      default None, gives a 38-way output; unlike the sibling builders, this
      one does not validate the argument.
    dropout: Dropout rate forwarded to every downsample block.
    in_shape: Shape of one input image, (height, width, channels).

  Returns:
    A `tf.keras.Model` whose output is a softmax over the classes.
  """
  block_filters = [4, 8, 16, 32]
  conv_stack = [downsample(stride = 4, filters = width, dropout = dropout) for width in block_filters]

  image_in = tf.keras.Input(shape = in_shape)
  features = image_in
  for block in conv_stack:
    features = block(features)

  # 100-unit dense layer, intended as the feature vector for the random forest and SVM models.
  features = tf.keras.layers.Flatten()(features)
  embedding = tf.keras.layers.Dense(100,  kernel_regularizer='l2')(features)

  # Project to the number of classes, then turn the scores into probabilities.
  n_classes = 11 if model_type == "plant" else 38
  logits = tf.keras.layers.Dense(n_classes)(embedding)
  probabilities = tf.keras.layers.Softmax()(logits)
  return tf.keras.Model(inputs = image_in, outputs = probabilities)

##### Tiny-ish

In [None]:
def tinish_CNN():
  """
  Build the "tiny-ish" disease classifier for 256x256 RGB images.

  Five downsample blocks with (stride, filters) of (4, 4), (4, 8), (4, 16),
  (2, 32) and (2, 64), each created with the default dropout of `downsample`.
  With the 2x2 kernels used by `downsample` the spatial size shrinks
  256 -> 64 -> 16 -> 4 -> 2 -> 1, ending at 1x1x64.

  Returns:
    A `tf.keras.Model` whose output is a softmax over 38 classes.
  """
  block_specs = [(4, 4), (4, 8), (4, 16), (2, 32), (2, 64)]
  conv_stack = [downsample(stride = step, filters = width) for step, width in block_specs]

  image_in = tf.keras.Input(shape = (256,256,3))
  features = image_in
  for block in conv_stack:
    features = block(features)

  # 100-unit dense layer, intended as the feature vector for the random forest and SVM models.
  features = tf.keras.layers.Flatten()(features)
  embedding = tf.keras.layers.Dense(100,  kernel_regularizer='l2')(features)

  # Project to the 38 classes, then turn the scores into probabilities.
  logits = tf.keras.layers.Dense(38)(embedding)
  probabilities = tf.keras.layers.Softmax()(logits)
  return tf.keras.Model(inputs = image_in, outputs = probabilities)


### Create the Dataset

In [None]:
def create_dataset(path, batch_size = 32):
  """
  Load an image folder as an 80/20 training/validation pair of batched datasets.

  Class labels are inferred from the sub-directory names and one-hot encoded.
  Images are resized to 256x256. Both subsets use seed 1, so they come from the
  same split of the files.

  Args:
    path: Root directory with one sub-directory per class. If the string
      contains "processed", images are loaded with 4 channels ("rgba");
      otherwise with 3 ("rgb").
    batch_size: Images per batch. Defaults to 32 so that the one-argument calls
      elsewhere in this notebook work.

  Returns:
    (train_data, validation_data), both produced by
    `tf.keras.utils.image_dataset_from_directory`.
  """
  color_mode = "rgba" if "processed" in path else "rgb"
  # Settings shared by both subsets; only `subset` differs between the two calls.
  shared_kwargs = dict(labels = "inferred",
                       label_mode = "categorical",
                       image_size = (256,256),
                       validation_split = .2,
                       color_mode = color_mode,
                       batch_size = batch_size,
                       shuffle = True,
                       seed = 1)
  train_data = tf.keras.utils.image_dataset_from_directory(path, subset = "training", **shared_kwargs)
  validation_data = tf.keras.utils.image_dataset_from_directory(path, subset = "validation", **shared_kwargs)
  return train_data, validation_data

In [None]:
def create_class_weights(path):
  """
  Compute inverse-frequency class weights for a directory-per-class dataset.

  Only the immediate sub-directories of `path` are treated as classes, taken in
  sorted name order, and only regular files directly inside each one are
  counted. The largest class gets weight 1.0 and every other class gets
  largest_count / its_count.

  Args:
    path: Root directory with one sub-directory per class.

  Returns:
    Dict mapping class index (position in the sorted class names) to its weight.
    A class folder with no files yields an infinite weight.

  Raises:
    ValueError: if `path` contains no sub-directories.
  """
  class_names = sorted(name for name in os.listdir(path)
                       if os.path.isdir(os.path.join(path, name)))
  if not class_names:
    raise ValueError(f"No class sub-directories found in {path!r}")

  counts = []
  for name in class_names:
    class_dir = os.path.join(path, name)
    # Count files only, so a nested folder is neither a sample nor a class of its own.
    counts.append(sum(1 for entry in os.listdir(class_dir)
                      if os.path.isfile(os.path.join(class_dir, entry))))

  class_weights = max(counts) / np.array(counts)
  return {index: weight for index, weight in enumerate(class_weights)}


In [None]:
# Class weights for the data/fruit_class folders; the printed dict maps class index -> weight.
class_weights = create_class_weights("data/fruit_class")
print(class_weights)

NameError: ignored

### Train

#### Train the basic model

In [None]:
# Training/validation split of the data/fruit_class images; the batch size is passed explicitly.
train_data, validation_data = create_dataset("data/fruit_class/", batch_size = 32)

NameError: ignored

In [None]:
# Sanity check: print the label-batch and image-batch shapes for every validation batch.
for image in validation_data:
  # Each element is indexed as [0] = images, [1] = labels.
  # NOTE: despite their names, `labels` and `inputs` hold shapes, not tensors.
  labels = image[1].shape
  inputs = image[0].shape
  print(labels, inputs)


In [None]:
# Build the plant classifier with the default dropout of 0 and default input shape.
model = basic_CNN(model_type = "plant")
# summary() prints the table itself and returns None, so it is not wrapped in print().
model.summary()

(None, 128, 128, 128)
(None, 64, 64, 128)
(None, 32, 32, 128)
(None, 16, 16, 128)
(None, 8, 8, 128)
(None, 4, 4, 128)
(None, 2, 2, 128)
(None, 1, 1, 128)
(None, 100)
(None, 11)
Model: "model_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_6 (InputLayer)        [(None, 256, 256, 3)]     0         
                                                                 
 sequential_40 (Sequential)  (None, 128, 128, 128)     2176      
                                                                 
 sequential_41 (Sequential)  (None, 64, 64, 128)       66176     
                                                                 
 sequential_42 (Sequential)  (None, 32, 32, 128)       66176     
                                                                 
 sequential_43 (Sequential)  (None, 16, 16, 128)       66176     
                                                                 
 sequential_44

In [None]:
# `model`, `train_data`, `validation_data` and `class_weights` come from the cells above.
model.compile(optimizer="Adam", loss = tf.keras.losses.CategoricalCrossentropy(), metrics = ["accuracy", "mae"])
# NOTE(review): this value only labels the save folder. It is not passed to any model
# builder in this cell, so confirm it matches the dropout `model` was built with.
dropout = .05

save_location = f"plant_prediction_models/basic/dropout_{dropout * 100}_l2/"
# Five rounds of 20 epochs each, saving the full model after every round.
for i in range(5):
  # create_dataset already batches the data, so no batch_size is passed to fit.
  model.fit(train_data, epochs = 20, validation_freq = 5, validation_data = validation_data, class_weight = class_weights)
  model.save(save_location + f"ckpt_{i}")

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20

### Evaluate Performance

In [None]:
#Load the model
def test_model(model_name):
  model = tf.keras.models.load_model(model_name)
  model.compile(optimizer = "Adam" , loss = tf.keras.losses.CategoricalCrossentropy(), metrics = ["mae", "accuracy"])
  #model.evaluate(validation_data)
  predictions = np.argmax(model.predict(validation_data), axis = 1)
  print(predictions.shape)
  print(np.bincount(predictions))

In [None]:
# Inspect the prediction spread of the checkpoint named "ckpt_2" under `save_location`.
test_model(os.path.join(save_location, "ckpt_2"))

(1713,)
[  29  387    0    0    0    0    0    0    2    1 1294]


### Run Multiple Models

In [None]:
from tensorflow.python.platform.tf_logging import log
def run_various_CNNs(data_path, batch_size = 32):
  """
  Train the basic, small and tiny CNNs at several dropout rates on one dataset.

  For every (architecture, dropout) pair a fresh model is built, compiled and
  trained for 4 rounds of 10 epochs. After each round the full model is saved to
  saved_models/<dataset folder>/<architecture>/<dropout * 100>/ckpt_<round>,
  and TensorBoard logs go to the `logs` folder next to those checkpoints.

  Args:
    data_path: Dataset root with one sub-directory per class. A path containing
      "fruit_class" trains plant models; one containing "processed" trains
      disease models on 4-channel input; anything else trains disease models on
      the default input shape.
    batch_size: Batch size forwarded to `create_dataset`.
  """
  models = [basic_CNN, small_CNN, tiny_CNN]
  model_name = ["basic", "small", "tiny"]
  dropouts = [0, .05, .1, .15]
  class_weights = create_class_weights(data_path)
  # The last path component names the output folder; a trailing "/" is stripped
  # first so the name is never empty.
  model_type = data_path.rstrip('/').split('/')[-1]
  train_data, validation_data = create_dataset(data_path, batch_size = batch_size)
  print("Made it here")
  for m, mn in zip(models, model_name):
    for d in dropouts:
      save_location = f"saved_models/{model_type}/{mn}/{d * 100}/"
      # save_location already ends with "/", so no extra separator is added.
      log_dir = save_location + "logs"
      tf_callbacks = tf.keras.callbacks.TensorBoard(log_dir = log_dir, update_freq = 1 )
      if "fruit_class" in data_path:
        model = m(model_type = "plant", dropout = d)
      elif "processed" in data_path:
        print("using processed")
        model = m(model_type = "disease", dropout = d, in_shape = (256,256,4))
      else:
        model = m(model_type = "disease", dropout = d)
      model.compile(optimizer="Adam", loss = tf.keras.losses.CategoricalCrossentropy(), metrics = ["accuracy", "mae"])
      for i in range(4):
        model.fit(train_data, epochs = 10, validation_freq = 2, validation_data = validation_data, class_weight = class_weights, callbacks = [tf_callbacks])
        model.save(save_location + f"ckpt_{i}")


In [None]:
# Sweep all architectures and dropout rates on the data/processed dataset.
run_various_CNNs("data/processed")
# Alternative: run the same sweep on the data/fruit_class dataset.
#run_various_CNNs("data/fruit_class")

Found 8567 files belonging to 38 classes.
Using 6854 files for training.
Found 8567 files belonging to 38 classes.
Using 1713 files for validation.
Made it here
using processed
(256, 256, 4)
Epoch 1/10


### Train the Shallow Model

In [None]:
# Data, model and class weights for the shallow disease classifier on 4-channel images.
path = "data/processed"
# The batch size is passed explicitly.
train_data, validation_data = create_dataset(path, batch_size = 32)
# shallow_CNN accepts `dropout` but does not use it.
model = shallow_CNN(model_type = "disease", dropout = 0, in_shape = (256,256,4))
class_weights = create_class_weights(path)


Found 8567 files belonging to 38 classes.
Using 6854 files for training.
Found 8567 files belonging to 38 classes.
Using 1713 files for validation.


In [None]:
# TensorBoard logging and best-model checkpointing for the shallow model.
log_dir = 'saved_models/shallow/logs'
filepath = "saved_models/shallow/checkpoints/"
# update_freq = 1 asks TensorBoard to log after every batch.
tf_callbacks = tf.keras.callbacks.TensorBoard(log_dir = log_dir, update_freq = 1 )
# Saves the full model (not just the weights) whenever val_loss improves.
tf_modelCkpt = tf.keras.callbacks.ModelCheckpoint(filepath, monitor="val_loss", save_best_only=True,  save_weights_only=False, mode="auto", save_freq='epoch', initial_value_threshold=None)


In [None]:
# Train the shallow model with class weighting, TensorBoard logging and checkpointing.
model.compile(optimizer="Adam", loss = tf.keras.losses.CategoricalCrossentropy(), metrics = ["accuracy", "mae"])
# validation_freq = 2: validation runs every second epoch, so the val_loss-monitoring
# checkpoint callback only has a value to compare on those epochs.
model.fit(train_data, epochs = 150, validation_freq = 2, validation_data = validation_data, class_weight = class_weights, callbacks = [tf_callbacks, tf_modelCkpt])

Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150
Epoch 75/150
Epoch 76/150
Epoch 77/150
Epoch 78

<keras.callbacks.History at 0x7f866637eb50>

In [None]:
# Show which class indices the model ever predicts on the validation data.
print(np.unique(np.argmax(model.predict(validation_data), axis = 1)))


[ 0  4  5 11 15 20 22 24 31 32 37]


### Train Processed


In [None]:
# Experimental run: basic disease classifier on the 4-channel data/processed images,
# with a larger batch (128) and a small learning rate (1e-5).
path = "data/processed"
train_data, validation_data = create_dataset(path, batch_size = 128)
model = basic_CNN(model_type = "disease", dropout = 0, in_shape = (256,256,4))
class_weights = create_class_weights(path)
log_dir = 'saved_models/experimental/logs'
filepath = "saved_models/experimental/checkpoints/"
tf_callbacks = tf.keras.callbacks.TensorBoard(log_dir = log_dir, update_freq = 1 )
# Saves the full model whenever val_loss improves.
tf_modelCkpt = tf.keras.callbacks.ModelCheckpoint(filepath, monitor="val_loss", save_best_only=True,  save_weights_only=False, mode="auto", save_freq='epoch', initial_value_threshold=None)
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5), loss = tf.keras.losses.CategoricalCrossentropy(), metrics = [tf.keras.metrics.CategoricalAccuracy(name="categorical_accuracy", dtype=None)])
# validation_freq = 2: validation (and therefore val_loss) is computed every second epoch.
model.fit(train_data, epochs = 150, validation_freq = 2, validation_data = validation_data, class_weight = class_weights, callbacks = [tf_callbacks, tf_modelCkpt])

Found 8567 files belonging to 38 classes.
Using 6854 files for training.
Found 8567 files belonging to 38 classes.
Using 1713 files for validation.
(256, 256, 4)
Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150


KeyboardInterrupt: ignored

### Resnet

In [None]:
def resnet_model(in_shape = (256,256,3), num_classes = 38, include_top = True):
  """
  Build a classifier that stacks a small dense head on a Keras ResNet50.

  Args:
    in_shape: Shape of one input image, (height, width, channels).
    num_classes: Size of the final softmax layer (38 by default).
    include_top: Forwarded to `tf.keras.applications.ResNet50`. With the default
      True (the original behaviour) the ResNet50 keeps its own final
      classification layer, so the head below is fed that layer's output. Pass
      False to feed the head the backbone's convolutional feature maps instead.

  Returns:
    A `tf.keras.models.Sequential`: ResNet50 -> Flatten -> Dense(100) ->
    Dense(num_classes, softmax). The second-to-last layer is the 100-unit
    dense layer.
  """
  image_in = tf.keras.Input(shape = in_shape)
  # NOTE(review): images are fed to ResNet50 as loaded, with no ResNet-specific
  # preprocessing. Confirm whether that is intended.
  backbone = tf.keras.applications.ResNet50(input_tensor = image_in, include_top = include_top)

  model = tf.keras.models.Sequential()
  model.add(backbone)
  model.add(tf.keras.layers.Flatten())
  model.add(tf.keras.layers.Dense(100))
  model.add(tf.keras.layers.Dense(num_classes, activation = "softmax"))
  return model



In [None]:
# Train the ResNet-based classifier on the data/raw images.
model = resnet_model()
path = "data/raw"
train_data, validation_data = create_dataset(path, batch_size = 32)
class_weights = create_class_weights(path)
# NOTE(review): the output folder is named "ResNet101V3", but resnet_model() builds on
# ResNet50. Confirm the intended name.
log_dir = 'saved_models/ResNet101V3/logs'
filepath = "saved_models/ResNet101V3/checkpoints/"
tf_callbacks = tf.keras.callbacks.TensorBoard(log_dir = log_dir, update_freq = 1 )
# Saves the full model whenever val_loss improves.
tf_modelCkpt = tf.keras.callbacks.ModelCheckpoint(filepath, monitor="val_loss", save_best_only=True,  save_weights_only=False, mode="auto", save_freq='epoch', initial_value_threshold=None)
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5), loss = tf.keras.losses.CategoricalCrossentropy(), metrics = [tf.keras.metrics.CategoricalAccuracy(name="categorical_accuracy", dtype=None)])
# validation_freq = 2: validation (and therefore val_loss) is computed every second epoch.
model.fit(train_data, epochs = 150, validation_freq = 2, validation_data = validation_data, class_weight = class_weights, callbacks = [tf_callbacks, tf_modelCkpt])

Found 8567 files belonging to 38 classes.
Using 6854 files for training.
Found 8567 files belonging to 38 classes.
Using 1713 files for validation.
Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epo

<keras.callbacks.History at 0x7fc9c44426d0>

In [None]:
# Continue training from a previously saved checkpoint.
# NOTE(review): this loads from saved_models/ResNet101V2/checkpoints/, while the training
# cell above writes to saved_models/ResNet101V3/checkpoints/. Confirm which folder holds
# the intended model.
model = tf.keras.models.load_model("saved_models/ResNet101V2/checkpoints/")
path = "data/raw"
train_data, validation_data = create_dataset(path, batch_size = 32)
class_weights = create_class_weights(path)
# Logs and checkpoints of the continued run go to a separate "_continued" folder.
log_dir = 'saved_models/ResNet101V2_continued/logs'
filepath = "saved_models/ResNet101V2_continued/checkpoints/"
tf_callbacks = tf.keras.callbacks.TensorBoard(log_dir = log_dir, update_freq = 1 )
tf_modelCkpt = tf.keras.callbacks.ModelCheckpoint(filepath, monitor="val_loss", save_best_only=True,  save_weights_only=False, mode="auto", save_freq='epoch', initial_value_threshold=None)
# NOTE(review): compiling again configures a new Adam optimizer for the loaded model.
# Confirm that restarting the optimizer is intended.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5), loss = tf.keras.losses.CategoricalCrossentropy(), metrics = [tf.keras.metrics.CategoricalAccuracy(name="categorical_accuracy", dtype=None)])
model.fit(train_data, epochs = 150, validation_freq = 2, validation_data = validation_data, class_weight = class_weights, callbacks = [tf_callbacks, tf_modelCkpt])

Found 8567 files belonging to 38 classes.
Using 6854 files for training.
Found 8567 files belonging to 38 classes.
Using 1713 files for validation.
Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epo

KeyboardInterrupt: ignored

### Train Model With Plant Label

In [None]:
import cv2
# Plant names; the position in this list is the plant label index.
plants = ["apple",
    "banana",
    "cherry",
    "corn",
    "grape",
    "orange",
    "pepper",
    "potato",
    "strawberry",
    "tea",
    "tomato"]
def print_plant_label_indices(path = "data/raw"):
    """
    Print every folder name under `path`, followed by its matching plant indices.

    The first directory visited is `path` itself, so only its name is printed.
    For each other directory, the index in `plants` of every plant name that
    occurs in the folder name is printed on its own line.

    This helper has its own name so that it does not replace the
    `create_dataset(path, batch_size)` loader that other cells call.

    Args:
      path: Root directory to walk (defaults to "data/raw").
    """
    for i, (dirpath, _subdirs, _files) in enumerate(os.walk(path)):
      folder = dirpath.split('/')[-1]
      print(folder)
      if i != 0:
        for plant in plants:
          if plant in folder:
            print(plants.index(plant))
print_plant_label_indices()

raw
corn (maize) cercospora leaf spot gray leaf spot
3
apple black rot
0
banana healthy
1
tomato late blight
10
banana segatoka
1
pepper, bell healthy
6
cherry (including sour) healthy
2
potato healthy
7
apple cedar apple rust
0
orange haunglongbing (citrus greening)
5
tea red leaf spot
9
tea leaf blight
9
tomato leaf mold
10
strawberry healthy
8
tomato tomato mosaic virus
10
tomato target spot
10
pepper, bell bacterial spot
6
corn (maize) healthy
3
tomato early blight
10
corn (maize) common rust
3
cherry (including sour) powdery mildew
2
banana xamthomonas
1
corn (maize) northern leaf blight
3
tomato tomato yellow leaf curl virus
10
grape esca (black measles)
4
strawberry leaf scorch
8
grape leaf blight (isariopsis leaf spot)
4
potato late blight
7
tomato bacterial spot
10
tomato spider mites two-spotted spider mite
10
grape healthy
4
apple apple scab
0
grape black rot
4
potato early blight
7
apple healthy
0
tomato septoria leaf spot
10
tomato healthy
10
tea red scab
9


# Additional Models

In [None]:
# Load the model saved by the continued ResNet run; it replaces the notebook-level `model`.
model = tf.keras.models.load_model('saved_models/ResNet101V2_continued/checkpoints')

In [None]:
# Class names are the folder names found under data/raw (the root itself is skipped), sorted.
# A generator is used so that no loop variable is left behind at notebook scope; in
# particular, the builtin `dir` is not rebound.
disease_list = sorted(
    dirpath[dirpath.rfind('/') + 1:]
    for dirpath, _subdirs, _files in os.walk("data/raw")
    if dirpath != "data/raw")
# Map class index (position in the sorted list) -> class name.
labels_dict = dict(enumerate(disease_list))
labels_dict

{0: 'apple apple scab',
 1: 'apple black rot',
 2: 'apple cedar apple rust',
 3: 'apple healthy',
 4: 'banana healthy',
 5: 'banana segatoka',
 6: 'banana xamthomonas',
 7: 'cherry (including sour) healthy',
 8: 'cherry (including sour) powdery mildew',
 9: 'corn (maize) cercospora leaf spot gray leaf spot',
 10: 'corn (maize) common rust',
 11: 'corn (maize) healthy',
 12: 'corn (maize) northern leaf blight',
 13: 'grape black rot',
 14: 'grape esca (black measles)',
 15: 'grape healthy',
 16: 'grape leaf blight (isariopsis leaf spot)',
 17: 'orange haunglongbing (citrus greening)',
 18: 'pepper, bell bacterial spot',
 19: 'pepper, bell healthy',
 20: 'potato early blight',
 21: 'potato healthy',
 22: 'potato late blight',
 23: 'strawberry healthy',
 24: 'strawberry leaf scorch',
 25: 'tea leaf blight',
 26: 'tea red leaf spot',
 27: 'tea red scab',
 28: 'tomato bacterial spot',
 29: 'tomato early blight',
 30: 'tomato healthy',
 31: 'tomato late blight',
 32: 'tomato leaf mold',
 33:

In [None]:
# Reload data/raw with one image per batch, so every element yielded by the datasets is
# a single (image, one-hot label) pair.
path = 'data/raw'
# NOTE(review): this needs `create_dataset` to be the (path, batch_size) loader. Make
# sure no later-defined helper has rebound that name before this cell runs.
train_data, validation_data = create_dataset(path, batch_size = 1)

Found 8568 files belonging to 38 classes.
Using 6855 files for training.
Found 8568 files belonging to 38 classes.
Using 1713 files for validation.


In [None]:
def _extract_features(feature_model, dataset):
  """
  Run `feature_model` over `dataset` and collect one feature vector per image.

  Args:
    feature_model: Model whose `predict` returns one row of features per image.
    dataset: Iterable of (images, one-hot labels) batches of any batch size.

  Returns:
    (features, labels): a list of 1-D feature arrays and a parallel list of
    class names looked up in `labels_dict`.
  """
  features, labels = [], []
  for images, one_hot in dataset:
    batch_features = np.asarray(feature_model.predict(images))
    # Flatten each sample to a vector without hard-coding the feature width.
    batch_features = batch_features.reshape(len(batch_features), -1)
    class_ids = np.argmax(np.asarray(one_hot), axis = 1)
    features.extend(batch_features)
    labels.extend(labels_dict[int(class_id)] for class_id in class_ids)
  return features, labels


# Expose the second-to-last layer of `model` as the feature extractor.
intermediate_layer_model = Model(inputs=model.input, outputs=model.layers[-2].output)

val_x, val_y = _extract_features(intermediate_layer_model, validation_data)
train_x, train_y = _extract_features(intermediate_layer_model, train_data)




In [None]:
# Class weights keyed by index, printed for reference.
class_weights = create_class_weights("data/raw")
print(class_weights)
# The same weights keyed by class name instead of class index.
rf_weights = {labels_dict[class_index]: weight for class_index, weight in class_weights.items()}
rf_weights

{0: 5.3156146179401995, 1: 19.047619047619047, 2: 7.407407407407407, 3: 25.0, 4: 2.5806451612903225, 5: 1.25, 6: 1.0, 7: 22.22222222222222, 8: 22.22222222222222, 9: 8.0, 10: 12.121212121212121, 11: 8.040201005025125, 12: 7.8431372549019605, 13: 22.22222222222222, 14: 12.903225806451612, 15: 5.970149253731344, 16: 40.0, 17: 17.391304347826086, 18: 9.30232558139535, 19: 10.256410256410257, 20: 10.0, 21: 9.30232558139535, 22: 9.090909090909092, 23: 14.285714285714286, 24: 7.407407407407407, 25: 20.0, 26: 20.0, 27: 20.0, 28: 14.285714285714286, 29: 9.090909090909092, 30: 11.428571428571429, 31: 7.2727272727272725, 32: 10.256410256410257, 33: 10.0, 34: 12.903225806451612, 35: 28.571428571428573, 36: 11.11111111111111, 37: 6.779661016949152}


{'apple apple scab': 5.3156146179401995,
 'apple black rot': 19.047619047619047,
 'apple cedar apple rust': 7.407407407407407,
 'apple healthy': 25.0,
 'banana healthy': 2.5806451612903225,
 'banana segatoka': 1.25,
 'banana xamthomonas': 1.0,
 'cherry (including sour) healthy': 22.22222222222222,
 'cherry (including sour) powdery mildew': 22.22222222222222,
 'corn (maize) cercospora leaf spot gray leaf spot': 8.0,
 'corn (maize) common rust': 12.121212121212121,
 'corn (maize) healthy': 8.040201005025125,
 'corn (maize) northern leaf blight': 7.8431372549019605,
 'grape black rot': 22.22222222222222,
 'grape esca (black measles)': 12.903225806451612,
 'grape healthy': 5.970149253731344,
 'grape leaf blight (isariopsis leaf spot)': 40.0,
 'orange haunglongbing (citrus greening)': 17.391304347826086,
 'pepper, bell bacterial spot': 9.30232558139535,
 'pepper, bell healthy': 10.256410256410257,
 'potato early blight': 10.0,
 'potato healthy': 9.30232558139535,
 'potato late blight': 9.09

## Random Forest

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

# Random forest fitted on train_x / train_y, which are assembled in an earlier
# cell (each train_x entry is a vector reshaped to length 100).
#
# n_estimators=1000 -> forest of 1000 decision trees.
# random_state=42   -> seeded, so repeated runs build the same forest.
# n_jobs=-1         -> grow the trees on every available core instead of one;
#                      this only affects how the work is scheduled.
#
# NOTE(review): rf_weights is computed in the class-weight cell above but is
# never handed to the model. If the imbalance correction is intended, pass
# class_weight=rf_weights here -- first confirm that its keys match the label
# values stored in train_y.
rf = RandomForestClassifier(n_estimators=1000, random_state=42, n_jobs=-1)
rf.fit(train_x, train_y)

RandomForestClassifier(n_estimators=1000, random_state=42)

In [None]:
# Build the training and validation splits from the raw image directory,
# requesting batches of 32 from create_dataset.
# NOTE(review): the evaluation cells below read val_x / val_y, which are not
# defined in this cell -- confirm where they are derived from these splits.
path = 'data/raw'
orig_train_data, orig_validation_data = create_dataset(path, batch_size=32)

Found 8568 files belonging to 38 classes.
Using 6855 files for training.
Found 8568 files belonging to 38 classes.
Using 1713 files for validation.


In [None]:
from sklearn.metrics import classification_report, confusion_matrix
# Predict with the fitted random forest on val_x, then print the confusion
# matrix followed by the per-class precision / recall / F1 report against val_y.
predictions = rf.predict(val_x)
for metric in (confusion_matrix, classification_report):
    print(metric(val_y, predictions))

[[40  2 12 ...  0  0  0]
 [ 0  4  0 ...  0  0  0]
 [16  0 19 ...  0  0  0]
 ...
 [ 1  0  1 ...  2  0  0]
 [ 1  0  0 ...  0  2  1]
 [ 1  0  1 ...  0  0 18]]
                                                  precision    recall  f1-score   support

                                apple apple scab       0.65      0.66      0.65        61
                                 apple black rot       0.27      0.25      0.26        16
                          apple cedar apple rust       0.54      0.53      0.54        36
                                   apple healthy       0.14      0.18      0.16        11
                                  banana healthy       0.91      0.92      0.92       129
                                 banana segatoka       0.94      0.93      0.94       259
                              banana xamthomonas       0.91      0.93      0.92       310
                 cherry (including sour) healthy       0.71      0.71      0.71        14
          cherry (including sour)

## SVM

In [None]:
from sklearn.svm import SVC
# Linear-kernel support vector classifier trained on the same train_x / train_y
# used for the random forest. fit() returns the estimator itself, so the
# construction and training are chained into a single assignment.
svclassifier = SVC(kernel='linear').fit(train_x, train_y)

# Predicted labels for val_x; evaluated in the next cell.
pred = svclassifier.predict(val_x)

In [None]:
from sklearn.metrics import classification_report, confusion_matrix
# Confusion matrix and per-class precision / recall / F1 for the linear SVM
# predictions (pred) against the validation labels (val_y).
print(confusion_matrix(val_y, pred))
# Several classes are never predicted by the SVM, which makes their precision
# undefined. zero_division=0 reports those entries as 0.00 (the same value the
# default prints) without raising an undefined-metric warning for every
# averaged score.
print(classification_report(val_y, pred, zero_division=0))

[[55  0  0 ...  0  0  0]
 [ 0  0  0 ...  0  0  0]
 [35  0  0 ...  0  0  0]
 ...
 [ 3  0  0 ...  1  0  0]
 [ 0  0  0 ...  0  0  2]
 [ 0  0  0 ...  0  0 32]]
                                                  precision    recall  f1-score   support

                                apple apple scab       0.56      0.90      0.69        61
                                 apple black rot       0.00      0.00      0.00        16
                          apple cedar apple rust       0.00      0.00      0.00        36
                                   apple healthy       0.00      0.00      0.00        11
                                  banana healthy       0.90      0.95      0.92       129
                                 banana segatoka       0.95      0.93      0.94       259
                              banana xamthomonas       0.92      0.95      0.93       310
                 cherry (including sour) healthy       0.48      0.79      0.59        14
          cherry (including sour)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
