<a href="https://colab.research.google.com/github/KimaniKibuthu/Plant-Pathology/blob/main/Plant_Pathology.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Plant Pathology

The main objective of the competition is to develop machine learning-based models to accurately classify a given leaf image from the test dataset to a particular disease category, and to identify an individual disease from multiple disease symptoms on a single leaf image. 



# Libraries and Functions

In [None]:
# %tensorflow_version 2.x
# import tensorflow as tf
# print("Tensorflow version " + tf.__version__)

# try:
#   tpu = tf.distribute.cluster_resolver.TPUClusterResolver()  # TPU detection
#   print('Running on TPU ', tpu.cluster_spec().as_dict()['worker'])
# except ValueError:
#   raise BaseException('ERROR: Not connected to a TPU runtime; please see the previous cell in this notebook for instructions!')

# tf.config.experimental_connect_to_cluster(tpu)
# tf.tpu.experimental.initialize_tpu_system(tpu)
# tpu_strategy = tf.distribute.experimental.TPUStrategy(tpu)

In [None]:
!pip uninstall kaggle
!pip install --upgrade kaggle

In [None]:
# General libraries
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split

# Modelling
from tensorflow.keras import Sequential, Model
from tensorflow.keras.applications import ResNet101V2
from tensorflow.keras.layers import Dense, Flatten, Dropout, GlobalAveragePooling2D

In [None]:
# Variables
# Shared settings read by the data-loading, modelling and training cells below.
TARGET_SIZE = 224  # side length (pixels) images are resized to; also the model's input height/width
TEST_SIZE = 0.5  # NOTE(review): not referenced by the cells shown; the split cell passes test_size=0.1 directly
BATCH_SIZE = 64  # batch size for both image iterators and the steps-per-epoch arithmetic
RANDOM_STATE = 42  # seed passed to train_test_split
EPOCHS = 25  # maximum number of epochs passed to model.fit
LR = 0.001  # learning rate handed to the Adam optimizer

# Data Acquisition

In [None]:
# Load credentials
def credentials(name, token_id):
  """Expose Kaggle credentials to the Kaggle CLI through environment variables.

  Args:
    name: Kaggle username, stored in KAGGLE_USERNAME.
    token_id: Kaggle API key, stored in KAGGLE_KEY.

  Returns:
    None. Prints 'Done!' once both variables are set.
  """
  # Username first, then key: the same assignment order as two separate writes.
  os.environ.update({"KAGGLE_USERNAME": name, "KAGGLE_KEY": token_id})

  print('Done!')


In [None]:
# Get data

credentials('kimanikibuthu', 'f85c0bb5d43058fddcce7902e1325677')

!kaggle competitions download -c plant-pathology-2021-fgvc8

In [None]:
!unzip /content/plant-pathology-2021-fgvc8.zip

In [None]:
# Read the training labels and the sample submission into DataFrames.
train, submission = (
    pd.read_csv(csv_path)
    for csv_path in ('/content/train.csv', '/content/sample_submission.csv')
)


# Data Preparation

**General Exploration**

In [None]:
train.head()

In [None]:
# Info
train.info()

**Explore Labels**

In [None]:
# Share of each label in the training set, expressed as a percentage.
train['labels'].value_counts(normalize=True).mul(100)

In [None]:
# Horizontal bar chart of how many images carry each label.
label_axes = plt.figure(figsize=(12, 8)).gca()
sns.countplot(data=train, y='labels', palette='husl', ax=label_axes)

The label distribution is imbalanced, so the imbalance has to be accounted for when splitting the data and when training and evaluating the model.

In [None]:
# Label encode the labels: each distinct label string gets the string form of
# its first-appearance index ('0', '1', ...).
# NOTE: `map` shadows the builtin of the same name; the name is kept because a
# later cell displays this dictionary.
map = {
    label_name: str(code)
    for code, label_name in enumerate(train['labels'].unique())
}

train['labels'] = train['labels'].replace(map)

**Visualize Images**

In [None]:

def show_images(label, data):
    """Plot up to nine randomly chosen training images for one encoded label.

    Args:
        label: Encoded label value to match against ``data['labels']``.
        data: DataFrame with a ``labels`` column and an ``image`` column holding
            file names relative to ``/content/train_images``.

    Returns:
        None. Draws a 3x3 grid on a new matplotlib figure. Files that cannot be
        found on disk are skipped and their grid slot is left empty.
    """
    grid_slots = 9  # 3 x 3 grid

    # File names of every image carrying the requested label
    images = data.loc[data['labels'] == label, 'image'].values
    if len(images) == 0:
        # Nothing to draw; np.random.choice would raise on an empty array.
        print(f'No images found for label {label!r}')
        return

    # Draw without replacement so the same image is not shown twice; labels
    # with fewer than nine images simply fill fewer slots.
    sample_size = min(grid_slots, len(images))
    random_images = np.random.choice(images, size=sample_size, replace=False)

    # Adjust the size of your images
    plt.figure(figsize=(16, 12))

    # Iterate and plot random images
    for slot, image_name in enumerate(random_images):
        image_path = os.path.join('/content/train_images', image_name)

        # Reading the file is the step that raises FileNotFoundError, so it is
        # the step that has to sit inside the try block.
        try:
            img = plt.imread(image_path)
        except FileNotFoundError:
            continue

        plt.subplot(3, 3, slot + 1)
        plt.imshow(img, cmap='gray')
        plt.axis('off')

    # Adjust subplot parameters to give specified padding
    plt.tight_layout()

In [None]:
map

***healthy (0)***

In [None]:
show_images('0', train)

***

***scab frog_eye_leaf_spot complex (1)***

In [None]:
show_images('1', train)

***scab (2)***

In [None]:
show_images('2', train)

***complex (3)***

In [None]:
show_images('3', train)

***rust (4)***

In [None]:
show_images('4', train)

***frog_eye_leaf_spot (5)***

In [None]:
show_images('5', train)

***powdery_mildew (6)***

In [None]:
show_images('6', train)

***scab frog_eye_leaf_spot (7)***

In [None]:
show_images('7', train)

***frog_eye_leaf_spot complex (8)***

In [None]:
show_images('8', train)

***rust frog_eye_leaf_spot (9)***

In [None]:
show_images('9', train)

***powdery_mildew complex (10)***

In [None]:
show_images('10', train)

***rust complex (11)***

In [None]:
show_images('11', train)

In [None]:
# Stratified 90/10 train/validation split; both halves get a fresh 0..n-1 index.
train_set, val_set = (
    subset.reset_index(drop=True)
    for subset in train_test_split(
        train,
        test_size=0.1,
        random_state=RANDOM_STATE,
        stratify=train['labels'],
    )
)


**Loading Into Generators**

In [None]:
# Both generators apply the ResNetV2 preprocessing function and nothing else;
# the plain `rescale=1/255` option stays disabled.
resnet_preprocess = tf.keras.applications.resnet_v2.preprocess_input

train_datagen = ImageDataGenerator(preprocessing_function=resnet_preprocess)

val_datagen = ImageDataGenerator(preprocessing_function=resnet_preprocess)

In [None]:
# Arguments shared by the training and validation iterators.
flow_kwargs = dict(
    directory='/content/train_images',
    x_col='image',
    y_col='labels',
    target_size=(TARGET_SIZE, TARGET_SIZE),
    class_mode='categorical',
    batch_size=BATCH_SIZE,
)

# Training batches are shuffled (the default); seeding the shuffle makes the
# batch order reproducible across runs.
train_flow = train_datagen.flow_from_dataframe(
    train_set,
    seed=RANDOM_STATE,
    **flow_kwargs
)

# Validation batches keep the row order of `val_set`, so predictions made from
# this iterator line up with the dataframe.
val_flow = val_datagen.flow_from_dataframe(
    val_set,
    shuffle=False,
    **flow_kwargs
)

In [None]:
# Visualize a few images from one training batch
x_batch, y_batch = next(train_flow)

# The batch has already gone through resnet_v2.preprocess_input, which scales
# pixels to [-1, 1]. imshow expects floats in [0, 1] and would clip everything
# below zero, so map the values back before plotting.
# Slicing (rather than range(0, 6)) also copes with batches of fewer than 6 images.
for image in x_batch[:6]:
    plt.imshow((image + 1.0) / 2.0)
    plt.show()

In [None]:
# Number of distinct class indices present in the training iterator.
NUM_CLASSES = np.unique(train_flow.classes).size

# Modelling

In [None]:
def create_model():
  """Build and compile the ResNet101V2-based softmax classifier.

  Reads the notebook-level TARGET_SIZE, NUM_CLASSES and LR.

  Returns:
    A compiled Sequential model: ImageNet-initialised ResNet101V2 without its
    top, global average pooling, then a softmax Dense layer with NUM_CLASSES
    units.
  """
  # Pre-trained backbone; every layer is left trainable.
  backbone = ResNet101V2(weights='imagenet',
                         include_top=False,
                         input_shape=(TARGET_SIZE, TARGET_SIZE, 3))
  backbone.trainable = True

  # Pooled backbone features feed the classifier directly. A larger
  # Dense/Dropout head was tried here earlier and is not part of the model.
  classifier = Sequential()
  classifier.add(backbone)
  classifier.add(GlobalAveragePooling2D())
  classifier.add(Dense(NUM_CLASSES, activation='softmax'))

  # Loss operates on probabilities (softmax output) with light label smoothing.
  smoothed_cce = tf.keras.losses.CategoricalCrossentropy(
      name='categorical_crossentropy',
      label_smoothing=0.01,
      from_logits=False)

  optimizer = tf.keras.optimizers.Adam(learning_rate=LR)

  # ROC AUC over 200 thresholds, computed on probabilities.
  roc_auc = tf.keras.metrics.AUC(num_thresholds=200,
                                 curve='ROC',
                                 summation_method='interpolation',
                                 multi_label=False,
                                 from_logits=False)

  classifier.compile(optimizer=optimizer,
                     loss=smoothed_cce,
                     metrics=['categorical_accuracy', roc_auc])

  return classifier
  
# with tpu_strategy.scope(): # creating the model in the TPUStrategy scope means we will train the model on the TPU
#   model = create_model()
model = create_model()

In [None]:
def model_fitter(model):
  """Train `model` on the notebook's image iterators.

  Reads the notebook-level `train_flow`, `val_flow` and `EPOCHS`.

  Args:
    model: A compiled Keras model.

  Returns:
    A `(history, model)` tuple: the History object returned by `model.fit` and
    the same model instance after training.
  """
  # Stop once val_loss has not improved for 5 epochs, and roll the model back
  # to the weights of its best epoch instead of keeping the last (worse) ones.
  early_stopper = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                                   patience=5,
                                                   restore_best_weights=True)

  # Cut the learning rate tenfold after 2 stagnant epochs, down to 1e-6.
  reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss',
                                                   factor=0.1,
                                                   patience=2,
                                                   min_lr=1e-6,
                                                   mode='min',
                                                   verbose=1)

  callbacks = [early_stopper, reduce_lr]

  # Take the step counts from the iterators themselves: len(iterator) is the
  # number of batches they actually hold, which stays correct even if rows with
  # missing image files were dropped when the iterators were built.
  history = model.fit(train_flow,
                      epochs=EPOCHS,
                      steps_per_epoch=len(train_flow),
                      callbacks=callbacks,
                      validation_data=val_flow,
                      validation_steps=len(val_flow))

  return history, model

In [None]:
# Fit model
history, model = model_fitter(model)