   #                         Plant Pathology

### Importing all Required Libraries

In [2]:
import numpy as np
import pandas as pd
import os
import tensorflow as tf
import keras
from kaggle_datasets import KaggleDatasets
from tensorflow.keras.models import Sequential
import tensorflow.keras.layers as L
from tensorflow.keras.applications import InceptionResNetV2
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

In [None]:
!pip install efficientnet
import efficientnet.tfkeras as efn

### Establishing the TPU Strategy

In [3]:
# Probe for a TPU. A ValueError raised while resolving it (or while printing its
# master address) is treated as "no TPU available" and leaves `tpu` as None.
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print('Running on TPU ', tpu.master())
except ValueError:
    tpu = None

# Pick the distribution strategy that the later cells build and train under.
if not tpu:
    # No TPU found: fall back to the strategy returned by tf.distribute.get_strategy().
    strategy = tf.distribute.get_strategy()
else:
    # Connect to the cluster and initialise the TPU system before wrapping it in a strategy.
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)
    
def seed_everything(seed=0):
    """Seed the random number generators this notebook can reach.

    Seeds Python's ``random`` module, NumPy's global generator and TensorFlow's
    global generator, then exports ``PYTHONHASHSEED`` and
    ``TF_DETERMINISTIC_OPS`` into the process environment.

    Args:
        seed: Integer seed applied to every generator (default 0).
    """
    # Local import: the notebook's import cell does not bring in `random`.
    import random

    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)
    # NOTE(review): PYTHONHASHSEED is read by the interpreter at start-up, so
    # setting it here presumably only affects child processes — confirm if
    # hash determinism inside this kernel matters.
    os.environ['PYTHONHASHSEED'] = str(seed)
    os.environ['TF_DETERMINISTIC_OPS'] = '1'

# Fixed seed; it is also passed to train_test_split as random_state further down.
seed = 2048
seed_everything(seed)
# Replica count of the chosen strategy; the batch size and peak learning rate are scaled by it.
print("REPLICAS: ", strategy.num_replicas_in_sync)

Running on TPU  grpc://10.0.0.2:8470
REPLICAS:  8


### Loading the Kaggle dataset and establishing the filepath

In [4]:
# Data access
# Base path of the dataset; format_path() prefixes every image path with it.
GCS_DS_PATH = KaggleDatasets().get_gcs_path()
#'/kaggle/input/plant-pathology-2020-fgvc7/'
# Configuration
# NOTE(review): the fit cells pass their own literal `epochs` values; EPOCHS is
# what the learning-rate schedule preview iterates over.
EPOCHS = 40
# Batch size grows with the replica count (16 x number of replicas).
BATCH_SIZE = 16 * strategy.num_replicas_in_sync
def format_path(st):
    """Return the path of the JPEG for image id ``st`` under ``GCS_DS_PATH``."""
    pieces = [GCS_DS_PATH, '/images/', st, '.jpg']
    return ''.join(pieces)

### Setting up the train and test directories

In [5]:
# Competition metadata: training labels, test ids and the sample submission.
train = pd.read_csv('/kaggle/input/plant-pathology-2020-fgvc7/train.csv')
test = pd.read_csv('/kaggle/input/plant-pathology-2020-fgvc7/test.csv')
sub = pd.read_csv('/kaggle/input/plant-pathology-2020-fgvc7/sample_submission.csv')

# Turn every image_id into the image path that the tf.data pipelines read from.
train_paths = train.image_id.apply(format_path).values
test_paths = test.image_id.apply(format_path).values
# Label matrix: every column of train.csv from 'healthy' through the last one.
train_labels = train.loc[:, 'healthy':].values
# NOTE(review): valid_paths / valid_labels are only assigned inside the branch
# below, so any cell that reads them needs SPLIT_VALIDATION to stay True.
SPLIT_VALIDATION =True
if SPLIT_VALIDATION:
    # Hold out 15% of the training rows for validation; seeded so the split is repeatable.
    train_paths, valid_paths, train_labels, valid_labels =train_test_split(train_paths, train_labels, test_size=0.15, random_state=seed)

In [None]:
# Quick inspection: show which container type train_paths ended up as.
type(train_paths)

### Decoding the image at the specified size and applying data augmentation to the training data

In [None]:
# NOTE(review): decode_image() takes its own `image_size` parameter (default
# (320, 512)), so this module-level value is not what sets the decoded image
# size — confirm whether it is still needed.
image_size = 512
def decode_image(filename, label=None, image_size=(320,512)):
    """Read a JPEG file and return it as a resized float32 RGB image.

    Args:
        filename: Path of the JPEG file to read.
        label: Optional label, passed through unchanged.
        image_size: Target size handed to ``tf.image.resize``.

    Returns:
        The image alone when ``label`` is None, otherwise ``(image, label)``.
    """
    raw_bytes = tf.io.read_file(filename)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    # Convert to float32 and divide by 255 before resizing.
    scaled = tf.cast(decoded, tf.float32) / 255.0
    resized = tf.image.resize(scaled, image_size)

    if label is not None:
        return resized, label
    return resized

def data_augment(image, label=None):
    """Apply a random left/right flip followed by a random up/down flip.

    Args:
        image: Image tensor to augment.
        label: Optional label, passed through unchanged.

    Returns:
        The flipped image alone when ``label`` is None, otherwise
        ``(image, label)``.
    """
    flipped = tf.image.random_flip_up_down(
        tf.image.random_flip_left_right(image)
    )
    return flipped if label is None else (flipped, label)

### Using pipelines for efficient performance

In [None]:
# Training input pipeline, built one stage at a time.
train_dataset = tf.data.Dataset.from_tensor_slices((train_paths, train_labels))
# Decode first and cache the decoded images ...
train_dataset = train_dataset.map(decode_image, num_parallel_calls=tf.data.experimental.AUTOTUNE)
train_dataset = train_dataset.cache()
# ... then augment after the cache, so the flips are not frozen into it.
train_dataset = train_dataset.map(data_augment, num_parallel_calls=tf.data.experimental.AUTOTUNE)
# Endless stream; the fit cells bound each epoch with steps_per_epoch.
train_dataset = train_dataset.repeat()
train_dataset = train_dataset.shuffle(512)
train_dataset = train_dataset.batch(BATCH_SIZE)
train_dataset = train_dataset.prefetch(tf.data.experimental.AUTOTUNE)

# Validation input pipeline: decode, batch, then cache the batched result.
valid_dataset = tf.data.Dataset.from_tensor_slices((valid_paths, valid_labels))
valid_dataset = valid_dataset.map(decode_image, num_parallel_calls=tf.data.experimental.AUTOTUNE)
valid_dataset = valid_dataset.batch(BATCH_SIZE)
valid_dataset = valid_dataset.cache()
valid_dataset = valid_dataset.prefetch(tf.data.experimental.AUTOTUNE)

# Test input pipeline: paths only (no labels), decoded and batched.
test_dataset = tf.data.Dataset.from_tensor_slices(test_paths)
test_dataset = test_dataset.map(decode_image, num_parallel_calls=tf.data.experimental.AUTOTUNE)
test_dataset = test_dataset.batch(BATCH_SIZE)

### Learning Rate Scheduler

In [None]:
# Learning rate at epoch 0, where the linear ramp-up starts.
LR_START = 0.0001
# Peak learning rate, scaled by the number of replicas.
LR_MAX = 0.0001 * strategy.num_replicas_in_sync
# Floor that the exponential decay approaches.
LR_MIN = 0.0001
# Number of epochs spent ramping from LR_START towards LR_MAX.
LR_RAMPUP_EPOCHS = 4
# Number of epochs held at LR_MAX after the ramp-up.
LR_SUSTAIN_EPOCHS = 5
# Per-epoch multiplier applied to (LR_MAX - LR_MIN) once decay starts.
LR_EXP_DECAY = .8

def lrfn(epoch):
    """Return the learning rate for ``epoch``.

    The schedule has three phases: a linear ramp from LR_START towards LR_MAX
    over the first LR_RAMPUP_EPOCHS epochs, a plateau at LR_MAX for
    LR_SUSTAIN_EPOCHS epochs, then an exponential decay towards LR_MIN.
    """
    plateau_start = LR_RAMPUP_EPOCHS
    decay_start = LR_RAMPUP_EPOCHS + LR_SUSTAIN_EPOCHS

    if epoch < plateau_start:
        return (LR_MAX - LR_START) / LR_RAMPUP_EPOCHS * epoch + LR_START
    if epoch < decay_start:
        return LR_MAX
    return (LR_MAX - LR_MIN) * LR_EXP_DECAY**(epoch - LR_RAMPUP_EPOCHS - LR_SUSTAIN_EPOCHS) + LR_MIN
    
# Keras callback that applies lrfn at the start of training epochs.
lr_callback = tf.keras.callbacks.LearningRateScheduler(lrfn, verbose=True)

# Preview the schedule over the configured number of epochs.
rng = list(range(EPOCHS))
y = list(map(lrfn, rng))
plt.plot(rng, y)
print("Learning rate schedule: {:.3g} to {:.3g} to {:.3g}".format(y[0], max(y), y[-1]))

## Transfer Learning

### Defining the pre-trained models

In [None]:
from tensorflow.keras.applications import DenseNet121, DenseNet201
from tensorflow.keras.applications import vgg16
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.applications import MobileNet , MobileNetV2
from tensorflow.keras.applications import InceptionResNetV2
import tensorflow.keras.layers as L
# First (img_size1) and second (img_size) spatial dimensions of the input_shape
# given to every backbone in get_model_generalized().
img_size1=320
img_size=512
def get_model_generalized(name):
    """Build and compile a 4-class classifier on top of a pre-trained backbone.

    Args:
        name: Backbone key. One of 'EfficientNet7', 'EfficientNet3',
            'DenseNet', 'MobileNet', 'Inception', 'ResNet' or 'Incepresnet'.

    Returns:
        A compiled ``tf.keras.Sequential`` model: the backbone (without its
        top), global max pooling, Dense(512) -> Dropout(0.2) -> Dense(256),
        and a 4-way softmax output. Compiled with the 'adam' optimizer,
        categorical cross-entropy loss and categorical accuracy.

    Raises:
        ValueError: If ``name`` is not one of the supported keys.
    """
    supported_names = (
        'EfficientNet7', 'EfficientNet3', 'DenseNet', 'MobileNet',
        'Inception', 'ResNet', 'Incepresnet',
    )
    # Reject unknown names up front; otherwise no branch below would assign
    # base_model and the failure would surface as an UnboundLocalError.
    if name not in supported_names:
        raise ValueError(
            "Unknown model name {!r}; expected one of: {}".format(
                name, ', '.join(supported_names)))

    input_shape = (img_size1, img_size, 3)

    # Single elif chain: exactly one backbone is built per call.
    if name == 'EfficientNet7':
        base_model = efn.EfficientNetB7(weights='noisy-student',
                                        include_top=False,
                                        input_shape=input_shape)
    elif name == 'EfficientNet3':
        base_model = efn.EfficientNetB3(weights='imagenet',
                                        include_top=False,
                                        input_shape=input_shape)
    elif name == 'DenseNet':
        base_model = DenseNet201(weights='imagenet', include_top=False, input_shape=input_shape)
    elif name == 'MobileNet':
        base_model = MobileNet(weights='imagenet', include_top=False, input_shape=input_shape)
    elif name == 'Inception':
        base_model = InceptionV3(weights='imagenet', include_top=False, input_shape=input_shape)
    elif name == 'ResNet':
        base_model = ResNet50(weights='imagenet', include_top=False, input_shape=input_shape)
    elif name == 'Incepresnet':
        base_model = InceptionResNetV2(weights='imagenet', include_top=False, input_shape=input_shape)

    # Classification head shared by every backbone.
    model = tf.keras.Sequential([
        base_model,
        L.GlobalMaxPooling2D(),
        L.Dense(512, activation='relu'),
        L.Dropout(0.2),
        L.Dense(256, activation='relu'),
        L.Dense(4, activation='softmax'),
    ])
    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['categorical_accuracy']
    )
    return model

## Callbacks

In [None]:
from tensorflow.keras.callbacks import ReduceLROnPlateau , EarlyStopping , ModelCheckpoint , LearningRateScheduler
# Checkpoint callback: with save_best_only=True the model is written to
# 'best_model_acc.h5' only when the monitored val_loss improves (mode='min').
# NOTE(review): despite the "acc" in the file name, the monitored quantity is
# val_loss. Reusing one instance for several fit() calls shares both the output
# file and the callback's running best value between models.
mc = ModelCheckpoint('best_model_acc.h5', monitor = 'val_loss' , mode = 'min', verbose = 1 , save_best_only = True)
# Early stopping on val_loss with a patience of 10 epochs.
# NOTE(review): `es` is not included in the callbacks list of any fit() call in
# the cells shown here.
es = EarlyStopping(monitor = "val_loss" , verbose = 1 , mode = 'min' , patience = 10)

## Fitting the model

In [None]:
# Train the InceptionResNetV2-based model; creation, compile and fit all run
# inside the distribution strategy's scope.
with strategy.scope():
    model_dense = get_model_generalized("Incepresnet")
    model_dense.fit(
        train_dataset,
        # train_dataset repeats forever, so the epoch length is set explicitly.
        steps_per_epoch=train_labels.shape[0] // BATCH_SIZE,
        callbacks=[
            lr_callback,
            # Fresh checkpoint callback for this run: a ModelCheckpoint instance
            # keeps its running best value between fit() calls, so a shared one
            # would compare this model's val_loss against another model's best.
            tf.keras.callbacks.ModelCheckpoint(
                'best_model_acc.h5', monitor='val_loss', mode='min',
                verbose=1, save_best_only=True),
        ],
        validation_data=valid_dataset,
        epochs=30)

### Validation acc: 95.62 
### Test accuracy : 96.6

In [None]:
# Train the EfficientNetB7-based model; creation, compile and fit all run
# inside the distribution strategy's scope.
with strategy.scope():
    model = get_model_generalized("EfficientNet7")
    model.fit(
        train_dataset,
        # train_dataset repeats forever, so the epoch length is set explicitly.
        steps_per_epoch=train_labels.shape[0] // BATCH_SIZE,
        callbacks=[
            lr_callback,
            # Fresh checkpoint callback for this run: a ModelCheckpoint instance
            # keeps its running best value between fit() calls, so a shared one
            # would compare this model's val_loss against another model's best.
            tf.keras.callbacks.ModelCheckpoint(
                'best_model_acc.h5', monitor='val_loss', mode='min',
                verbose=1, save_best_only=True),
        ],
        validation_data=valid_dataset,
        epochs=40)

### Validation acc: 97.08 
### Test accuracy : 98.1

In [None]:
# Train the DenseNet201-based model; creation, compile and fit all run inside
# the distribution strategy's scope.
with strategy.scope():
    model = get_model_generalized("DenseNet")
    model.fit(
        train_dataset,
        # train_dataset repeats forever, so the epoch length is set explicitly.
        steps_per_epoch=train_labels.shape[0] // BATCH_SIZE,
        callbacks=[
            lr_callback,
            # Fresh checkpoint callback for this run: a ModelCheckpoint instance
            # keeps its running best value between fit() calls, so a shared one
            # would compare this model's val_loss against another model's best.
            tf.keras.callbacks.ModelCheckpoint(
                'best_model_acc.h5', monitor='val_loss', mode='min',
                verbose=1, save_best_only=True),
        ],
        validation_data=valid_dataset,
        epochs=40)

### Validation acc: 95.26
### Test accuracy : 96.5

In [None]:
# Train the InceptionV3-based model; creation, compile and fit all run inside
# the distribution strategy's scope.
with strategy.scope():
    model = get_model_generalized("Inception")
    model.fit(
        train_dataset,
        # train_dataset repeats forever, so the epoch length is set explicitly.
        steps_per_epoch=train_labels.shape[0] // BATCH_SIZE,
        callbacks=[
            lr_callback,
            # Fresh checkpoint callback for this run: a ModelCheckpoint instance
            # keeps its running best value between fit() calls, so a shared one
            # would compare this model's val_loss against another model's best.
            tf.keras.callbacks.ModelCheckpoint(
                'best_model_acc.h5', monitor='val_loss', mode='min',
                verbose=1, save_best_only=True),
        ],
        validation_data=valid_dataset,
        epochs=40)

### Validation acc: 97.08
### Test accuracy : 97.9

In [None]:
# Train the MobileNet-based model; creation, compile and fit all run inside
# the distribution strategy's scope.
with strategy.scope():
    model = get_model_generalized("MobileNet")
    model.fit(
        train_dataset,
        # train_dataset repeats forever, so the epoch length is set explicitly.
        steps_per_epoch=train_labels.shape[0] // BATCH_SIZE,
        callbacks=[
            lr_callback,
            # Fresh checkpoint callback for this run: a ModelCheckpoint instance
            # keeps its running best value between fit() calls, so a shared one
            # would compare this model's val_loss against another model's best.
            tf.keras.callbacks.ModelCheckpoint(
                'best_model_acc.h5', monitor='val_loss', mode='min',
                verbose=1, save_best_only=True),
        ],
        validation_data=valid_dataset,
        epochs=40)

### Validation acc: 94.89
### Test accuracy : 97.3

## Other models
- Xception test accuracy: 95.08
- Resnet accuracy: 96.2