In [28]:
#IMPORTS
import tensorflow as tf
import numpy as np
import pandas as pd
import pathlib
import datetime
import scipy
import matplotlib.pyplot as plt
import math
import os
import tensorboard as tb
import keras_tuner as kt
import skimage.io as io
import skimage.transform as trans
from sklearn.metrics import classification_report
# Show the kernel's current working directory.
print(os.getcwd())

#Google Drive Setup
# Mount Google Drive (Colab-only API) and add the assignment folder to sys.path
# so modules stored in that folder can be imported.
from google.colab import drive
drive.mount("/content/drive", force_remount=True)
# NOTE(review): the dataset, checkpoint and CSV paths used elsewhere in this
# notebook point at ".../assignment6"; confirm "assignment5" is intended here.
FOLDERNAME = "cs231n/assignments/assignment5"
# NOTE(review): FOLDERNAME is assigned a string literal on the line above, so
# this assertion cannot fail as written.
assert FOLDERNAME is not None, "[1] Enter the foldername."
import sys
sys.path.append("/content/drive/My Drive/{}".format(FOLDERNAME)) # str.format substitutes FOLDERNAME for the {} placeholder
#print(sys.path)

/content
Mounted at /content/drive


In [None]:
#LOAD DATA
# Batch size and image geometry shared by the training and validation pipelines.
BATCH_SIZE = 32             # images per batch produced by each directory iterator
IMG_HEIGHT = 224            # images are resized to this height on load
IMG_WIDTH = 224             # images are resized to this width on load
STEPS_PER_EPOCH = 100 #np.ceil(image_count/BATCH_SIZE)

#DATA PREPROCESSING
# One generator serves both splits: it only applies the ResNetV2 input
# preprocessing function, with no augmentation configured.
image_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    preprocessing_function=tf.keras.applications.resnet_v2.preprocess_input,
)

# Options that are identical for the train and validation iterators.
flow_options = dict(
    batch_size=BATCH_SIZE,
    shuffle=True,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
)

train_data_gen = image_generator.flow_from_directory(
    directory="/content/drive/My Drive/cs231n/assignments/assignment6/dataset/train",
    **flow_options,
)

# NOTE(review): the validation iterator is shuffled as well; that matters
# whenever only part of it is consumed per evaluation.
val_data_gen = image_generator.flow_from_directory(
    directory="/content/drive/My Drive/cs231n/assignments/assignment6/dataset/val",
    **flow_options,
)


Found 8323 images belonging to 10 classes.
Found 2084 images belonging to 10 classes.


In [None]:
#HYPERPARAMETER TUNING (ONLY LR - REGULARIZATION AND NUM_HIDDEN NOT AVAILABLE FOR MODEL, EPOCHS AND LR FURTHER OPTIMIZED USING CALLBACKS)
def model_builder(hp):
  """Build and compile a ResNet50V2 classifier for one hyperparameter trial.

  The network is created without pretrained weights (``weights=None``) and
  with its own 10-way softmax head. The only tuned hyperparameter is the SGD
  learning rate, chosen from {1e-2, 1e-3, 1e-4}.

  Args:
    hp: hyperparameter container supplied by the tuner; only its ``Choice``
      method is used here.

  Returns:
    The compiled model.
  """
  resnet_config = dict(
    include_top=True,
    weights=None, #"imagenet"
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=10,
    classifier_activation="softmax",
  )
  network = tf.keras.applications.ResNet50V2(**resnet_config)

  # LR Tuning
  chosen_lr = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
  sgd = tf.keras.optimizers.SGD(learning_rate=chosen_lr)

  network.compile(
    optimizer=sgd,
    loss="categorical_crossentropy",
    metrics=['accuracy', 'mean_squared_error'],
  )
  return network

# Hyperband search over the learning rate exposed by model_builder, ranked by
# validation accuracy. Trial state is written under my_dir/intro_to_kt.
tuner = kt.Hyperband(
    hypermodel=model_builder,
    objective='val_accuracy',
    max_epochs=10,
    factor=3,
    directory='my_dir',
    project_name='intro_to_kt',
)

# Abort a trial once validation loss has not improved for 5 epochs.
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)

# NOTE(review): Hyperband is configured with max_epochs=10 above; confirm
# whether the epochs=50 passed to search() has any effect on trial length.
search_options = dict(
    epochs=50,
    validation_data=val_data_gen,
    callbacks=[stop_early],
)
tuner.search(train_data_gen, **search_options)

# Get the optimal hyperparameters
top_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)
best_hps = top_hyperparameters[0]

best_learning_rate = best_hps.get('learning_rate')
print("Best LR from Hyperparameter Tuning = {}".format(best_learning_rate))

Trial 3 Complete [00h 03m 50s]
val_accuracy: 0.27639156579971313

Best val_accuracy So Far: 0.41938579082489014
Total elapsed time: 01h 02m 17s
INFO:tensorflow:Oracle triggered exit
Best LR from Hyperparameter Tuning = 0.01


In [None]:
#LOAD MODEL
# Build the final model through the tuner's hypermodel (model_builder) with the
# best hyperparameters, instead of repeating the ResNet50V2 constructor and
# compile() arguments here. This keeps the trained model's architecture,
# optimizer, loss and metrics identical to what the tuner evaluated, even if
# model_builder is edited later.
model = tuner.hypermodel.build(best_hps)
#print(model.summary())

#callbacks
# Optional TensorBoard logging, left disabled:
#%load_ext tensorboard_callback 
#%tensorboard --logdir logs
#log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
#tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

# Stop once the training loss has not improved for 10 epochs.
# NOTE(review): this monitors the training 'loss' while the other callbacks
# monitor validation metrics; confirm that 'val_loss' was not intended.
earlystop_cb = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=10)

# Lower the learning rate when validation loss plateaus (library defaults for
# factor and patience).
learningrates_cb = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss')

# Keep only the weights of the epoch with the highest validation accuracy.
checkpoint_filepath = '/content/drive/My Drive/cs231n/assignments/assignment6/checkpoints'
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=True,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True)

In [None]:
#TRAIN MODEL
# Train for up to 300 epochs of STEPS_PER_EPOCH batches each; the callbacks
# handle early stopping, learning-rate decay and best-weights checkpointing.
#
# Validation runs over the whole validation iterator every epoch:
#  * validation_batch_size is not passed, because the iterator already yields
#    batches and Keras documents that it must not be specified for
#    generator / Sequence inputs.
#  * validation_steps is not passed, because capping it at 32 batches of a
#    shuffled iterator scored each epoch on a different random subset, making
#    the val_accuracy / val_loss values compared by ModelCheckpoint and
#    ReduceLROnPlateau noisy and not comparable between epochs.
history = model.fit(
    train_data_gen,
    steps_per_epoch=STEPS_PER_EPOCH,
    epochs=300,
    validation_data=val_data_gen,
    callbacks=[earlystop_cb, learningrates_cb, model_checkpoint_callback],  # tensorboard_callback can be added here
)

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78

In [30]:
#OUTPUT TEST PREDICTIONS
# Restore the weights saved at checkpoint_filepath into the existing `model`.
# NOTE(review): this cell needs `model` and `checkpoint_filepath` from the
# model-setup cell to be defined in the running kernel.
model.load_weights(checkpoint_filepath)
# Disabled: classification report (the names `test` and `y_pred` are not defined in this cell).
#print(classification_report(test['Score'], y_pred))

# Disabled: counting the test images with glob.
#from glob import glob
#te_image_count = len(list(glob('./dataset/test/*.jpg')))
#print(te_image_count)

#from https://github.com/zhixuhao/unet/blob/master/data.py
def testGenerator(test_path,num_image = 30,target_size = (224,224),flag_multi_class = False,):
    """Yield preprocessed test images one at a time, each as a batch of one.

    Files are read from ``test_path`` under the names ``200001.jpg``,
    ``200002.jpg``, ... for ``num_image`` consecutive ids.

    Args:
        test_path: directory containing the numbered ``.jpg`` files.
        num_image: how many consecutive images to read.
        target_size: output size passed to the resize step.
        flag_multi_class: accepted for signature compatibility; not used.

    Yields:
        The resized, ResNetV2-preprocessed image with a leading axis of
        length 1 prepended.
    """
    for i in range(num_image):
        image_path = os.path.join(test_path,f"{i+200001}.jpg")
        raw_pixels = io.imread(image_path)
        # Apply the ResNetV2 input preprocessing, then resize to target_size.
        scaled_pixels = tf.keras.applications.resnet_v2.preprocess_input(
          raw_pixels, data_format=None
        )
        resized = trans.resize(scaled_pixels,target_size)
        # Prepend the batch axis expected by model.predict.
        single_batch = np.reshape(resized,(1,)+resized.shape)
        yield single_batch

#predict
# Run the model over the numbered test images (200001.jpg onwards), one image
# per batch, and collect the class probabilities.
testGene = testGenerator("/content/drive/My Drive/cs231n/assignments/assignment6/test_images", num_image = 3469) #couldn't get glob to work with google drive pathing
results = model.predict(testGene)

# Class names in model-output index order (0..9). A lookup table replaces the
# if/elif chain: an out-of-range index now raises instead of silently leaving
# `labels` shorter than `img_ids`.
# NOTE(review): assumes this order matches the class indices the training
# iterator assigned to the dataset sub-directories — verify against
# train_data_gen.class_indices.
CLASS_NAMES = [
    'bacterial_leaf_blight',
    'bacterial_leaf_streak',
    'bacterial_panicle_blight',
    'blast',
    'brown_spot',
    'dead_heart',
    'downy_mildew',
    'hispa',
    'normal',
    'tungro',
]

# Highest-probability class per image; row order follows the generator, so
# row k corresponds to file (200001 + k).jpg.
indices = np.argmax(results, axis=1)
img_ids = ['{}.jpg'.format(idx+200001) for idx in range(len(indices))]
labels = [CLASS_NAMES[dec] for dec in indices]

d = {"image_id": img_ids,
     "label": labels}
df = pd.DataFrame(data=d)
# index=False keeps the CSV to exactly the image_id and label columns; without
# it pandas also writes the row index as an unnamed first column.
df.to_csv("/content/drive/My Drive/cs231n/assignments/assignment6/Paddy_Doctor_Predictions.csv", index=False)