### Imports

In [1]:
import os
import cv2
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
from os import listdir, environ
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import gc

import tensorflow as tf
from tensorflow import keras as tfk
from tensorflow.keras import layers as tfkl
print(tf.version)
print(tf.config.list_physical_devices('GPU'))

<module 'tensorflow._api.v2.version' from 'C:\\Users\\nicol\\anaconda3\\lib\\site-packages\\tensorflow\\_api\\v2\\version\\__init__.py'>
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


### Data 

In [2]:
# Constants
seed=42
val_size = 0.2
data_path_healthy = 'public_healthy'
data_path_unhealthy = 'public_unhealthy'

In [3]:
# Loads images from folder and converts to nparray
def load_images(path):
    files = os.listdir(path)
    images = []
    
    for file in files:
        file_path = os.path.join(path, file)
        image = cv2.imread(file_path)
        
        if image is not None:
            image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            image_rgb = image.astype(np.float32)
            images.append(image_rgb)
        else:
            print(f"Unable to load image: {jpg_file}")

    return np.array(images)

In [4]:
# Read the (cleaned) data
plants_healthy = load_images(data_path_healthy)
plants_unhealthy = load_images(data_path_unhealthy)

# Merge plant data sets
X = np.concatenate([plants_healthy, plants_unhealthy], axis=0)

# Create labels: 0 for 'healthy', 1 for 'unhealthy'
y = np.concatenate([np.zeros(len(plants_healthy)), np.ones(len(plants_unhealthy))], axis=0)

# Convert labels to categorical format using one-hot encoding
y = tfk.utils.to_categorical(y, num_classes=2)

# Split into train and validation (hidden test set on codalab)
X_train, X_val, y_train, y_val = train_test_split(X, y, random_state=seed, test_size=val_size, stratify=np.argmax(y,axis=1))

# Convert labels to categorical format using one-hot encoding
print(X_train.shape, X_val.shape, y_train.shape, y_val.shape)

(4000, 96, 96, 3) (1000, 96, 96, 3) (4000, 2) (1000, 2)


In [5]:
# Create an image data generator for augmentation
datagen = ImageDataGenerator(
    rotation_range=20,       
    width_shift_range=0.2,  
    height_shift_range=0.2,  
    shear_range=0.2,        
    zoom_range=0.2,          
    horizontal_flip=True,    
    fill_mode='nearest'      
)

datagen.fit(X_train, seed=seed)

## Hyperparameter tuning

In [6]:
# For reproducible results
def set_random_state(x):
    environ['PYTHONHASHSEED']=str(42)
    # random.seed(42) will ruin random choice method
    np.random.seed(42)
    tf.random.set_seed(42) 

In [7]:
# Create dataframe to store our results
stats_path = f'mark_V_stats.csv'
stats = pd.DataFrame(columns=['batch_size', 'rotation_range', 'width_shift_range', 'height_shift_range', 'shear_range', 
                            'zoom_range', 'horizontal_flip', 'vertical_flip', 'brightness_shift_range', 'val_loss',
                             'val_accuracy'])
stats.index.name = 'No.'

# Model constants 
input_shape = X.shape[1:] 
output_shape = y.shape[1]    
epochs = 200

# Hyperparameter variables
batch_sizes = [32, 48, 64]  
rotation_ranges = [20, 45, 90, 180]
width_shift_ranges = [0.1, 0.2, 0.3, 0.5]
height_shift_ranges = [0.1, 0.2, 0.3, 0.5]
shear_ranges = [0.1, 0.2, 0.3, 0.5]
zoom_ranges = [0.1, 0.2, 0.3, 0.5]
horizontal_flips = [False, True] 
vertical_flips = [False, True] 
brightness_shift_upper = [0.1, 0.2, 0.3, 0.5]

# Run until manually interrupted
i=0
while(True):
    # Clear tf and set random state
    gc.collect()
    tf.keras.backend.clear_session()
    set_random_state(42)
    print(f'RUNNING random hyperparameter settings {i}...')
    
    # Randomly select the batch size
    batch_size = random.choice(batch_sizes)
   
    # Randomly create a data generator
    datagen = ImageDataGenerator(
        rotation_range = random.choice(rotation_ranges),       
        width_shift_range = random.choice(width_shift_ranges),  
        height_shift_range = random.choice(height_shift_ranges),  
        shear_range = random.choice(shear_ranges),        
        zoom_range = random.choice(zoom_ranges),          
        horizontal_flip = random.choice(horizontal_flips), 
        vertical_flip = random.choice(vertical_flips),
        brightness_range = (0, random.choice(brightness_shift_upper)),
        fill_mode = 'nearest'      
    )
    datagen.fit(X_train, seed=seed)
    
    # Create Mark V
    convnext = tfk.applications.convnext.ConvNeXtLarge(
        include_top=False,
        include_preprocessing=True,
        weights='imagenet',
        input_shape=input_shape,
        pooling='avg',
    )
    convnext.trainable = False
    inputs = tfk.Input(X_train.shape[1:])
    x = convnext(inputs)
    outputs = tfkl.Dense(2, activation='softmax')(x)
    model = tfk.Model(inputs=inputs, outputs=outputs, name='model')
    model.compile(loss=tfk.losses.CategoricalCrossentropy(), optimizer=tfk.optimizers.Adam(), metrics=['accuracy'])
    
    # (Try to) prevent exhausting resources
    gc.collect()
    
    # Transfer Learning
    early_stopping = tfk.callbacks.EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
    history = model.fit(
        datagen.flow(X_train, y_train, batch_size, seed=seed),
        epochs=epochs,
        validation_data=(X_val, y_val),
        callbacks=[early_stopping]
    ).history
    
    # (Try to) prevent exhausting resources
    gc.collect()
    
    # Fine tuning
    model.get_layer('convnext_large').trainable = True
    model.compile(loss=tfk.losses.CategoricalCrossentropy(), optimizer=tfk.optimizers.Adam(learning_rate=1e-5), metrics='accuracy')
    early_stopping.patience = 10
    history = model.fit(
        datagen.flow(X_train, y_train, batch_size, seed=seed),
        epochs=epochs,
        validation_data=(X_val, y_val),
        callbacks=[early_stopping]
    ).history
    
    # Get validation loss and accuracy
    best_epoch = np.argmin(history['val_loss'])
    val_loss = history['val_loss'][best_epoch]
    val_acc = history['val_accuracy'][best_epoch]
    print('Validation accuracy: ', val_acc)
                
    # Add stats to stats dataframe
    iteration_stats = [batch_size, datagen.rotation_range, datagen.width_shift_range, datagen.height_shift_range, 
                       datagen.shear_range, datagen.zoom_range, datagen.horizontal_flip, datagen.vertical_flip, 
                       datagen.brightness_range, val_loss, val_acc]
    stats.loc[len(stats)] = iteration_stats
    
    # Sort and write to csv
    stats_sorted = stats.sort_values('val_loss')
    stats_sorted.to_csv(stats_path, sep='\t')
    
    # Save the fine tuned model
    model.save(f'saved_models/mark_V_{i}')
    
    print('\n')
    i+=1

RUNNING random hyperparameter settings 0...
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200


Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Validation accuracy:  0.9290000200271606




INFO:tensorflow:Assets written to: saved_models/mark_V_0\assets


INFO:tensorflow:Assets written to: saved_models/mark_V_0\assets




RUNNING random hyperparameter settings 1...
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200


Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78/200
Epoch 79/200
Epoch 80/200
Epoch 81/200
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Validation accuracy:  0.9240000247955322




INFO:tensorflow:Assets written to: saved_models/mark_V_1\assets


INFO:tensorflow:Assets written to: saved_models/mark_V_1\assets




RUNNING random hyperparameter settings 2...
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
14/63 [=====>........................] - ETA: 6s - loss: 0.5190 - accuracy: 0.7454

KeyboardInterrupt: 