In [None]:
# open libraries
# TensorFlow and tf.keras
import tensorflow as tf
print(tf.__version__)
# Helper libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# For EDA
from collections import Counter
# For active learning
from sklearn.model_selection import train_test_split
from scipy.stats import entropy
from sklearn.metrics import f1_score, matthews_corrcoef
import copy # For data processing
import os
# Show the kernel's working directory: the dataset file and the CSV outputs
# in this notebook use relative paths, which resolve against it.
current_working_directory = os.getcwd()
print(current_working_directory)

# Install `rdata` only when it is not importable, so that re-running the
# notebook does not invoke pip on every execution.
import importlib
import subprocess
import sys
try:
    import rdata
except ImportError:
    subprocess.check_call([sys.executable, "-m", "pip", "install", "rdata"])
    # Let the import system notice the package that was just installed.
    importlib.invalidate_caches()
    import rdata

In [None]:
# load KewMNIST data
# The path is relative, so the .Rdata file must be in the kernel's current
# working directory. The result is indexed by object name in the next cell.
kew_mnist = rdata.read_rda('Kew-MNIST-full-dataset.Rdata')

In [None]:
# Pull the four stored objects (train/test images and labels) out of the
# loaded .Rdata mapping and bind them to notebook-level names.
(
    kew_train_images,
    kew_train_labels,
    kew_test_images,
    kew_test_labels,
) = (
    kew_mnist[object_name]
    for object_name in (
        'kew_train_images',
        'kew_train_labels',
        'kew_test_images',
        'kew_test_labels',
    )
)

In [None]:
# Human-readable class names. The plotting cells look names up with
# class_names[label], so the position in this list is the integer label.
class_names = ['Flower', 'Fruit', 'Leaf', 'Plant-Tag', 'Stem', 'Whole-Plant']

EDA

In [None]:
# Check shape of samples: display the shape of the test image array
kew_test_images.shape

In [None]:
# Display the shape of the training image array, for comparison with the test set
kew_train_images.shape

In [None]:
# Pool the provided train and test subsets into a single dataset; the
# active-learning function later draws its own split from this pool.
merged_images = np.concatenate([kew_train_images, kew_test_images])  # joins along axis 0 (the default)
labels = [*kew_train_labels, *kew_test_labels]  # train labels first, then test labels

In [None]:
# Class distribution: number of samples per integer label
label_counts = Counter(labels)

# Materialise the dict views as lists before plotting. A `dict_values` view is
# not a sequence, so NumPy's array conversion (which `pie` applies to its
# input) does not iterate over it.
wedge_sizes = list(label_counts.values())
wedge_labels = [f'{class_names[label]}: {count}' for label, count in label_counts.items()]

# Plot pie chart: one wedge per class, annotated with the class name and count
plt.pie(wedge_sizes, labels=wedge_labels, colors=plt.cm.Paired.colors)
# Title
plt.title('KewMNIST label distribution')
# Show plot
plt.show()


In [None]:
# Print the shape of the merged dataset
# (the subsets were joined along axis 0, so the first dimension is the total sample count)
print(merged_images.shape)

In [None]:
# Sanity check that images and labels still line up after merging:
# draw the first 25 images in a 5x5 grid, each captioned with its class name.
plt.figure(figsize=(10, 10))
for idx in range(25):
    ax = plt.subplot(5, 5, idx + 1)  # subplot positions are 1-based
    ax.set_xticks([])
    ax.set_yticks([])
    ax.grid(False)
    ax.imshow(merged_images[idx], cmap=plt.cm.binary)
    ax.set_xlabel(class_names[labels[idx]])
plt.show()

# Print the first 25 labels (the raw integers behind the captions above)
print(labels[:25])

In [None]:
# build the basic computer vision model - 23 mins
def Basic_CVModel():
    """Build and compile the baseline fully connected classifier.

    Layers, in order: flatten a 500x500 input, one 128-unit ReLU layer,
    a 6-unit softmax output. The TensorFlow global seed is set to 42 before
    the layers are created.

    Returns:
        The compiled ``tf.keras.Sequential`` model (Adam optimizer, sparse
        categorical cross-entropy loss, accuracy metric).
    """
    tf.random.set_seed(42)

    network = tf.keras.Sequential()
    network.add(tf.keras.layers.Flatten(input_shape=(500, 500)))
    network.add(tf.keras.layers.Dense(128, activation='relu'))
    network.add(tf.keras.layers.Dense(6, activation='softmax'))

    # compile the model
    network.compile(
        optimizer='adam',
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=['accuracy'],
    )
    return network

In [None]:
def Small_CVModel():
    """Build and compile the small fully connected classifier.

    Layers, in order: flatten a 500x500 input, then two ReLU layers
    (64 and 32 units) each followed by batch normalization, then a 6-unit
    softmax output. The TensorFlow global seed is set to 42 first.

    Returns:
        The compiled ``tf.keras.Sequential`` model (Adam optimizer, sparse
        categorical cross-entropy loss, accuracy metric).
    """
    # cv small - 13 mins
    tf.random.set_seed(42)

    stack = [tf.keras.layers.Flatten(input_shape=(500, 500))]
    for width in (64, 32):
        # every hidden layer is a Dense/BatchNormalization pair
        stack.append(tf.keras.layers.Dense(width, activation='relu'))
        stack.append(tf.keras.layers.BatchNormalization())
    stack.append(tf.keras.layers.Dense(6, activation='softmax'))
    network = tf.keras.Sequential(stack)

    # compile the model
    network.compile(
        optimizer='adam',
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=['accuracy'],
    )
    return network

In [None]:
def Big_CVModel():
    """Build and compile the large fully connected classifier.

    Layers, in order: flatten a 500x500 input, then three ReLU layers
    (256, 128 and 64 units) each followed by batch normalization, then a
    6-unit softmax output. The TensorFlow global seed is set to 42 first.

    Returns:
        The compiled ``tf.keras.Sequential`` model (Adam optimizer, sparse
        categorical cross-entropy loss, accuracy metric).
    """
    # Big neural network - 45 mins
    tf.random.set_seed(42)

    stack = [tf.keras.layers.Flatten(input_shape=(500, 500))]
    for width in (256, 128, 64):
        # every hidden layer is a Dense/BatchNormalization pair
        stack.append(tf.keras.layers.Dense(width, activation='relu'))
        stack.append(tf.keras.layers.BatchNormalization())
    stack.append(tf.keras.layers.Dense(6, activation='softmax'))
    network = tf.keras.Sequential(stack)

    # compile the model
    network.compile(
        optimizer='adam',
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=['accuracy'],
    )
    return network

In [None]:
def CNN_CVModel():
    """Build and compile a small convolutional classifier.

    Layers, in order: one 3x3 convolution with 32 filters (ReLU) declared
    for a 500x500x1 input, 2x2 max pooling, flatten, a 64-unit ReLU layer
    and a 6-unit softmax output. The TensorFlow global seed is set to 42 first.

    Returns:
        The compiled ``tf.keras.Sequential`` model (Adam optimizer with
        learning rate 0.005, sparse categorical cross-entropy loss,
        accuracy metric).
    """
    # for cnns 2hrs
    tf.random.set_seed(42)

    # define cnn model
    keras_layers = tf.keras.layers
    network = tf.keras.Sequential([
        keras_layers.Conv2D(32, (3, 3), activation='relu', input_shape=(500, 500, 1)),
        keras_layers.MaxPooling2D(2, 2),
        keras_layers.Flatten(),
        keras_layers.Dense(64, activation='relu'),
        keras_layers.Dense(6, activation='softmax'),
    ])

    # Compile the model with the Adam optimizer at an explicit learning rate
    adam = tf.keras.optimizers.Adam(learning_rate=0.005)
    network.compile(
        optimizer=adam,
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network

In [None]:
def CV_AL(model, initial_train_size=120, query_size=100, epochs=10, max_train_fraction=0.9):
    """Run pool-based active learning with entropy (uncertainty) sampling.

    Reads the notebook-level ``merged_images`` and ``labels``. A stratified
    split (random_state=42) keeps ``initial_train_size`` samples as the
    labelled training set; the remaining samples form the pool, which is also
    the set the model is evaluated on. In every round the model is fitted on
    the labelled set, scored on the current pool, and the ``query_size`` pool
    samples with the highest predictive entropy are moved, with their labels,
    into the labelled set.

    Notes:
        * The same ``model`` instance is fitted in every round, so its
          weights carry over from one round to the next.
        * The pool shrinks every round, so the "test" metrics of different
          rounds are measured on different sets of samples.
        * The samples queried in the last round are added to the labelled set
          but no further fit is run on them.

    Args:
        model: compiled model exposing ``fit``, ``predict`` and ``evaluate``;
            its fit history must contain 'accuracy' and 'loss', and
            ``evaluate`` must return ``(loss, accuracy)``.
        initial_train_size: number of samples in the initial labelled set.
        query_size: number of pool samples moved to the labelled set per round.
        epochs: epochs per ``fit`` call.
        max_train_fraction: rounds run only if the initial labelled fraction
            is strictly below this value, and continue until the labelled
            fraction exceeds it.

    Returns:
        dict with one list entry per round under the keys 'model_accuracy',
        'model_loss' (pool/test), 'train_accuracy', 'train_loss' (last epoch
        of the fit), 'mcc' and 'f1' (weighted F1 on the pool).

    Raises:
        ValueError: if ``query_size`` or ``epochs`` is smaller than 1.
    """
    if query_size < 1:
        # a slice of [-0:] would select the whole pool instead of nothing
        raise ValueError("query_size must be at least 1")
    if epochs < 1:
        # with no epochs the fit history is empty and has no last entry
        raise ValueError("epochs must be at least 1")

    # asarray avoids duplicating the image array when it already is an ndarray
    merged_images_np = np.asarray(merged_images)
    labels_np = np.asarray(labels)
    total_samples = len(labels_np)

    model_metrics = {
        'model_accuracy' : [],
        'model_loss' : [],
        'train_accuracy' : [],
        'train_loss' : [],
        'mcc' : [],
        'f1' : []
    }

    # initial stratified split: small labelled set vs. unlabelled pool
    train_img, test_img, train_labels, test_labels = train_test_split(
        merged_images_np, labels_np, train_size=initial_train_size,
        random_state=42, stratify=labels_np
    )

    # fraction of all samples currently in the labelled training set
    labeled_fraction = len(train_labels) / total_samples

    print(f"train_img shape: {train_img.shape}, train_labels shape: {train_labels.shape}")

    iteration = 0

    # Enter the loop only when strictly below the threshold; afterwards keep
    # going until the labelled fraction exceeds it.
    keep_training = labeled_fraction < max_train_fraction
    while keep_training:

        print (f"\n Iteration {iteration} - Training with {len(train_img)} samples")

        # train classifier for this iteration and predict on the pool
        model_history = model.fit(train_img, train_labels, epochs=epochs, verbose=2)
        predictions = model.predict(test_img)
        predicted_labels = np.argmax(predictions, axis=1)  # probabilities -> class labels

        # loss & accuracy: last training epoch, and evaluation on the pool
        train_acc = model_history.history['accuracy'][-1]
        train_loss = model_history.history['loss'][-1]
        test_loss, test_acc = model.evaluate(test_img,  test_labels, verbose=2)
        print('\nTest accuracy:', test_acc)

        # mcc and f1 scores on the pool (computed once, then stored and printed)
        mcc = matthews_corrcoef(test_labels, predicted_labels)
        model_metrics['mcc'].append(mcc)
        model_metrics['f1'].append(f1_score(test_labels, predicted_labels, average='weighted'))
        print('\nMCC:', mcc)

        # store metrics
        model_metrics['model_accuracy'].append(test_acc)
        model_metrics['model_loss'].append(test_loss)
        model_metrics['train_accuracy'].append(train_acc)
        model_metrics['train_loss'].append(train_loss)

        # entropy is taken along axis 0, so transpose to get one value per sample
        uncertainty = entropy(predictions.T)
        q_indexes = np.argsort(uncertainty)[-query_size:]  # most uncertain samples

        # move the queried samples and their labels into the labelled set
        train_img = np.vstack((train_img, test_img[q_indexes]))
        train_labels = np.concatenate((train_labels, test_labels[q_indexes]))

        # update the fraction that drives the loop
        labeled_fraction = len(train_labels) / total_samples
        keep_training = labeled_fraction <= max_train_fraction
        if keep_training:
            # remove the queried samples from the pool before the next round
            test_img = np.delete(test_img, q_indexes, axis = 0)
            test_labels = np.delete(test_labels, q_indexes, axis = 0)
            iteration += 1

    # Print final model performance (there is none if no round was run)
    if model_metrics['train_accuracy']:
        print("\nFinal Model Performance:")
        print(f"Train Accuracy: {model_metrics['train_accuracy'][-1]:.4f}")
        print(f"Test Accuracy: {model_metrics['model_accuracy'][-1]:.4f}")
    else:
        print("\nNo training round was run: the initial labelled set already "
              "reaches max_train_fraction.")
    return model_metrics

In [None]:
# Basic network: build a fresh model, then run the active-learning loop on it.
# The returned per-iteration metrics are kept for the plots and CSV export below.
Base_Architecture = Basic_CVModel()
baseNetwork_metrics = CV_AL(Base_Architecture) ## run once - takes 24 mins

In [None]:
# Small network: build a fresh model, then run the active-learning loop on it.
Small_Architecture = Small_CVModel()
smallNetwork_metrics = CV_AL(Small_Architecture) ## run once - takes 15 mins

In [None]:
# Big network: build a fresh model, then run the active-learning loop on it.
Big_Architecture = Big_CVModel()
BigNetwork_metrics = CV_AL(Big_Architecture) ## run once - takes 45 mins

In [None]:
# CNN: build a fresh model, then run the active-learning loop on it.
CNN_Architecture = CNN_CVModel()
CNN_metrics = CV_AL(CNN_Architecture) ## run once - takes 2 hours

In [None]:
# turn results from all cv models into pandas
# Each metrics dict maps a metric name to a list with one value per
# active-learning iteration, so every DataFrame has one row per iteration.
# Note: the "dense" frame holds the Big network's results.
(
    baseNetwork_metrics_df,
    smallNetwork_metrics_df,
    denseNetwork_metrics_df,
    cnn_metrics_df,
) = (
    pd.DataFrame(run_metrics)
    for run_metrics in (
        baseNetwork_metrics,
        smallNetwork_metrics,
        BigNetwork_metrics,
        CNN_metrics,
    )
)

In [None]:
def accuracy_loss_plots(dataFrame, name):
    """Plot train vs. test accuracy and loss per iteration, side by side.

    Args:
        dataFrame: table with the columns 'train_accuracy', 'model_accuracy',
            'train_loss' and 'model_loss'.
        name: model name; the figure title is "<name> performance".
    """
    # One row per panel:
    # (position, train column, test column, train label, test label,
    #  y-axis label, panel title, extra ylabel keyword arguments)
    panels = (
        (1, 'train_accuracy', 'model_accuracy', "Train Accuracy", "Test Accuracy",
         "Accuracy", "Model Accuracy", {}),
        (2, 'train_loss', 'model_loss', "Train Loss", "Test Loss",
         "Loss", "Model Loss", {'labelpad': 2}),
    )
    for (position, train_col, test_col, train_label, test_label,
         y_label, panel_title, y_kwargs) in panels:
        plt.subplot(1, 2, position)
        plt.plot(dataFrame[train_col], label=train_label)
        plt.plot(dataFrame[test_col], label=test_label)
        plt.xlabel("Iteration", fontsize=10)
        plt.ylabel(y_label, fontsize=10, **y_kwargs)
        plt.title(panel_title, fontsize=10)

    plt.suptitle(name + ' performance')
    # a single figure-level legend with the generic entries 'Train' and 'Test'
    plt.gcf().legend(['Train', 'Test'], loc="upper left",  bbox_to_anchor=(0.03, 1), fontsize=10)
    plt.subplots_adjust(wspace=0.25)
    plt.show()

# Display name -> metrics DataFrame, in plotting order. Keeping the names in
# one place makes every figure spell them the same way.
network_results = {
    'Basic Neural Network': baseNetwork_metrics_df,
    'Small Neural Network': smallNetwork_metrics_df,
    'Big Neural Network': denseNetwork_metrics_df,
    'Convolutional Neural Network': cnn_metrics_df,
}

# accuracy / loss figure for each network
for network_name, network_df in network_results.items():
    accuracy_loss_plots(network_df, network_name)

# one comparison figure per score, with a line for each network
for metric_key, y_label, figure_title in (
    ('mcc', "MCC score", "MCC scores over Iterations"),
    ('f1', "F1 score", "F1 scores Over Iterations"),
):
    for network_name, network_df in network_results.items():
        plt.plot(network_df[metric_key], label=network_name)
    plt.xlabel("Iteration")
    plt.ylabel(y_label)
    plt.title(figure_title)
    plt.legend()
    plt.show()



In [None]:
# save results to file
# Each metrics table goes to its own CSV (relative path, so it lands in the
# working directory); index=False to prevent extra index column
for csv_name, metrics_table in (
    ('cnn_metrics.csv', cnn_metrics_df),
    ('denseNetwork_metrics.csv', denseNetwork_metrics_df),
    ('smallNetwork_metrics.csv', smallNetwork_metrics_df),
    ('baseNetwork_metrics.csv', baseNetwork_metrics_df),
):
    metrics_table.to_csv(csv_name, index=False)


print("CSV file saved successfully!")

In [None]:
# Show the working directory; the relative CSV paths used above resolve against it.
print(os.getcwd()) 