<a href="https://colab.research.google.com/github/MattyK-dev/MattyK-dev.github.io/blob/master/SemiSupervised_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Imports:

In [1]:
import tensorflow_datasets as tfds
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import datetime
import math
import random

from tensorflow.keras.layers.experimental.preprocessing import Resizing, Rescaling
from tensorflow.keras.layers import Dense, Flatten, Conv2D, Softmax, LeakyReLU
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import Model

%load_ext tensorboard
tf.random.set_seed(1)
random.seed(1)

Global:

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
# drive.flush_and_unmount()

In [4]:
def initialise():
  # Constants:
  global model, unlabelled, batch_size, glob_epoch, loss_object, optimizer, labelled, test, validate, glob_epoch
  global train_loss, train_accuracy, validate_loss, validate_accuracy, test_loss, test_accuracy
  global train_summary_writer, test_summary_writer, validate_summary_writer
  shuffle_buffer_size = 1024
  batch_size = 2

  ds = tf.keras.preprocessing.image_dataset_from_directory(
      'drive/MyDrive/dataset/pestsAndDiseases/',
      labels='inferred',
      label_mode = "int",
      color_mode='rgb',
      batch_size=batch_size,
      # image_size=(img_height, img_width),
      shuffle=True,
      seed=1,
  )

  # datagen = ImageDataGenerator(
  #     rescale=1./255,
  #     rotation_range=5,
  #     zoom_range=0.95, 0.95,
  #     horizontal_flip=True,
  #     vertical_flip=True,
  #     data_format='channels_last',
  #     validation_split=0.0,
  #     dtype=tf.float32
  # )

  # train_generator = datagen.flow_from_directory(
  #     'drive/MyDrive/dataset/pestsAndDiseases/',
  #     batch_size=batch_size,
  #     color_mode='rgb',
  #     class_mode='sparse',
  #     shuffle=True,
  #     subset=training
  # )

  # Split the dataset into labelled, unlabelled, validate and test
  ds = ds.shuffle(shuffle_buffer_size)
  dataset_size = len(ds)
  labelled = ds.take(int(0.1 * dataset_size))
  ds = ds.skip(int(0.1 * dataset_size))
  unlabelled = ds.take(int(0.6 * dataset_size))
  ds = ds.skip(int(0.6 * dataset_size))
  validate = ds.take(int(0.2 * dataset_size))
  ds = ds.skip(int(0.2 * dataset_size))
  test = ds.take(int(0.1 * dataset_size))
  ds = ds.skip(int(0.1 * dataset_size))

  # Shuffle dataset
  # Use `tf.data` to batch and shuffle the dataset:
  labelled = labelled.prefetch(tf.data.experimental.AUTOTUNE)
  unlabelled = unlabelled.prefetch(tf.data.experimental.AUTOTUNE)
  validate = validate.prefetch(tf.data.experimental.AUTOTUNE)
  test = test.prefetch(tf.data.experimental.AUTOTUNE)

  # Choose an optimizer and loss function for training: 
  loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True) 
  optimizer = tf.keras.optimizers.Adam()

  # Select metrics to measure the loss and the accuracy of the model. These metrics accumulate the values over epochs and then print the overall result.
  train_loss = tf.keras.metrics.SparseCategoricalCrossentropy(name='train_loss')
  train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
  validate_loss = tf.keras.metrics.SparseCategoricalCrossentropy(name='validate_loss')
  validate_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='validate_accuracy')
  test_loss = tf.keras.metrics.SparseCategoricalCrossentropy(name='test_loss')
  test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')

  # Tensorboard configurations
  current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

  train_log_dir = 'logs/gradient_tape/' + current_time + '/train'
  validate_log_dir = 'logs/gradient_tape/' + current_time + '/validate'
  test_log_dir = 'logs/gradient_tape/' + current_time + '/test'

  train_summary_writer = tf.summary.create_file_writer(train_log_dir)
  validate_summary_writer = tf.summary.create_file_writer(validate_log_dir)
  test_summary_writer = tf.summary.create_file_writer(test_log_dir)

Train, validate and test the model:

In [5]:
@tf.function
def train_step(images, labels):
  with tf.GradientTape() as tape:
    predictions = model(images, training=True)
    loss = loss_object(labels, predictions)
  gradients = tape.gradient(loss, model.trainable_variables)
  optimizer.apply_gradients(zip(gradients, model.trainable_variables))

  train_loss(labels, predictions)
  train_accuracy(labels, predictions)

@tf.function
def validate_step(images, labels):
  predictions = model(images, training=False)
  v_loss = loss_object(labels, predictions)

  validate_loss(labels, predictions)
  validate_accuracy(labels, predictions)

@tf.function
def test_step(images, labels):
  # training=False is only needed if there are layers with different
  # behavior during training versus inference (e.g. Dropout).
  predictions = model(images, training=False)
  t_loss = loss_object(labels, predictions)

  test_loss(labels, predictions)
  test_accuracy(labels, predictions)

Define the model:

In [6]:
class MyModel(Model):
  def __init__(self):
    super(MyModel, self).__init__()
    # self.resize = Resizing(64, 64)
    self.rescale = Rescaling(1.0/255)
    self.conv = Conv2D(8, 8)
    self.lr1 = LeakyReLU()
    self.conv2 = Conv2D(16, 4)
    self.lr2 = LeakyReLU()
    self.flatten = Flatten()
    self.d1 = Dense(128)
    self.lr3 = LeakyReLU()
    self.d2 = Dense(10)

  def call(self, x, training=False):
    # x = self.resize(x)
    x = self.rescale(x)
    x = self.conv(x)
    x = self.lr1(x)
    x = self.conv2(x)
    x = self.lr2(x)
    x = self.flatten(x)
    x = self.d1(x)
    x = self.lr3(x)
    x = self.d2(x)
    return x

In [8]:
# Step 1: train model on provided labelled data(initial data)
# Step 2: Train Semi-supervised learning layer(Use the new labels to iteratively train the model)
def modelTrain(EPOCHS):
  global model, unlabelled, batch_size, glob_epoch, loss_object, optimizer, labelled, test, validate, glob_epoch
  global train_loss, train_accuracy, validate_loss, validate_accuracy, test_loss, test_accuracy
  global train_summary_writer, test_summary_writer, validate_summary_writer, batch_size
  progress = tf.keras.utils.Progbar(EPOCHS, width=30, verbose=1, interval=0.05, stateful_metrics=['train_loss', 'train_accuracy', 'test_loss', 'test_accuracy'], unit_name='step')
  
  for epoch in range(EPOCHS):
    # progress.update(epoch+1)
    glob_epoch += 1
    
    # Reset the metrics at the start of the next epoch
    train_loss.reset_states()
    train_accuracy.reset_states()
    validate_loss.reset_states()
    validate_accuracy.reset_states()
    test_loss.reset_states()
    test_accuracy.reset_states()

    # Step 1: train model on labelled data
    for images, labels in labelled:
      train_step(images, labels)
    with train_summary_writer.as_default():
      tf.summary.scalar('Train loss', train_loss.result(), step=glob_epoch)
      tf.summary.scalar('Train accuracy', train_accuracy.result(), step=glob_epoch)

    # Validate and test
    for images, labels in validate:
      validate_step(images, labels)
    with validate_summary_writer.as_default():
      tf.summary.scalar('Validation loss', validate_loss.result(), step=glob_epoch)
      tf.summary.scalar('Validation accuracy', validate_accuracy.result(), step=glob_epoch)

    for test_images, test_labels in test:
      test_step(test_images, test_labels)
    with test_summary_writer.as_default():
      tf.summary.scalar('Test loss', test_loss.result(), step=glob_epoch)
      tf.summary.scalar('Test accuracy', test_accuracy.result(), step=glob_epoch)

    print(
      f'Epoch {epoch + 1}, '
      f'Loss: {train_loss.result()}, '
      f'Accuracy: {train_accuracy.result() * 100}, '
      f'Validation Loss: {validate_loss.result()}, '
      f'Validation Accuracy: {validate_accuracy.result() * 100}, '
      f'Test Loss: {test_loss.result()}, '
      f'Test Accuracy: {test_accuracy.result() * 100}'
    )

In [9]:
def pseudo_labelling(images, labels):
  labels = tf.math.argmax(model(images), axis = 1, output_type=tf.dtypes.int32)
  return images, labels

def pseudo_subset(subset_size):
  global model, unlabelled, batch_size, glob_epoch, loss_object, optimizer, labelled, test, validate
  global train_loss, train_accuracy, validate_loss, validate_accuracy, test_loss, test_accuracy
  global train_summary_writer, test_summary_writer, validate_summary_writer, batch_size
  subset = unlabelled.take(subset_size)
  unlabelled = unlabelled.skip(subset_size)
  subset = subset.map(pseudo_labelling)
  labelled = labelled.concatenate(subset)

Main:

In [None]:
def main():
  global model, unlabelled, batch_size, glob_epoch, loss_object, optimizer, labelled, test, validate
  global train_loss, train_accuracy, validate_loss, validate_accuracy, test_loss, test_accuracy
  global train_summary_writer, test_summary_writer, validate_summary_writer, batch_size
  #constants
  glob_epoch = 0
  repeat_training = 1
  training_EPOCHS = 8
  subset_size = 5

  for i in range(repeat_training):
    initialise()
    # Create an instance of the model
    model = MyModel()
  
    # Size of subsets divided by batch size:
    iterations = math.floor(len(unlabelled) / subset_size)

    for j in range(iterations):
      # Train the model on the few available labelled data
      modelTrain(training_EPOCHS)
      pseudo_subset(subset_size)

    model.save('logs/models')
main()

Found 2378 files belonging to 10 classes.
Epoch 1, Loss: 8.563268661499023, Accuracy: 3.125, Validation Loss: 5.971063137054443, Validation Accuracy: 21.66666603088379, Test Loss: 5.056022644042969, Test Accuracy: 27.23214340209961
Epoch 2, Loss: 3.8546395301818848, Accuracy: 21.875, Validation Loss: 4.7524638175964355, Validation Accuracy: 21.875, Test Loss: 5.494017124176025, Test Accuracy: 17.85714340209961
Epoch 3, Loss: 4.717474460601807, Accuracy: 25.0, Validation Loss: 8.02916145324707, Validation Accuracy: 16.15720558166504, Test Loss: 8.359411239624023, Test Accuracy: 20.089284896850586
Epoch 4, Loss: 10.28063678741455, Accuracy: 15.625, Validation Loss: 10.190685272216797, Validation Accuracy: 10.416666030883789, Test Loss: 11.184470176696777, Test Accuracy: 8.482142448425293
Epoch 5, Loss: 9.183225631713867, Accuracy: 3.125, Validation Loss: 7.819547176361084, Validation Accuracy: 23.958332061767578, Test Loss: 7.877242565155029, Test Accuracy: 28.571430206298828
Epoch 6, Lo

In [None]:
# !rm -r logs
%tensorboard --logdir logs/gradient_tape

In [None]:
# !zip -r /runa.zip /content/logs