We use the two different subsets of ShapeStacks during training (cf. Table 1), each one containing an equal amount of stable and unstable images. Both types of violations (VCOM and VPSF, cf. Section 3.2) are evenly represented among unstable images.  

During training, we augment the training images by randomising colours, varying aspect-ratios, and applying random cropping, vertical flipping and minimal in-plane rotation. We ensure that all data augmentations still yield physically plausible, upright towers.

In [None]:
# %pip install opencv-python tensorflow pandas matplotlib

## Run in the terminal
# sudo apt-get install python3-opencv

In [59]:
import cv2
import matplotlib.pyplot as plt
import os
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from keras import layers
from keras.models import Model

In [93]:
# Function to get filenames and labels
def getFilenames(rootpath, file):
    """Read the metadata CSV and return image paths with binary stability labels.

    Args:
        rootpath: Dataset root directory; it contains the metadata file and
            the 'train' image folder.
        file: Name of the metadata CSV inside ``rootpath``. Its first line is
            treated as a header and skipped.

    Returns:
        A ``(filenames, labels)`` tuple of equally long lists.
        ``filenames[i]`` is ``<rootpath>/train/<first column>.jpg`` and
        ``labels[i]`` is a one-element list: ``[0.]`` when the fifth column
        is 0 (stable) and ``[1.]`` otherwise (unstable).

    Rows whose fifth column is missing or not an integer are printed and
    skipped entirely, so the two returned lists always stay aligned.
    """
    meta_list_file = os.path.join(rootpath, file)
    with open(meta_list_file) as f:
        lines = f.read().splitlines()

    # Drop the header row, then ignore blank lines. Filtering (instead of
    # unconditionally popping the last entry) keeps the final record when the
    # file does not end with a newline, and copes with an empty file.
    meta_list = [line for line in lines[1:] if line.strip()]

    # Lists to store file data
    filenames = []
    labels = []
    total = len(meta_list)
    for i, meta in enumerate(meta_list):
        # Progress report every 1000 rows and on the last row
        if (i+1) % 1000 == 0 or i+1 == total:
            print("%s / %s : %s" % (i+1, total, meta))
        rec = meta.split(',')
        try:
            # label semantics: 0 = stable | 1 = unstable
            sem = [0. if int(rec[4]) == 0 else 1.]
        except (IndexError, ValueError):
            # Malformed row: report it and skip both the filename and label
            print(rec)
            continue
        filenames.append(os.path.join(rootpath, 'train', rec[0]+'.jpg'))
        labels.append(sem)
    return filenames, labels

In [94]:
# Dataset root folder and the metadata CSV stored inside it
trainPath = 'COMP90086_2024_Project_train'
trainFile = 'train.csv'

# Build the image path list and the matching stability labels
filenames, labels = getFilenames(rootpath=trainPath, file=trainFile)

# Show how many samples fall into each label value
pd.DataFrame(labels).value_counts()

1000 / 7680 : 131545,2,2,5,0,1,5
2000 / 7680 : 266662,2,1,2,1,1,1
3000 / 7680 : 394858,1,2,5,0,1,5
4000 / 7680 : 523454,1,1,6,1,2,2
5000 / 7680 : 661586,2,2,4,1,2,2
6000 / 7680 : 789513,1,2,6,1,1,1
7000 / 7680 : 912435,2,2,6,2,1,2
7680 / 7680 : 999668,1,2,4,1,2,3


0  
1.0    5760
0.0    1920
Name: count, dtype: int64

In [130]:
# Sanity check: read the first training image with OpenCV and display its raw pixel array.
# NOTE(review): OpenCV presumably returns channels in BGR order while the tf.image
# loader used for training yields RGB — confirm before mixing the two loaders.
cv2.imread(filenames[0])

array([[[111,  84,  57],
        [111,  84,  57],
        [111,  84,  57],
        ...,
        [ 68,  68,  68],
        [ 68,  68,  68],
        [ 68,  68,  68]],

       [[111,  84,  57],
        [111,  84,  57],
        [111,  84,  57],
        ...,
        [ 68,  68,  68],
        [ 68,  68,  68],
        [ 68,  68,  68]],

       [[111,  84,  57],
        [111,  84,  57],
        [111,  84,  57],
        ...,
        [ 68,  68,  68],
        [ 68,  68,  68],
        [ 68,  68,  68]],

       ...,

       [[ 93, 154, 216],
        [ 93, 155, 215],
        [ 96, 156, 216],
        ...,
        [ 56,  93, 127],
        [ 58,  93, 127],
        [ 64,  99, 132]],

       [[ 92, 152, 212],
        [ 93, 153, 212],
        [ 97, 155, 214],
        ...,
        [ 55,  92, 126],
        [ 56,  91, 125],
        [ 62,  97, 130]],

       [[ 90, 150, 209],
        [ 92, 152, 211],
        [ 97, 156, 212],
        ...,
        [ 56,  93, 127],
        [ 55,  90, 124],
        [ 59,  94, 127]]

In [116]:
# Target tile size (pixels) and number of colour channels to decode
imsize, channels = 224, 3

# Coerce every path to a plain string before handing the list to TensorFlow
filenames = list(map(str, filenames))

# Wrap the paths and labels in constant tensors
tf_filenames = tf.constant(filenames)
tf_labels = tf.constant(labels)

# One dataset element per (path, label) pair
dataset = tf.data.Dataset.from_tensor_slices((tf_filenames, tf_labels))

# Function to load and preprocess images
def load_and_preprocess_image(filename, label):
    """Load one JPEG from disk and resize it for the model.

    Args:
        filename: Path of the JPEG file to read.
        label: Label paired with the image; returned unchanged.

    Returns:
        ``(image, label)`` where ``image`` has been decoded with ``channels``
        colour channels and resized to ``[2 * imsize, imsize]``.
    """
    raw_bytes = tf.io.read_file(filename)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=channels)
    # Height is twice the width: the model later splits this into two square tiles
    target_size = [2 * imsize, imsize]
    resized = tf.image.resize(decoded, target_size)
    return resized, label

# Shuffle the lightweight (filename, label) pairs ONCE, before decoding and
# before splitting. reshuffle_each_iteration=False freezes the order, so
# take()/skip() below select the same two disjoint subsets on every epoch.
# (Shuffling after the split point with the default reshuffling re-draws the
# split each epoch and leaks validation samples into training; shuffling after
# map() also forces the buffer to hold every decoded image in memory.)
num_samples = len(filenames)
shuffled_pairs = dataset.shuffle(buffer_size=max(num_samples, 1),
                                 reshuffle_each_iteration=False)

# Keep `dataset` as the full decoded dataset, as before, for any later use
dataset = shuffled_pairs.map(load_and_preprocess_image,
                             num_parallel_calls=tf.data.AUTOTUNE)

# Split into training and validation datasets
train_size = int(0.8 * num_samples)                        # 80% for training
train_pairs = shuffled_pairs.take(train_size)
validation_pairs = shuffled_pairs.skip(train_size)

# Re-order only the training pairs each epoch; this shuffles strings and
# labels, not images, so a full-size buffer is cheap.
train_pairs = train_pairs.shuffle(buffer_size=max(train_size, 1))

# Decode and resize the images after the split
train_ds = train_pairs.map(load_and_preprocess_image,
                           num_parallel_calls=tf.data.AUTOTUNE)
validation_ds = validation_pairs.map(load_and_preprocess_image,
                                     num_parallel_calls=tf.data.AUTOTUNE)

# Batch and prefetch both datasets
batch_size = 32
train_ds = train_ds.batch(batch_size).prefetch(buffer_size=tf.data.AUTOTUNE)
validation_ds = validation_ds.batch(batch_size).prefetch(buffer_size=tf.data.AUTOTUNE)

In [117]:
# Your implementation for part 2 goes here

# Input handling: split the (2*imsize, imsize) image into two square tiles
_tile_layers = [
    layers.Input((2*imsize,imsize,3)),            # 3-channel input, 2*imsize high by imsize wide
    layers.Reshape((2,imsize,imsize,3)),          # Two imsize x imsize tiles (top half, bottom half)
    layers.LayerNormalization(axis=[-1,-2,-3]),   # Normalise over the last three axes, i.e. each tile on its own. The model will not train properly without this
    layers.Permute((2,3,1,4)),                    # Move the tile axis next to the channel axis: (imsize, imsize, 2, 3)
    layers.Reshape((imsize,imsize,6,1)),          # Merge tile and channel axes into a depth of 6: (imsize, imsize, 6, 1)
]

# Feature extraction: depth kernel 3 with depth stride 3 covers one tile's channels per step
_feature_layers = [
    layers.Conv3D(filters=3,kernel_size=(4,4,3),strides=(1,1,3),activation='relu'),
    layers.MaxPooling3D(pool_size=2, strides=2),
    layers.Flatten(),                             # Flatten the feature volume into a vector
]

# Classifier head
_head_layers = [
    layers.Dropout(0.5),                          # Regularisation
    layers.Dense(16,activation='relu'),           # Single hidden layer with 16 units
    layers.Dense(2,activation='softmax'),         # Probabilities for the two classes (0 = stable, 1 = unstable)
]

cnn = keras.Sequential([*_tile_layers, *_feature_layers, *_head_layers])

In [118]:
# Print the layer-by-layer summary of the model
cnn.summary()

In [119]:
# Configure the model
# SparseCategoricalCrossentropy takes class-index labels rather than one-hot
# vectors (use CategoricalCrossentropy for one-hot labels). from_logits=False
# because the final Dense layer already applies softmax.
cnn.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    metrics=['accuracy'],
)

In [120]:
# Train for 20 epochs, evaluating on the validation dataset after each one.
# NOTE(review): Keras documents `shuffle` as ignored when `x` is a
# tf.data.Dataset, so ordering is presumably governed by the dataset itself — confirm.
history_baseline = cnn.fit(
    x=train_ds,
    epochs=20,
    validation_data=validation_ds,
    shuffle=True,
)

Epoch 1/20


2024-09-25 16:18:21.998984: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:450] ShuffleDatasetV3:20: Filling up shuffle buffer (this may take a while): 7384 of 7680
2024-09-25 16:18:22.090838: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:480] Shuffle buffer filled.


[1m104/192[0m [32m━━━━━━━━━━[0m[37m━━━━━━━━━━[0m [1m1:42[0m 1s/step - accuracy: 0.6508 - loss: 1.0234

KeyboardInterrupt: 