In [None]:
import os
import numpy as np
import pandas as pd
import shutil
import tensorflow as tf
import keras
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten, Dropout
import csv
#All imports needed

In [None]:
# Index file read as two space-separated columns: image path and label.
data = pd.read_csv('../input/uos-com2028/train.txt', delimiter = " ", header=None)
# The first 20% of the rows become the validation split, the remaining 80% the training split.
# NOTE(review): the split follows file order; if train.txt is sorted by label the
# validation set will not cover every class -- confirm, or shuffle the rows first.
split_size = int(len(data.index)*0.2)


def _copy_into_label_folders(rows, destination_root):
    """Copy each image into ``destination_root/<label>/<file name>``.

    Parameters
    ----------
    rows : iterable of (path, label)
        ``path`` is the image path as listed in train.txt, ``label`` its class.
    destination_root : str
        Directory that receives one sub-folder per label. It is created
        (together with the label folders) when missing.
    """
    os.makedirs(destination_root, exist_ok=True)
    for path, label in rows:
        label_dir = os.path.join(destination_root, str(label))
        os.makedirs(label_dir, exist_ok=True)
        # basename() keeps only the file name. str.strip("train") removes any of the
        # characters t/r/a/i/n from both ends rather than a "train" prefix, so it
        # must not be used to drop the leading directory.
        shutil.copyfile("../input/uos-com2028/train/" + path,
                        os.path.join(label_dir, os.path.basename(path)))


#inserts images into folders of their respective label (20% in validation and 80% in training)
_copy_into_label_folders(data.values[:split_size], "/kaggle/working/validation")
_copy_into_label_folders(data.values[split_size:], "/kaggle/working/training")

In [None]:
#image width and height after transformation
image_h = 150
image_w = 150
batch_size = 32

# Folder names "0".."22" are passed explicitly so that class index i maps to label i
# (np.argmax on a prediction then yields the label directly).
class_names = [str(i) for i in range(0,23)]

#image data augmentation -- used for TRAINING images only
# NOTE(review): Xception's ImageNet weights expect inputs scaled to [-1, 1]
# (keras.applications.xception.preprocess_input); rescale=1./255 gives [0, 1].
# Consider switching the preprocessing -- confirm before changing.
datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    brightness_range=[0.8,1.2],
    horizontal_flip=True
)

# Validation and test images must not be randomly rotated/shifted/flipped:
# they only get the same rescaling as the training images.
eval_datagen = ImageDataGenerator(rescale=1./255)

#augmented batches of training images
train_generator = datagen.flow_from_directory(
    "/kaggle/working/training",
    target_size=(image_h, image_w),
    batch_size=batch_size,
    classes=class_names,
    class_mode="categorical",
    shuffle=True
)

#un-augmented batches of validation images
validation_generator = eval_datagen.flow_from_directory(
    "/kaggle/working/validation",
    target_size=(image_h, image_w),
    batch_size=batch_size,
    classes=class_names,
    class_mode="categorical",
    shuffle=True
)

#un-augmented batches of testing images; shuffle=False keeps the batch order
#aligned with test_generator.filenames, class_mode=None yields images only
test_generator = eval_datagen.flow_from_directory(
    '/kaggle/input/uos-com2028/test',
    target_size=(image_h, image_w),
    shuffle=False,
    batch_size=batch_size,
    class_mode=None)

#number of images found in the training, validation and testing folders respectively
train_n = len(train_generator.filenames)
val_n = len(validation_generator.filenames)
test_n = len(test_generator.filenames)

In [None]:
base_model = keras.applications.Xception(
    weights='imagenet',  # Load weights pre-trained on ImageNet.
    input_shape=(image_h, image_w, 3), #Same input shape as images from data generators
    include_top=False)  # Do not include the ImageNet classifier at the top.

#Early stopping: training stops once val_loss has not improved for 4 consecutive
#epochs, and the weights of the epoch with the lowest val_loss are restored.
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=4, restore_best_weights=True)
base_model.trainable = False #freeze the base model

inputs = keras.Input(shape=(image_h, image_w, 3))
# make sure that the base_model is running in inference mode here,
# so it stays in inference mode even after it is unfrozen for fine-tuning
x = base_model(inputs, training=False)
# Convert features of shape base_model.output_shape[1:] to vectors
x = keras.layers.GlobalAveragePooling2D()(x)
# Dense classifier head with 23 units, one logit per class folder.
# No softmax here: the loss below is configured with from_logits=True.
outputs = keras.layers.Dense(23)(x)
model = keras.Model(inputs, outputs)

#phase 1: compile and train only the new classifier head (base model frozen)
#A learning rate of 0.1 is far too large for Adam and makes the head training
#unstable; 1e-3 is Adam's default.
model.compile(optimizer=keras.optimizers.Adam(1e-3),
              loss=keras.losses.CategoricalCrossentropy(from_logits=True),
              metrics=[keras.metrics.CategoricalAccuracy()])
#head training does not require many epochs to be effective
model.fit(
    train_generator,
    steps_per_epoch=train_n // batch_size,
    epochs=10,
    verbose=1,
    validation_data=validation_generator,
    validation_steps=val_n // batch_size,
    callbacks=[callback]
)

#phase 2: unfreeze the base model for fine-tuning
base_model.trainable = True

#re-compile so the change of `trainable` takes effect; use a low learning rate
#so the pre-trained weights are only adjusted slightly
model.compile(optimizer=keras.optimizers.Adam(0.0001),
              loss=keras.losses.CategoricalCrossentropy(from_logits=True),
              metrics=[keras.metrics.CategoricalAccuracy()])
model.fit(
    train_generator,
    steps_per_epoch=train_n // batch_size,
    epochs=30,
    verbose=1,
    validation_data=validation_generator,
    validation_steps=val_n // batch_size,
    callbacks=[callback]
)

In [None]:
#predictions (one row of 23 logits per image) for every image in the test folder
# Start from the first batch so rows line up with test_generator.filenames.
test_generator.reset()
# len(test_generator) is the number of batches INCLUDING the final partial one;
# `test_n // 32` rounded down and silently skipped the last test_n % 32 images.
predictions = model.predict(test_generator, steps=len(test_generator))

In [None]:
# Sanity check: show the file name and predicted class index of the first 20 test images.
for position, file_name in enumerate(test_generator.filenames[:20]):
    predicted_class = np.argmax(predictions[position])
    print(file_name, predicted_class)

In [None]:
#write down predictions into a csv file
row_list = [["id", "label"]]

#pair every predicted image with its file name and store (image id, predicted label)
for file_name, scores in zip(test_generator.filenames, predictions):
    # The id is the file name without directory and extension ("test/123.jpg" -> "123").
    # str.strip('test/.jpg') removes any of those CHARACTERS from both ends, which
    # corrupts ids that start or end with one of them, so it is not used here.
    image_id = os.path.splitext(os.path.basename(file_name))[0]
    row_list.append([image_id, np.argmax(scores)])

#writes down prediction labels and IDs into the csv file, ready for download to be submitted
with open("6597644.csv", "w", newline = "") as file:
    writer = csv.writer(file)
    writer.writerows(row_list)

#release the rows once they are written
row_list = None