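# Swimming stroke classification

Classifies images of four swimming strokes (crawl, breaststroke, butterfly and backstroke) with a VGG16-based network.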

In [34]:
import tensorflow as tf
import os
import random
import numpy as np
import itertools
from sklearn import metrics
from matplotlib import pyplot as plt
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.vgg16 import VGG16
from shutil import copyfile, rmtree

# Record the TensorFlow release this notebook was executed with.
print(f'Tensorflow version : {tf.version.VERSION}')

Tensorflow version : 2.7.0


In [35]:
# Every path below is resolved against the directory the kernel is running in.
_WORKING_DIR = os.getcwd()

# One source folder per swimming stroke; these feed the train/validation split.
CRAWL_SOURCE_DIR = os.path.join(_WORKING_DIR, 'data/train-val/crawl/')
BREASTSTROKE_SOURCE_DIR = os.path.join(_WORKING_DIR, 'data/train-val/breaststroke/')
BUTTERFLY_SOURCE_DIR = os.path.join(_WORKING_DIR, 'data/train-val/butterfly/')
BACKSTROKE_SOURCE_DIR = os.path.join(_WORKING_DIR, 'data/train-val/backstroke/')

# Destination roots of the split; each class gets its own sub-folder inside.
TRAINING_DIR = os.path.join(_WORKING_DIR, 'tmp/swimming/training/')
VALIDATION_DIR = os.path.join(_WORKING_DIR, 'tmp/swimming/validation/')

# Folder with the images shown in the final "Testing model" cell.
IMAGE_TEST_FOLDER = os.path.join(_WORKING_DIR, 'data/test/')

# Fraction of each class that is copied to the training folder.
SPLIT_SIZE = .9

# False: train the network and save 'net.h5'; True: load 'net.h5' instead.
USE_PRETRAINED_MODEL = False

### Data splitting

In [36]:
def split_data(source, name, split_size):
    """Split the images of one class into disjoint training and validation folders.

    Any existing ``TRAINING_DIR/<name>`` and ``VALIDATION_DIR/<name>`` folders are
    deleted and recreated. Every non-empty regular file found directly inside
    ``source`` is then copied into exactly one of the two folders.

    Args:
        source: Directory holding the images of a single class.
        name: Class name, used as the sub-folder name under ``TRAINING_DIR``
            and ``VALIDATION_DIR``.
        split_size: Fraction of the usable files that goes to training; the
            remaining files go to validation.
    """
    training_dir = os.path.join(TRAINING_DIR, name)
    validation_dir = os.path.join(VALIDATION_DIR, name)

    # Start from empty folders so files from an earlier split never linger.
    for directory in (training_dir, validation_dir):
        if os.path.isdir(directory):
            rmtree(directory)
        os.makedirs(directory, exist_ok=True)

    files = []
    # Sorted so that a seeded `random` yields the same split on every machine
    # (os.listdir returns entries in arbitrary order).
    for filename in sorted(os.listdir(source)):
        path = os.path.join(source, filename)
        if not os.path.isfile(path):
            # Sub-directories cannot be copied with copyfile; leave them out.
            continue
        if os.path.getsize(path) > 0:
            files.append(filename)
        else:
            print(filename + " is zero length, so ignoring.")

    training_length = int(len(files) * split_size)
    shuffled_set = random.sample(files, len(files))
    training_set = shuffled_set[:training_length]
    # Take what is left AFTER the training slice. Slicing from the start again
    # would reuse training images for validation and leak them into the metrics.
    validation_set = shuffled_set[training_length:]

    for filename in training_set:
        copyfile(os.path.join(source, filename),
                 os.path.join(training_dir, filename))

    for filename in validation_set:
        copyfile(os.path.join(source, filename),
                 os.path.join(validation_dir, filename))

# Build the training/validation folders for each stroke, one class at a time.
for class_name, source_dir in (('crawl', CRAWL_SOURCE_DIR),
                               ('breaststroke', BREASTSTROKE_SOURCE_DIR),
                               ('butterfly', BUTTERFLY_SOURCE_DIR),
                               ('backstroke', BACKSTROKE_SOURCE_DIR)):
    split_data(source_dir, class_name, SPLIT_SIZE)

### Data preparation

In [37]:
# Random transformations applied to training images only.
_augmentation = dict(rotation_range=40,
                     width_shift_range=0.2,
                     height_shift_range=0.2,
                     shear_range=0.2,
                     zoom_range=0.2,
                     horizontal_flip=True)

# Settings common to both directory iterators.
_flow_options = dict(batch_size=64,
                     class_mode='categorical',
                     target_size=(244, 244))

# Both generators multiply pixel values by 1/255.
train_dataGen = ImageDataGenerator(rescale=1./255., **_augmentation)
validation_dataGen = ImageDataGenerator(rescale=1. / 255.)

train_Generator = train_dataGen.flow_from_directory(TRAINING_DIR, **_flow_options)

# shuffle=False keeps the batches in the same order as `validation_generator.classes`.
validation_generator = validation_dataGen.flow_from_directory(VALIDATION_DIR,
                                                              shuffle=False,
                                                              **_flow_options)

Found 589 images belonging to 4 classes.
Found 67 images belonging to 4 classes.


### Make model

In [38]:
class Model(tf.keras.Model):
    """VGG16 convolutional base followed by a dense classification head.

    The base is VGG16 without its top layers, loaded with ImageNet weights and
    built for 244x244x3 inputs. Its output is flattened and passed through three
    ReLU dense layers (1024, 1024 and 512 units) and a 4-unit softmax layer.
    """

    def __init__(self):
        super().__init__()
        keras_layers = tf.keras.layers
        # Attribute names and creation order are kept as they were.
        self.base_model = VGG16(weights='imagenet',
                                include_top=False,
                                input_shape=(244, 244, 3))
        self.flat = keras_layers.Flatten()
        self.dense_1 = keras_layers.Dense(1024, activation='relu')
        self.dense_2 = keras_layers.Dense(1024, activation='relu')
        self.dense_3 = keras_layers.Dense(512, activation='relu')
        self.classifier = keras_layers.Dense(4, activation='softmax', name="classification")

    def call(self, inputs):
        """Run `inputs` through the base and the head; return the softmax output."""
        features = inputs
        for layer in (self.base_model,
                      self.flat,
                      self.dense_1,
                      self.dense_2,
                      self.dense_3,
                      self.classifier):
            features = layer(features)
        return features


model = Model()

### Train model

In [39]:
if not USE_PRETRAINED_MODEL:
    # Watch the validation loss, not the training loss: the training loss keeps
    # improving while the model overfits, so it would neither stop the run early
    # nor let restore_best_weights pick the epoch that generalises best.
    callback_early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                                               patience=10,
                                                               verbose=1,
                                                               mode='auto',
                                                               baseline=None,
                                                               restore_best_weights=True)

    model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=1e-3),
                  loss='categorical_crossentropy',
                  metrics=['acc'])

    # The returned history is used by the metric plots further down.
    vgg_hist = model.fit(train_Generator,
                         validation_data=validation_generator,
                         epochs=250,
                         callbacks=[callback_early_stopping])

    model.save_weights('net.h5')
else:
    # Build the model for the generator's image shape (batch size left open)
    # before loading the saved weights.
    model.build((None,) + train_Generator.image_shape)
    model.load_weights('net.h5')

KeyboardInterrupt: 

### Plot metrics

In [None]:
def plot_metrics(hist, metrics, title):
    """Plot one training metric and its validation counterpart over the epochs.

    Args:
        hist: Object whose ``history`` mapping holds the per-epoch values.
        metrics: Key of the training series; the validation series is read
            from ``'val_' + metrics``.
        title: Title of the figure.
    """
    for series_key in (metrics, 'val_' + metrics):
        plt.plot(hist.history[series_key])
    plt.title(title)
    plt.ylabel(metrics)
    plt.xlabel('epoch')
    plt.legend(['train', 'validation'], loc='upper left')
    plt.show()

# A training history only exists when the model was trained in this session.
if not USE_PRETRAINED_MODEL:
    for metric_key, figure_title in (('acc', 'Model accuracy'),
                                     ('loss', 'Model Loss')):
        plot_metrics(vgg_hist, metric_key, figure_title)

In [None]:
def plot_confusion_matrix(cm, classes,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """Draw a confusion matrix as an annotated image on the current figure.

    Args:
        cm: 2-D array of counts, indexed as ``cm[true, predicted]``.
        classes: Class labels used for the tick marks on both axes.
        title: Title of the plot.
        cmap: Matplotlib colour map used for the cells.
    """
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    positions = np.arange(len(classes))
    plt.xticks(positions, classes, rotation=45)
    plt.yticks(positions, classes)

    # Values above half the maximum are written in white, the others in black.
    half_max = cm.max() / 2.
    for row, col in np.ndindex(cm.shape[0], cm.shape[1]):
        value = cm[row, col]
        text_color = "white" if value > half_max else "black"
        plt.text(col, row, value,
                 horizontalalignment="center",
                 color=text_color)

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

# Predict class probabilities for the validation images and keep the index of
# the most likely class for each one.
Y_pred = model.predict(validation_generator, verbose=True)
y_pred = Y_pred.argmax(axis=1)

# Compare against the labels recorded by the generator; this relies on the
# validation generator having been created with shuffle=False.
confusion = metrics.confusion_matrix(y_true=validation_generator.classes, y_pred=y_pred)

# Draw the matrix on a new figure.
plt.figure()
plot_confusion_matrix(confusion, train_Generator.class_indices, title='Confusion matrix')

### Testing model

In [None]:
# Keep regular files only and sort them so the grid order is the same on every
# run (os.listdir returns entries in arbitrary order).
image_test_names = sorted(entry for entry in os.listdir(IMAGE_TEST_FOLDER)
                          if os.path.isfile(os.path.join(IMAGE_TEST_FOLDER, entry)))
# Map the predicted index back to the class name.
reverse_class_indices = {value : key for (key, value) in train_Generator.class_indices.items()}

# Six images per row. Up to 12 images this is the original 2x6 grid on a 20x7
# figure; beyond that rows are added so plt.subplot never receives an
# out-of-range position.
grid_columns = 6
grid_rows = max(2, -(-len(image_test_names) // grid_columns))

plt.figure(figsize=(20, 3.5 * grid_rows))
for index, image_name in enumerate(image_test_names):
    img = image.load_img(os.path.join(IMAGE_TEST_FOLDER, image_name), target_size=(244, 244))
    # Add the batch axis and apply the same 1/255 scaling as the data generators.
    array_image = np.expand_dims(image.img_to_array(img), axis=0) / 255.

    results = model.predict(array_image)
    predict = np.argmax(results, axis = 1)

    plt.subplot(grid_rows, grid_columns, index + 1)
    plt.title(reverse_class_indices.get(predict[0]), fontsize=15)
    plt.imshow(img)