# Imports & Installations

In [None]:
%pip install tensorflow
%pip install scipy

In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np

import os
from pathlib import Path
import shutil
import uuid

# Data Handling

## Directory Preparation

In [None]:
train_dirs = ['Datasets/var_train_1', 'Datasets/var_train_2', 'Datasets/var_train_3']
combined_train_dir = 'Datasets/all_train'
class_names = ['Clean_Tackles', 'Fouls']


def combine_train_dirs(source_dirs, target_dir, classes):
    """Copy the images of several class-structured directories into one.

    source_dirs: directories that each contain one sub-folder per class
    target_dir: directory that receives one sub-folder per class
    classes: names of the class sub-folders to merge
    Returns the number of files copied by this call.

    Every copy is prefixed with a UUID derived from its source path, so equal
    filenames coming from different source directories cannot collide. Because
    the prefix is deterministic, running this again skips files that were
    already copied instead of duplicating the dataset.
    """
    os.makedirs(target_dir, exist_ok=True)
    for class_name in classes:
        os.makedirs(os.path.join(target_dir, class_name), exist_ok=True)

    copied = 0
    for source_dir in source_dirs:
        for class_name in classes:
            src_dir = os.path.join(source_dir, class_name)
            dst_dir = os.path.join(target_dir, class_name)
            if not os.path.isdir(src_dir):
                # A dataset part may be absent; merge whatever is available.
                print(f"Skipping missing directory: {src_dir}")
                continue
            for img_name in sorted(os.listdir(src_dir)):
                src_path = os.path.join(src_dir, img_name)
                if not os.path.isfile(src_path):
                    # shutil.copy cannot copy nested folders.
                    continue
                # Addresses duplicate filenames: same source path -> same prefix.
                prefix = uuid.uuid5(uuid.NAMESPACE_URL, Path(src_path).as_posix())
                dst_path = os.path.join(dst_dir, f"{prefix}_{img_name}")
                if os.path.exists(dst_path):
                    continue
                shutil.copy(src_path, dst_path)
                copied += 1
    return copied


# Combines directories in event of multiple training dataset directories
num_copied = combine_train_dirs(train_dirs, combined_train_dir, class_names)
print(f"Copied {num_copied} new file(s) into {combined_train_dir}")

## Image Preparation

In [None]:
# Dataset locations: the merged training images and the test images.
train_dir, test_dir = 'Datasets/all_train', 'Datasets/var_test_1'

In [None]:
def load_data(path, input_shape=(256, 256), batch_size=32, validation_split=0.2):
    """Create the training and validation generators for a class-structured folder.

    path: destination path for the folder where the data resides
    input_shape: determine the target shape of the images
    batch_size: number of images per generated batch
    validation_split: fraction of the images reserved for validation
    Returns traingen(train data) and valgen(validation data)

    Only the training images are randomly augmented. The validation images
    are just rescaled, so validation metrics are measured on unmodified
    images. Both generators are given the same validation_split so that the
    'training' and 'validation' subsets they read from `path` are
    complementary.
    """
    train_datagen = ImageDataGenerator(
        rescale=1./255,
        validation_split=validation_split,
        brightness_range=[1.2, 2],
        horizontal_flip=True,
        vertical_flip=True,
        rotation_range=90
    )
    # No augmentation here: validation should see the images as they are.
    val_datagen = ImageDataGenerator(
        rescale=1./255,
        validation_split=validation_split
    )

    # Options shared by both subsets.
    flow_options = dict(
        batch_size=batch_size,
        class_mode='binary',
        target_size=input_shape
    )
    traingen = train_datagen.flow_from_directory(
        path,
        subset='training',
        **flow_options
    )
    valgen = val_datagen.flow_from_directory(
        path,
        subset='validation',
        **flow_options
    )
    return traingen, valgen

In [None]:
# Training and validation generators, both read from the training directory.
train_generator, validation_generator = load_data(path=train_dir)

In [None]:
# Test data: rescaled only, and read in a fixed order (shuffle disabled).
test_flow_options = dict(
    target_size=(256, 256),
    batch_size=32,
    class_mode='binary',
    shuffle=False,
)
test_datagen = ImageDataGenerator(rescale=1./255)
test_generator = test_datagen.flow_from_directory(test_dir, **test_flow_options)

# Model

In [None]:
def create_model(input_shape=(256, 256, 3)):
    """Build a binary classifier on top of a frozen ResNet50.

    ResNet50 is loaded with ImageNet weights and without its top, marked
    non-trainable, and called in inference mode. Its output is global-average
    pooled and fed through L1-regularised ReLU dense layers (400, 512, 400
    units, with 50% dropout after the last two) into one sigmoid unit.

    input_shape: shape given to both the model input and ResNet50
    Returns the (uncompiled) model named 'ResNet50_Foul_Classifier'
    """
    keras = tf.keras

    backbone = keras.applications.ResNet50(
        include_top=False,
        weights='imagenet',
        input_shape=input_shape,
    )
    backbone.trainable = False

    image_input = keras.Input(shape=input_shape)
    features = backbone(image_input, training=False)
    features = keras.layers.GlobalAveragePooling2D()(features)

    # (units, followed_by_dropout) for each hidden layer of the head.
    head_layout = ((400, False), (512, True), (400, True))
    for units, followed_by_dropout in head_layout:
        features = keras.layers.Dense(
            units, kernel_regularizer='l1', activation='relu'
        )(features)
        if followed_by_dropout:
            features = keras.layers.Dropout(0.5)(features)

    foul_probability = keras.layers.Dense(1, activation='sigmoid')(features)

    return keras.Model(image_input, foul_probability, name='ResNet50_Foul_Classifier')

In [None]:
# Build the classifier with its default input shape, then configure training.
model = create_model()
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Training

In [None]:
# Train for 5 epochs; `history` keeps the per-epoch metrics for the plots.
history = model.fit(x=train_generator, validation_data=validation_generator, epochs=5)

In [None]:
# Loss and accuracy of the trained model on the test generator.
test_loss, test_acc = model.evaluate(x=test_generator)
print(f"Test accuracy: {test_acc:.4f}")

In [None]:
# Left panel: training vs. validation accuracy per epoch.
plt.subplot(1, 2, 1)
plt.plot(history.history['accuracy'], label='accuracy')
plt.plot(history.history['val_accuracy'], label='val_accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.ylim([0, 1])
plt.legend(loc='lower right')

# Right panel: training vs. validation loss per epoch.
plt.subplot(1, 2, 2)
plt.plot(history.history['loss'], label='loss')
plt.plot(history.history['val_loss'], label='val_loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
# Scale the axis to the larger of the two curves so neither one is cut off.
loss_ceiling = max(max(history.history['loss']), max(history.history['val_loss']))
plt.ylim([0, loss_ceiling])
plt.legend(loc='upper right')

# Keep the axis labels of the two panels from overlapping.
plt.tight_layout()
plt.show()

# Model Foul Probability Ranking

In [None]:
# Model outputs for the test generator; print the first ten as a sanity check.
predictions = model.predict(x=test_generator)
print(f"Predictions: {predictions[:10]}")

In [None]:
# Debugging: how many samples of each class index every generator holds.
distribution_reports = (
    ("Training labels distribution:", train_generator),
    ("Validation labels distribution:", validation_generator),
    ("Test labels distribution:", test_generator),
)
for report_title, data_generator in distribution_reports:
    print(report_title)
    print(np.unique(data_generator.classes, return_counts=True))

In [None]:
# Linking names
filenames = test_generator.filenames
# Flatten the model output to one plain float per image. Pairing filenames
# with the raw one-element arrays gives a ragged structure that newer NumPy
# versions refuse to convert into an array.
foul_probabilities = np.asarray(predictions, dtype=float).reshape(-1)
if len(filenames) != len(foul_probabilities):
    raise ValueError(
        f"Got {len(foul_probabilities)} predictions for {len(filenames)} files"
    )
predictions_with_filenames = [
    (filename, float(probability))
    for filename, probability in zip(filenames, foul_probabilities)
]

# Ranking: ascending, i.e. lowest predicted foul probability first.
predictions_with_filenames.sort(key=lambda pair: pair[1])
# Object dtype keeps the filename as str and the probability as float;
# the reshape keeps the array two-dimensional even when it is empty.
sorted_predictions = np.array(predictions_with_filenames, dtype=object).reshape(-1, 2)

In [None]:
# Display the x images with the highest and lowest foul chance
num_images_to_display = 10

# Never request more rows than there are ranked images.
num_rows = min(num_images_to_display, len(sorted_predictions))

if num_rows == 0:
    print("No predictions available to display.")
else:
    plt.figure(figsize=(20, num_rows * 2))

    for row in range(num_rows):
        # Left column: least likely fouls (front of the ascending ranking).
        # Right column: most likely fouls (back of the ranking).
        row_entries = (sorted_predictions[row], sorted_predictions[-(row + 1)])
        for column, entry in enumerate(row_entries):
            img_path = os.path.join(test_dir, entry[0])
            img = mpimg.imread(img_path)
            # np.ravel accepts both a plain number and a one-element array.
            probability = float(np.ravel(entry[1])[0])
            plt.subplot(num_rows, 2, 2 * row + column + 1)
            plt.imshow(img)
            plt.title(f"Probability of foul: {probability:.2f}")
            plt.axis('off')

    plt.show()

# Model Saving

In [None]:
# Write the trained model to 'model_1.h5' (a relative path).
# NOTE(review): the .h5 suffix presumably selects Keras' legacy HDF5 format -
# confirm whether the installed Keras version prefers the '.keras' format.
model.save('model_1.h5')