In [1]:
import pandas as pd
import numpy as np
import cv2
import math
import matplotlib.pyplot as plt

import PIL.Image as Image
import pathlib
import os

import tensorflow as tf
import tensorflow_hub as hub

from tensorflow import keras
from keras.preprocessing import image
from tensorflow.keras.utils import img_to_array
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential


In [2]:
# Root folders of the three dataset splits. Labels are later derived from the
# name of the folder that directly contains each image.
train_dir = pathlib.Path('../input/sports-classification/train')
test_dir = pathlib.Path('../input/sports-classification/test')
valid_dir = pathlib.Path('../input/sports-classification/valid')

# Recursively collect every .jpg below each split. glob() yields entries in
# filesystem order, which is not guaranteed to be stable across machines, so
# the lists are sorted to keep the derived DataFrames, the preview grid and
# the test predictions in a reproducible order.
train_image_paths = sorted(train_dir.glob(r'**/*.jpg'))
test_image_paths = sorted(test_dir.glob(r'**/*.jpg'))
valid_image_paths = sorted(valid_dir.glob(r'**/*.jpg'))

In [3]:
# Each sport has its own sub-folder inside the training split, so the folder
# names are the class labels. Only directories are kept: a stray file in the
# split root would otherwise be counted as an extra sport. Sorting makes the
# list independent of the order in which the filesystem reports entries.
sports_labels = sorted(
    entry.name for entry in train_dir.iterdir() if entry.is_dir()
)


In [4]:
# Number of sports, i.e. how many label entries were collected from the
# training directory into `sports_labels`
len(sports_labels)

In [5]:
def image_processing(filepath):
    """Build a DataFrame of image paths and their class labels.

    The label of an image is the name of the directory that directly
    contains it (``.../<label>/<file>.jpg``). The parent name is taken with
    ``pathlib`` rather than by splitting on ``'/'``, so the result does not
    depend on the path separator of the platform.

    Parameters
    ----------
    filepath : iterable of str or pathlib.PurePath
        Image locations. Any iterable is accepted, not only indexable
        sequences.

    Returns
    -------
    pandas.DataFrame
        Two columns: ``Filepath`` (the path as a string) and ``Label`` (the
        parent directory name; an empty string when the path has no parent
        directory component).
    """
    # Materialise once so generators and other one-shot iterables also work.
    paths = list(filepath)

    labels = [
        # Keep existing path objects as they are (preserves their flavour);
        # wrap plain strings without touching the filesystem.
        (p if isinstance(p, pathlib.PurePath) else pathlib.PurePath(str(p))).parent.name
        for p in paths
    ]

    filepaths = pd.Series(paths, name='Filepath').astype(str)
    labels = pd.Series(labels, name='Label')

    return pd.concat([filepaths, labels], axis='columns')
    

In [6]:
# One (Filepath, Label) DataFrame per split, built in train / test / valid order
train_df, test_df, val_df = map(
    image_processing,
    (train_image_paths, test_image_paths, valid_image_paths),
)

In [7]:
# Keep only the first training image of every sport. drop_duplicates already
# returns a new frame, so train_df itself is left untouched. reset_index()
# renumbers the rows from 0 and keeps the old row numbers in an 'index' column.
df_unique = train_df.drop_duplicates(subset='Label').reset_index()

In [8]:
# Display one example picture per sport on a 6x6 grid (at most 36 sports)
fig, axes = plt.subplots(nrows=6, ncols=6, figsize=(8, 8),
                        subplot_kw={'xticks': [], 'yticks': []})

for i, ax in enumerate(axes.flat):
    if i >= len(df_unique):
        # Fewer sports than grid cells: blank the leftover panel instead of
        # failing on a row that does not exist.
        ax.axis('off')
        continue
    ax.imshow(plt.imread(df_unique.Filepath.iloc[i]))
    ax.set_title(df_unique.Label.iloc[i], fontsize=12)
plt.tight_layout(pad=0.5)
plt.show()

In [9]:
# Both generators are configured identically: the only transformation they
# apply is MobileNetV2's own input preprocessing function.
train_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    preprocessing_function=tf.keras.applications.mobilenet_v2.preprocess_input,
)

test_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    preprocessing_function=tf.keras.applications.mobilenet_v2.preprocess_input,
)

In [10]:
# Augmentation options are constructor arguments of ImageDataGenerator.
# flow_from_dataframe() accepts extra keyword arguments only for legacy
# deprecation handling and otherwise discards them, so passing the options
# there (as this cell used to) trained on un-augmented images. They are
# therefore configured on a dedicated generator for the training split.
augmented_train_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    preprocessing_function=tf.keras.applications.mobilenet_v2.preprocess_input,
    rotation_range=30,
    zoom_range=0.15,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.15,
    horizontal_flip=True,
    fill_mode='nearest',
)

# Batches of 32 shuffled 224x224 RGB images with one-hot encoded labels
train_images = augmented_train_generator.flow_from_dataframe(
    dataframe=train_df,
    x_col='Filepath',
    y_col='Label',
    target_size=(224, 224),
    color_mode='rgb',
    class_mode='categorical',
    batch_size=32,
    shuffle=True,
    seed=0,
)

In [11]:
# Validation data must reflect real inputs, so it is read through the
# generator that only applies the MobileNetV2 preprocessing. The augmentation
# keyword arguments previously passed here were silently ignored by
# flow_from_dataframe() and have been removed. Shuffling is unnecessary for
# evaluation, so the validation images are served in a fixed order.
val_images = test_generator.flow_from_dataframe(
    dataframe=val_df,
    x_col='Filepath',
    y_col='Label',
    target_size=(224, 224),
    color_mode='rgb',
    class_mode='categorical',
    batch_size=32,
    shuffle=False,
)

In [12]:
# Test iterator: 224x224 RGB images in batches of 32 with one-hot labels.
# shuffle=False keeps the batches in DataFrame order, so predictions can be
# matched back to the rows of test_df.
test_images = test_generator.flow_from_dataframe(
    dataframe=test_df,
    x_col='Filepath',
    y_col='Label',
    class_mode='categorical',
    color_mode='rgb',
    target_size=(224, 224),
    batch_size=32,
    shuffle=False,
)

In [13]:
# MobileNetV2 with ImageNet weights and without its classification top;
# pooling='avg' requests average pooling on the output.
pretrained_model = tf.keras.applications.MobileNetV2(
    weights='imagenet',
    include_top=False,
    pooling='avg',
    input_shape=(224, 224, 3),
)

# Freeze the backbone so that only layers added on top of it are trained
pretrained_model.trainable = False

In [14]:
inputs = pretrained_model.input

# One output unit per class found by the training iterator, so the head
# always matches the width of the one-hot labels it is trained against.
num_classes = len(train_images.class_indices)

# Classification head: a tapering stack of fully connected layers on top of
# the frozen backbone's output.
x = tf.keras.layers.Dense(350, activation='relu')(pretrained_model.output)
x1 = tf.keras.layers.Dense(300, activation='relu')(x)
x2 = tf.keras.layers.Dense(250, activation='relu')(x1)
x3 = tf.keras.layers.Dense(200, activation='relu')(x2)
x4 = tf.keras.layers.Dense(150, activation='relu')(x3)
x5 = tf.keras.layers.Dense(100, activation='relu')(x4)

# The softmax layer must consume the LAST hidden layer. It was previously
# attached to `x`, which left x1..x5 disconnected from the model.
outputs = tf.keras.layers.Dense(num_classes, activation='softmax')(x5)

model = tf.keras.Model(inputs=inputs, outputs=outputs)

# categorical_crossentropy pairs with the one-hot labels produced by
# class_mode='categorical' in the data iterators.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

In [16]:
# Train for up to 20 epochs. `batch_size` is deliberately not passed: the data
# iterators already yield batches, and Keras' fit() documentation says not to
# specify it for generator / Sequence inputs.
history = model.fit(
    train_images,
    validation_data=val_images,
    epochs=20,
    callbacks=[
        # Stop once the validation loss has not improved for 2 consecutive
        # epochs and roll back to the best weights seen so far.
        tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=2,
            restore_best_weights=True
        )
    ]
)

In [17]:
# Score every test image and keep, per image, the index of the class with the
# highest predicted probability
pred = np.argmax(model.predict(test_images), axis=1)

# Invert the class-name -> index mapping of the training iterator so that the
# predicted indices can be translated back into sport names
labels = {index: name for name, index in train_images.class_indices.items()}
pred1 = [labels[index] for index in pred]

In [18]:
def output(imagepath):
    """Predict the sport shown in a single image file.

    Relies on two notebook-level names being defined before the call:
    ``model`` (the trained classifier) and ``labels`` (the mapping from class
    index to class name).

    Parameters
    ----------
    imagepath : str or path-like
        Location of the image to classify.

    Returns
    -------
    str
        Name of the class with the highest predicted probability.
    """
    # target_size is (height, width); the channel count is not part of it.
    img = image.load_img(imagepath, target_size=(224, 224))
    img = img_to_array(img)
    # Use the same preprocessing the data generators applied during training.
    # Dividing by 255 would feed the network a different value range than the
    # one it was trained on.
    img = tf.keras.applications.mobilenet_v2.preprocess_input(img)
    # The model expects a batch, so add a leading batch axis of size 1.
    batch = np.expand_dims(img, axis=0)
    answer = model.predict(batch)
    # Single image in the batch -> take the arg-max of its only row.
    class_index = int(np.argmax(answer, axis=-1)[0])
    return labels[class_index]

In [22]:
# Classify one image from the test split's 'polo' folder. Despite its name,
# `img` holds the value returned by output() (the predicted label), not image
# data. The bare expression on the last line displays it as the cell result.
img = output('../input/sports-classification/test/polo/4.jpg')
img

In [23]:
# Persist the trained model to 'SIC.h5' (relative to the current working
# directory); the .h5 suffix asks Keras for its HDF5 file format.
model.save('SIC.h5')