In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import os
from tensorflow.keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator,load_img, img_to_array
from tensorflow.keras.metrics import categorical_crossentropy
from keras.models import Sequential, Model
from keras.layers import Conv2D, MaxPooling2D,GlobalAveragePooling2D
from keras.layers import Activation, Dropout, BatchNormalization, Flatten, Dense, AvgPool2D,MaxPool2D
from keras.models import Sequential, Model
from keras.applications.vgg16 import VGG16, preprocess_input
from keras import backend as K
import cv2
import glob

In [2]:
from pathlib import Path
train_path = Path('../input/sports-classification/train')
train_path.glob(r'**/*.jpg')

valid_path = Path('../input/sports-classification/valid')
valid_path.glob(r'**/*.jpg')

test_path = Path('../input/sports-classification/test')
test_path.glob(r'**/*.jpg')

<generator object Path.glob at 0x7f98cf875450>

In [3]:
train_filepath = list(train_path.glob(r'**/*.jpg'))
valid_filepath = list(valid_path.glob(r'**/*.jpg'))
test_filepath = list(test_path.glob(r'**/*.jpg'))

In [4]:
os.path.split(os.path.split(train_filepath[0])[1]) 

('', '029.jpg')

In [5]:
classes_train = list(map(lambda x: os.path.split(os.path.split(x)[0])[1],train_filepath))
classes_valid = list(map(lambda x: os.path.split(os.path.split(x)[0])[1],valid_filepath))
classes_test = list(map(lambda x: os.path.split(os.path.split(x)[0])[1],test_filepath))

In [6]:
train_df = pd.concat([pd.Series(train_filepath).astype(str), pd.Series(classes_train)], axis=1)
train_df.columns = ['Images', 'Image_label']

test_df = pd.concat([pd.Series(test_filepath).astype(str), pd.Series(classes_test)], axis=1)
test_df.columns = ['Images', 'Image_label']

valid_df = pd.concat([pd.Series(valid_filepath).astype(str), pd.Series(classes_valid)], axis=1)
valid_df.columns = ['Images', 'Image_label']

In [7]:
train_datagen = ImageDataGenerator(rescale = 1./255.,rotation_range = 40, width_shift_range = 0.2, height_shift_range = 0.2, 
                                   shear_range = 0.2, zoom_range = 0.2, horizontal_flip = True, vertical_flip =True)
test_datagen = ImageDataGenerator(rescale = 1.0/255.)

In [8]:
train_gen = train_datagen.flow_from_dataframe(dataframe = train_df,
                                              x_col = 'Images', y_col ='Image_label',
                                              target_size = (200,200), batch_size = 512, 
                                              class_mode = 'categorical', shuffle = True)
val_gen = train_datagen.flow_from_dataframe(valid_df, 
                                            target_size=(200,200), x_col = 'Images', y_col ='Image_label', 
                                            class_mode='categorical',
                                            batch_size= 512, shuffle=True)
test_gen = test_datagen.flow_from_dataframe(test_df,
                                            target_size = (200,200), x_col = 'Images', y_col ='Image_label',
                                             class_mode = 'categorical',
                                            batch_size = 512, shuffle = False)

Found 13572 validated image filenames belonging to 100 classes.
Found 500 validated image filenames belonging to 100 classes.
Found 500 validated image filenames belonging to 100 classes.


# EfficientNet

In [25]:
!pip install efficientnet

Collecting efficientnet
  Downloading efficientnet-1.1.1-py3-none-any.whl (18 kB)
Collecting keras-applications<=1.0.8,>=1.0.7
  Downloading Keras_Applications-1.0.8-py3-none-any.whl (50 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.7/50.7 kB[0m [31m551.7 kB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Installing collected packages: keras-applications, efficientnet
Successfully installed efficientnet-1.1.1 keras-applications-1.0.8
[0m

In [26]:
import efficientnet.tfkeras as efn
base_model_efn=efn.EfficientNetB0(input_shape=(224,224,3),include_top=False,weights = 'imagenet')
for layer in base_model_efn.layers:
    layer.trainable = False  

Downloading data from https://github.com/Callidior/keras-applications/releases/download/efficientnet/efficientnet-b0_weights_tf_dim_ordering_tf_kernels_autoaugment_notop.h5


In [11]:
model_efn=Sequential()
model_efn.add(base_model_efn)
model_efn.add(GlobalAveragePooling2D())
model_efn.add(Dense(128))
model_efn.add(Dropout(0.2))
model_efn.add(Dense(100,activation='softmax'))
model_efn.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
efficientnet-b0 (Functional) (None, 7, 7, 1280)        4049564   
_________________________________________________________________
global_average_pooling2d (Gl (None, 1280)              0         
_________________________________________________________________
dense (Dense)                (None, 128)               163968    
_________________________________________________________________
dropout (Dropout)            (None, 128)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 100)               12900     
Total params: 4,226,432
Trainable params: 176,868
Non-trainable params: 4,049,564
_________________________________________________________________


In [12]:
callbacks=tf.keras.callbacks.EarlyStopping(monitor='val_loss',patience=2)
model_efn.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [13]:
history_efn= model_efn.fit(train_gen,validation_data=val_gen,epochs=10,callbacks=[callbacks],
                    verbose=1)

2022-08-05 14:40:26.508127: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


Epoch 1/10


2022-08-05 14:40:40.041680: I tensorflow/stream_executor/cuda/cuda_dnn.cc:369] Loaded cuDNN version 8005


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [14]:
print("training_accuracy", history_efn.history['accuracy'][-1])
print("validation_accuracy", history_efn.history['val_accuracy'][-1])

training_accuracy 0.6629089117050171
validation_accuracy 0.7480000257492065


In [15]:
predictions_efn= model_efn.predict(test_gen)
preds_efn=np.argmax(predictions_efn,axis=1)
preds_efn

array([13, 13, 80, 13, 13, 45, 45, 45, 45, 45, 35, 79, 35, 79, 79, 33, 30,
       30, 30, 30, 55, 55, 55, 55, 55, 75, 75, 75, 56, 32,  1,  1,  1,  1,
        1, 93, 93, 93, 93, 93, 11, 11, 11, 11, 11, 35, 35, 35, 35, 35, 87,
       87, 87, 87, 87, 43, 43, 43, 43, 43, 62, 62, 61, 62, 62, 29, 29, 27,
       29, 27, 69, 71, 69, 69, 69,  7, 25, 25,  7, 25, 59, 59, 59, 59, 59,
       36, 36, 36, 36, 36, 53, 91, 91, 91, 26, 39, 39, 39, 39, 39, 71, 31,
       31, 71, 71, 27, 33, 29, 33, 33, 17, 17, 17, 17, 17, 73, 73, 73, 73,
       73, 47, 47, 47, 47, 47, 27, 27, 27, 27, 27, 70, 70, 70, 70, 70, 10,
       10, 10, 10, 10, 85, 85, 85, 85, 95, 52, 52, 52, 52, 52,  4,  4,  4,
        4,  4,  2,  2,  2,  2,  2,  7,  7,  7,  7,  7, 97, 97, 97, 97, 97,
       54, 54, 16, 54, 54, 53, 53, 31, 53, 53, 67, 67, 67, 67, 67, 42, 42,
       42, 42, 42, 46, 46, 46, 46, 72, 32, 32, 32, 32, 32, 23, 23, 23, 23,
       23, 51, 90, 51, 51, 43, 50, 50, 50, 50, 50, 94, 94, 94, 94, 94, 81,
       81, 81, 81, 81,  6

In [16]:
model_efn.evaluate(test_gen)



[0.5657534599304199, 0.8159999847412109]

In [17]:
labels_efn=train_gen.class_indices
labels_efn=dict((v,k) for k,v in labels_efn.items())
preds_efn=[labels_efn[k] for k in preds_efn]

In [18]:
from sklearn.metrics import confusion_matrix, classification_report
matrix_efn=confusion_matrix(test_df.Image_label,preds_efn)
matrix_efn

array([[2, 0, 0, ..., 0, 0, 0],
       [0, 5, 0, ..., 0, 0, 0],
       [0, 0, 5, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 5, 0, 0],
       [0, 0, 0, ..., 1, 3, 0],
       [0, 0, 0, ..., 0, 0, 4]])

In [19]:
print(classification_report(test_df.Image_label,preds_efn))

                       precision    recall  f1-score   support

           air hockey       1.00      0.40      0.57         5
      ampute football       0.83      1.00      0.91         5
              archery       0.71      1.00      0.83         5
        arm wrestling       1.00      1.00      1.00         5
         axe throwing       0.83      1.00      0.91         5
         balance beam       0.71      1.00      0.83         5
        barell racing       0.71      1.00      0.83         5
             baseball       0.71      1.00      0.83         5
           basketball       1.00      0.80      0.89         5
       baton twirling       1.00      0.60      0.75         5
            bike polo       0.62      1.00      0.77         5
            billiards       0.71      1.00      0.83         5
                  bmx       1.00      0.20      0.33         5
              bobsled       1.00      0.80      0.89         5
              bowling       1.00      0.60      0.75  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [20]:
from sklearn.metrics import accuracy_score
accuracy_score(test_df.Image_label,preds_efn)

0.816

# VGG16

In [9]:
from tensorflow.keras.applications.vgg16 import VGG16

base_model_vgg=VGG16(input_shape=(224,224,3),include_top=False,weights = 'imagenet')

2022-08-06 05:51:01.066843: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-06 05:51:01.175175: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-06 05:51:01.175942: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-06 05:51:01.177741: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compil

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


In [10]:
for layer in base_model_vgg.layers:
    layer.trainable = False

In [11]:
model_vgg=Sequential()
model_vgg.add(base_model_vgg)
model_vgg.add(GlobalAveragePooling2D())
model_vgg.add(Dense(128))
model_vgg.add(Dropout(0.2))
model_vgg.add(Dense(100,activation='softmax'))
model_vgg.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg16 (Functional)           (None, 7, 7, 512)         14714688  
_________________________________________________________________
global_average_pooling2d (Gl (None, 512)               0         
_________________________________________________________________
dense (Dense)                (None, 128)               65664     
_________________________________________________________________
dropout (Dropout)            (None, 128)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 100)               12900     
Total params: 14,793,252
Trainable params: 78,564
Non-trainable params: 14,714,688
_________________________________________________________________


In [13]:
callbacks = tf.keras.callbacks.EarlyStopping(monitor='val_loss',patience=2)
model_vgg.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])

In [15]:
history_vgg=model_vgg.fit(train_gen,validation_data=val_gen,epochs=10,callbacks=[callbacks],
                    verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [16]:
print("training_accuracy", history_vgg.history['accuracy'][-1])
print("validation_accuracy", history_vgg.history['val_accuracy'][-1])

training_accuracy 0.36133214831352234
validation_accuracy 0.4180000126361847


In [17]:
predictions_vgg= model_vgg.predict(test_gen)
preds_vgg=np.argmax(predictions_vgg, axis=1)
preds_vgg

array([13, 58, 80, 90, 32, 45, 26, 93, 45, 19, 35, 47, 81, 57, 28, 30, 78,
       67, 30, 30, 76, 55, 55, 55, 55, 12, 56, 58, 56, 95,  1, 28, 53, 91,
       53, 96, 93,  7, 93, 73, 11, 11, 11, 11, 71, 35, 35, 35, 35, 35, 22,
       27, 73, 87, 66, 19, 68, 43, 19, 54, 62, 62, 29, 62, 28, 29, 54, 27,
       29, 29, 53, 41, 53, 71, 53, 16, 25, 25, 44, 52, 66, 52, 71, 59, 59,
       36, 17, 73, 36, 30, 26, 91, 91, 91, 64, 39, 39, 98, 98, 39, 68, 71,
       41, 53, 71, 27, 81, 29, 95, 17, 17, 17, 17, 17, 17, 61, 73, 71,  3,
       50, 47, 47, 47, 47, 47, 27, 27, 27, 29, 28, 70, 92, 45, 70, 70, 98,
       39, 97, 35, 10, 85, 85, 45, 95, 95, 52, 58, 71, 52, 79,  7, 61,  4,
        4,  4,  2, 41,  2, 38,  2, 58, 25, 52, 20, 59, 97, 53, 59, 53, 97,
       16, 29, 42, 53, 54, 26, 53, 53, 53, 42, 47, 67, 47, 47, 18, 42, 51,
       40, 42, 97, 46, 13, 46, 46, 58, 58, 58, 56, 32, 32, 73, 23, 23, 23,
       23, 53, 64, 26, 51, 39, 40, 40, 66, 92, 66, 94, 98, 94, 55, 94, 81,
       91, 96, 81, 81,  6

In [18]:
model_vgg.evaluate(test_gen)



[2.107407569885254, 0.47600001096725464]

In [19]:
labels_vgg=train_gen.class_indices
labels_vgg= dict((v,k) for k,v in labels_vgg.items())
preds_vgg=[labels_vgg[k] for k in preds_vgg]

In [21]:
from sklearn.metrics import confusion_matrix, classification_report
matrix_vgg=confusion_matrix(test_df.Image_label,preds_vgg)
matrix_vgg

array([[3, 0, 0, ..., 0, 0, 0],
       [0, 1, 0, ..., 0, 0, 0],
       [0, 0, 3, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 2, 0, 0],
       [0, 0, 0, ..., 2, 2, 0],
       [0, 0, 0, ..., 0, 0, 3]])

In [22]:
print(classification_report(test_df.Image_label,preds_vgg))

                       precision    recall  f1-score   support

           air hockey       1.00      0.60      0.75         5
      ampute football       0.33      0.20      0.25         5
              archery       1.00      0.60      0.75         5
        arm wrestling       0.67      0.80      0.73         5
         axe throwing       1.00      0.60      0.75         5
         balance beam       0.50      0.20      0.29         5
        barell racing       0.67      0.40      0.50         5
             baseball       0.00      0.00      0.00         5
           basketball       0.00      0.00      0.00         5
       baton twirling       0.00      0.00      0.00         5
            bike polo       0.50      0.20      0.29         5
            billiards       0.80      0.80      0.80         5
                  bmx       0.50      0.20      0.29         5
              bobsled       0.33      0.20      0.25         5
              bowling       0.00      0.00      0.00  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [23]:
from sklearn.metrics import accuracy_score
accuracy_score(test_df.Image_label,preds_vgg)

0.476