## Data Augmentation 

In [None]:
from keras.preprocessing.image import ImageDataGenerator
import os
from os.path import isfile, join
import pandas as pd
import numpy as np
from skimage.io import imread
from tqdm import tqdm

In [None]:
# Generator configured with random rotation, shift, shear, zoom,
# channel-shift and horizontal-flip settings; it is handed to augment_data.
datagen = ImageDataGenerator(
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.15,
    zoom_range=0.1,
    channel_shift_range=10,
    horizontal_flip=True,
)

In [None]:
# Class sub-folders under img/train/ that the augmentation loop iterates over.
targets = ['baseball', 'cricket', 'football']

In [None]:
def augment_data(datagen, target: str, n_images: int = 64, n_copies: int = 100) -> str:
    """Write augmented copies of one class's training images to disk.

    Reads up to ``n_images`` files from ``img/train/<target>/`` and, for each
    of them, saves ``n_copies`` randomly transformed PNG variants (file prefix
    ``aug``) into ``img/train/<target>/aug``.

    Parameters:
        datagen: generator object exposing ``fit`` and ``flow`` (the notebook
            passes a Keras ``ImageDataGenerator``).
        target: name of the class sub-folder under ``img/train/``.
        n_images: maximum number of source images to augment.
        n_copies: number of augmented images written per source image.

    Returns:
        The string ``'Augmentation Finished'``.
    """
    source_dir = f'img/train/{target}/'
    # Keep regular files only; this also leaves out the 'aug' output folder.
    target_lst = [source_dir + f for f in os.listdir(source_dir) if isfile(join(source_dir, f))]
    # Folder that receives the augmented images; created when missing.
    save_here = f'img/train/{target}/aug'
    os.makedirs(save_here, exist_ok=True)
    # Slicing instead of indexing range(64) avoids an IndexError when the
    # folder holds fewer than n_images files.
    for path in tqdm(target_lst[:n_images]):
        # Add a leading batch axis: flow() expects a batch of images.
        image = np.expand_dims(imread(path), axis=0)
        datagen.fit(image)
        batches = iter(datagen.flow(image,
                                    save_to_dir=save_here,
                                    save_prefix='aug',
                                    save_format='png'))
        # Each next() writes one image to save_here. Pulling exactly n_copies
        # batches avoids the extra image that zip(flow, range(n)) produced by
        # advancing the generator once more before noticing range was done.
        for _ in range(n_copies):
            next(batches)
    return 'Augmentation Finished'

In [None]:
# Run the augmentation once per class folder listed in `targets`.
for target in targets:
    augment_data(datagen, target)

## Prepare data

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from skimage.io import imread
import cv2

In [None]:
# 64x64 dataset: images go to X, integer class labels to Y
# (0 = baseball, 1 = cricket, 2 = football).
X = []
Y = []

for label, sport in enumerate(('baseball', 'cricket', 'football')):
    aug_dir = 'img/train/' + sport + '/aug/'
    for file in tqdm(os.listdir(aug_dir)):
        X.append(cv2.resize(imread(aug_dir + file), (64, 64)))
        Y.append(label)

In [None]:
# 128x128 dataset: images go to X_128, integer class labels to Y_128
# (0 = baseball, 1 = cricket, 2 = football).
X_128 = []
Y_128 = []

for label, sport in enumerate(('baseball', 'cricket', 'football')):
    aug_dir = 'img/train/' + sport + '/aug/'
    for file in tqdm(os.listdir(aug_dir)):
        X_128.append(cv2.resize(imread(aug_dir + file), (128, 128)))
        Y_128.append(label)

In [None]:
# 32x32 dataset: images go to X_32, integer class labels to Y_32
# (0 = baseball, 1 = cricket, 2 = football).
X_32 = []
Y_32 = []

for label, sport in enumerate(('baseball', 'cricket', 'football')):
    aug_dir = 'img/train/' + sport + '/aug/'
    for file in tqdm(os.listdir(aug_dir)):
        X_32.append(cv2.resize(imread(aug_dir + file), (32, 32)))
        Y_32.append(label)

In [None]:
# Convert the accumulated Python lists into NumPy arrays, one pair per image size.
X = np.array(X)
Y = np.array(Y)
X_128 = np.array(X_128)
Y_128 = np.array(Y_128)
X_32 = np.array(X_32)
Y_32 = np.array(Y_32)

In [None]:
# Scale pixel values by 1/255. Casting to float32 first keeps the result in
# float32 instead of the float64 that integer-array / 255.0 would produce,
# halving the memory of the three datasets.
X = X.astype(np.float32) / 255.0
X_128 = X_128.astype(np.float32) / 255.0
X_32 = X_32.astype(np.float32) / 255.0

In [None]:
from sklearn.utils import shuffle

# Shuffle images and labels together, one call per image size.
# All three calls use the same random_state.
X,Y = shuffle(X,Y, random_state=42)

X_128,Y_128 = shuffle(X_128,Y_128, random_state=42)

X_32, Y_32 = shuffle(X_32,Y_32, random_state=42)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the 64x64 data, stratified on the class labels.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=.2,
    random_state=42,
    stratify=Y,
)

print(X_train.shape)
print(X_test.shape)

In [None]:
# Summary of the 64x64 split: array shape, sample counts and the number of
# training samples per class label.
print('X_train shape:', X_train.shape)
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')
print(np.unique(y_train,return_counts=True),'values of each class')

In [None]:
# Hold out 20% of the 32x32 data, stratified on the class labels.
X_train_32, X_test_32, y_train_32, y_test_32 = train_test_split(
    X_32,
    Y_32,
    test_size=.2,
    random_state=42,
    stratify=Y_32,
)

print(X_train_32.shape)
print(X_test_32.shape)

### Convolutional Network

In [None]:
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers.core import Dense
from keras.layers.core import Activation
import numpy as np
from keras.layers.core import Dropout
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.layers.core import Flatten
import pandas as pd
import matplotlib.pyplot as plt
plt.rcParams.update({'figure.max_open_warning': 0})

In [None]:
# One-hot encode the integer labels (3 classes) for the 64x64 and 32x32 splits.
n_classes = 3

Y_train, Y_test = (np_utils.to_categorical(labels, n_classes) for labels in (y_train, y_test))

Y_train_32, Y_test_32 = (np_utils.to_categorical(labels, n_classes) for labels in (y_train_32, y_test_32))

### Create metric functions for model evaluation

In [None]:
from keras import backend as K

def recall_m(y_true, y_pred):
    """Recall metric: rounded true positives over rounded actual positives."""
    hits = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    actual = K.sum(K.round(K.clip(y_true, 0, 1)))
    # K.epsilon() keeps the division defined when there are no positives.
    return hits / (actual + K.epsilon())

def precision_m(y_true, y_pred):
    """Precision metric: rounded true positives over rounded predicted positives."""
    hits = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted = K.sum(K.round(K.clip(y_pred, 0, 1)))
    # K.epsilon() keeps the division defined when nothing is predicted positive.
    return hits / (predicted + K.epsilon())

def f1_m(y_true, y_pred):
    """F1 metric built from precision_m and recall_m (epsilon-guarded)."""
    p = precision_m(y_true, y_pred)
    r = recall_m(y_true, y_pred)
    harmonic_half = (p * r) / (p + r + K.epsilon())
    return 2 * harmonic_half

In [None]:
# Display the shapes of the 64x64 training images and their one-hot labels.
X_train.shape, Y_train.shape

## Create a convolutional Network

In [None]:
# CNN for the 32x32 images: two valid-padded 4x4 convolutions, max-pooling,
# dropout, a 512-unit dense layer and a 3-way softmax output.
img_rows = 32
img_cols = 32
kernel_size = 4  # side length of the square convolution kernels
pool_size = 2  # side length of the square max-pooling window

model = Sequential([
    # Only the first layer declares the input shape (rows x columns x channels).
    # The second convolution no longer repeats it: its real input is the
    # previous layer's output, so the repeated argument was misleading.
    Convolution2D(32, (kernel_size, kernel_size), padding='valid',
                  input_shape=(img_rows, img_cols, 3)),
    Activation('relu'),
    Convolution2D(64, (kernel_size, kernel_size), padding='valid'),
    Activation('relu'),
    MaxPooling2D(pool_size=(pool_size, pool_size)),
    Dropout(0.25),
    Flatten(),
    Dense(512),
    Activation('relu'),
    Dropout(0.5),
    Dense(3),
    Activation('softmax'),
])

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=["accuracy", f1_m, recall_m, precision_m])
history_3 = model.fit(
    X_train_32,  # training images
    Y_train_32,  # one-hot training labels
    batch_size=128,
    epochs=10,
    verbose=1,
)
# evaluate() returns the loss followed by the compiled metrics, in order.
score = model.evaluate(X_test_32, Y_test_32)
print("Test loss", score[0])
print("Test accuracy", score[1])

# Plot every quantity recorded per epoch during training.
pd.DataFrame(history_3.history).plot(figsize=(8,5))
plt.grid(True)
plt.show()

In [None]:
# Load every file in img/test/, resize to 32x32 and scale pixels by 1/255.
test_dir = 'img/test/'
test_32 = np.array([
    cv2.resize(imread(test_dir + file), (32, 32))
    for file in os.listdir(test_dir)
]) / 255.0

In [None]:
# Predict the whole 32x32 test set in one batch and keep, per image, the index
# of the highest class probability. argmax replaces the hand-written comparison
# chain, which fell through to class 2 whenever classes 0 and 1 tied.
probs_32 = model.predict(test_32) if len(test_32) else np.empty((0, 3))
preds_32 = np.argmax(probs_32, axis=1).tolist()

# Draw all test images in ONE figure. Creating the figure inside the loop put
# every image in its own figure, so the subplot grid never formed.
class_names_32 = ('Beisbol', 'Cricket', 'Football')  # indexed by predicted label
n_cols_32 = 5
n_rows_32 = max(9, -(-len(test_32) // n_cols_32))  # 9 rows as before; more if needed
plt.figure(figsize=(30,20))
for i, (img, label) in enumerate(zip(test_32, preds_32)):
    plt.subplot(n_rows_32, n_cols_32, i+1)
    plt.xticks([])
    plt.yticks([])
    plt.xlabel(class_names_32[label])
    plt.imshow(img)
plt.show()

In [None]:
# CNN for the 64x64 images: two valid-padded 4x4 convolutions (32 filters
# each), max-pooling, dropout, a 512-unit dense layer and a 3-way softmax.
img_rows = 64
img_cols = 64
kernel_size = 4  # side length of the square convolution kernels
pool_size = 2  # side length of the square max-pooling window

model = Sequential([
    # Only the first layer declares the input shape (rows x columns x channels).
    # The second convolution no longer repeats it: its real input is the
    # previous layer's output, so the repeated argument was misleading.
    Convolution2D(32, (kernel_size, kernel_size), padding='valid',
                  input_shape=(img_rows, img_cols, 3)),
    Activation('relu'),
    Convolution2D(32, (kernel_size, kernel_size), padding='valid'),
    Activation('relu'),
    MaxPooling2D(pool_size=(pool_size, pool_size)),
    Dropout(0.25),
    Flatten(),
    Dense(512),
    Activation('relu'),
    Dropout(0.5),
    Dense(3),
    Activation('softmax'),
])

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=["accuracy", f1_m, recall_m, precision_m])
history_3 = model.fit(
    X_train,  # training images
    Y_train,  # one-hot training labels
    batch_size=128,
    epochs=11,
    verbose=1,
)
# evaluate() returns the loss followed by the compiled metrics, in order.
score = model.evaluate(X_test, Y_test)
print("Test loss", score[0])
print("Test accuracy", score[1])

# Plot every quantity recorded per epoch during training.
pd.DataFrame(history_3.history).plot(figsize=(8,5))
plt.grid(True)
plt.show()

## Prepare test data for prediction

In [None]:
# Load every file in img/test/, resize to 64x64 and scale pixels by 1/255.
test_dir = 'img/test/'
test = np.array([
    cv2.resize(imread(test_dir + file), (64, 64))
    for file in os.listdir(test_dir)
]) / 255.0

In [None]:
# Predict the whole 64x64 test set in one batch and keep, per image, the index
# of the highest class probability. argmax replaces the hand-written comparison
# chain, which fell through to class 2 whenever classes 0 and 1 tied.
probs = model.predict(test) if len(test) else np.empty((0, 3))
preds = np.argmax(probs, axis=1).tolist()

In [None]:
# Draw all 64x64 test images in ONE figure, labelled with the predicted class.
# Creating the figure inside the loop put every image in its own figure, so
# the subplot grid never formed.
class_names = ('Beisbol', 'Cricket', 'Football')  # indexed by predicted label
n_cols = 5
n_rows = max(9, -(-len(test) // n_cols))  # 9 rows as before; more if needed
plt.figure(figsize=(30,20))
for i, (img, label) in enumerate(zip(test, preds)):
    plt.subplot(n_rows, n_cols, i+1)
    plt.xticks([])
    plt.yticks([])
    plt.xlabel(class_names[label])
    plt.imshow(img)
plt.show()

## Convolutional network with 128 x 128 pixels images

In [None]:
# Hold out 20% of the 128x128 data, stratified on the class labels.
X_train_128, X_test_128, y_train_128, y_test_128 = train_test_split(
    X_128,
    Y_128,
    test_size=.2,
    random_state=42,
    stratify=Y_128,
)

print(X_train_128.shape)
print(X_test_128.shape)

In [None]:
# One-hot encode the labels of the 128x128 split. These must come from
# y_train_128 / y_test_128 (the split that produced X_train_128 / X_test_128),
# not from the 64x64 split's y_train / y_test.
Y_train_128 = np_utils.to_categorical(y_train_128,3)
Y_test_128 = np_utils.to_categorical(y_test_128,3)

In [None]:
# CNN for the 128x128 images: three valid-padded 4x4 convolutions
# (128, 64, 64 filters), max-pooling, then dense layers of 512, 512, 256 and
# 128 units (each followed by dropout) and a 3-way softmax output.
img_rows = 128
img_cols = 128
kernel_size = 4  # side length of the square convolution kernels
pool_size = 2  # side length of the square max-pooling window

model = Sequential([
    # Only the first layer declares the input shape (rows x columns x channels).
    # The later convolutions no longer repeat it: their real input is the
    # previous layer's output, so the repeated argument was misleading.
    Convolution2D(128, (kernel_size, kernel_size), padding='valid',
                  input_shape=(img_rows, img_cols, 3)),
    Activation('relu'),
    Convolution2D(64, (kernel_size, kernel_size), padding='valid'),
    Activation('relu'),
    Convolution2D(64, (kernel_size, kernel_size), padding='valid'),
    Activation('relu'),
    MaxPooling2D(pool_size=(pool_size, pool_size)),
    Dropout(0.25),
    Flatten(),
    Dense(512),
    Activation('relu'),
    Dropout(0.5),
    Dense(512),
    Activation('relu'),
    Dropout(0.5),
    Dense(256),
    Activation('relu'),
    Dropout(0.5),
    Dense(128),
    Activation('relu'),
    Dropout(0.5),
    Dense(3),
    Activation('softmax'),
])

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=["accuracy", f1_m, recall_m, precision_m])
history_3 = model.fit(
    X_train_128,  # training images
    Y_train_128,  # one-hot training labels
    batch_size=16,
    epochs=4,
    verbose=1,
)
# evaluate() returns the loss followed by the compiled metrics, in order.
score = model.evaluate(X_test_128, Y_test_128)
print("Test loss", score[0])
print("Test accuracy", score[1])

# Plot every quantity recorded per epoch during training.
pd.DataFrame(history_3.history).plot(figsize=(8,5))
plt.grid(True)
plt.show()

In [None]:
# Load every file in img/test/, resize to 128x128 and scale pixels by 1/255.
test_dir = 'img/test/'
test_128 = np.array([
    cv2.resize(imread(test_dir + file), (128, 128))
    for file in os.listdir(test_dir)
]) / 255.0

In [None]:
# Predict the whole 128x128 test set in one batch. argmax replaces the
# hand-written comparison chain, which fell through to class 2 whenever
# classes 0 and 1 tied.
probs_128 = model.predict(test_128) if len(test_128) else np.empty((0, 3))
# Raw probabilities, kept as one (1, 3) array per image as before.
preds_128_arr = [row[np.newaxis, :] for row in probs_128]
# Predicted class index per image.
preds_128 = np.argmax(probs_128, axis=1).tolist()


## Transfer Learning

In [None]:
from keras.utils import np_utils
import tensorflow as tf
from tensorflow.keras.preprocessing import image
from tensorflow.keras import datasets, layers, models
from tensorflow.keras.applications.vgg16 import VGG16
import os
os.environ['KMP_DUPLICATE_LIB_OK']='True'

In [None]:

# VGG16 convolutional base (no classifier head) with ImageNet weights for
# 64x64 RGB inputs.
# NOTE(review): the training arrays are scaled by 1/255 earlier in the file,
# while Keras documents a dedicated preprocess_input for VGG16 - confirm the
# scaling used here is intended.
base_model = VGG16(
    input_shape=(64,64,3),
    include_top=False,
    weights='imagenet',
)

In [None]:
# Print the layer-by-layer summary of the VGG16 base.
base_model.summary()

In [None]:
# Freeze every layer of the base model so only the layers added on top of it
# are updated during training.
for layer in base_model.layers:
    layer.trainable = False

In [None]:
# Train only the new head stacked on top of the frozen base model.

x = layers.Flatten()(base_model.output)

x = layers.Dense(512, activation='relu')(x)

# softmax, not sigmoid: each image belongs to exactly one of the 3 classes and
# categorical_crossentropy expects the outputs to form a probability
# distribution over the classes.
x = layers.Dense(3, activation='softmax')(x)

model = tf.keras.models.Model(base_model.input, x)

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics = ['acc', f1_m, recall_m, precision_m])

In [None]:
# Fit the current model on the 64x64 training data and plot the quantities
# recorded per epoch.
vgghist = model.fit(
    X_train,
    Y_train,
    batch_size=64,
    epochs=15,
)

history_frame = pd.DataFrame(vgghist.history)
history_frame.plot(figsize=(8,5))
plt.grid(True)
plt.show()

### First model transfer learning predictions

In [None]:
# Predict the whole 64x64 test set in one batch with the transfer-learning
# model. argmax replaces the hand-written comparison chain, which fell through
# to class 2 whenever classes 0 and 1 tied.
tl1_probs = model.predict(test) if len(test) else np.empty((0, 3))
# Raw outputs, kept as one (1, 3) array per image as before.
tl1_preds_arr = [row[np.newaxis, :] for row in tl1_probs]
# Predicted class index per image.
tl1_preds = np.argmax(tl1_probs, axis=1).tolist()
tl1_preds

In [None]:
# Draw all 64x64 test images in ONE figure, labelled with the class predicted
# by the transfer-learning model. Creating the figure inside the loop put
# every image in its own figure, so the subplot grid never formed.
tl1_class_names = ('Beisbol', 'Cricket', 'Football')  # indexed by predicted label
tl1_n_cols = 5
tl1_n_rows = max(9, -(-len(test) // tl1_n_cols))  # 9 rows as before; more if needed
plt.figure(figsize=(30,20))
for i, (img, label) in enumerate(zip(test, tl1_preds)):
    plt.subplot(tl1_n_rows, tl1_n_cols, i+1)
    plt.xticks([])
    plt.yticks([])
    plt.xlabel(tl1_class_names[label])
    plt.imshow(img)
plt.show()

## Test for samples of 128 x 128


In [None]:
# VGG16 convolutional base (no classifier head) with ImageNet weights for
# 128x128 RGB inputs.
base_model = VGG16(
    input_shape=(128,128,3),
    include_top=False,
    weights='imagenet',
)

# Freeze every base layer so only the layers added on top are trained.
for frozen_layer in base_model.layers:
    frozen_layer.trainable = False

In [None]:
# New classification head on top of the frozen 128x128 base model:
# flatten, then dense layers of 512, 256 and 128 units and a 3-way output.
x = layers.Flatten()(base_model.output)

x = layers.Dense(512, activation='relu')(x)

x = layers.Dense(256, activation='relu')(x)

x = layers.Dense(128, activation='relu')(x)

# softmax, not sigmoid: each image belongs to exactly one of the 3 classes and
# categorical_crossentropy expects the outputs to form a probability
# distribution over the classes.
x = layers.Dense(3, activation='softmax')(x)

model = tf.keras.models.Model(base_model.input, x)

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics = ['acc', f1_m, recall_m, precision_m])

vgghist = model.fit(X_train_128, Y_train_128,
                    batch_size=64,
                    epochs=10)
# Plot every quantity recorded per epoch during training.
pd.DataFrame(vgghist.history).plot(figsize=(8,5))
plt.grid(True)
plt.show()

## Make predictions

In [None]:
# Predict the whole 128x128 test set in one batch with the transfer-learning
# model. argmax replaces the hand-written comparison chain, which fell through
# to class 2 whenever classes 0 and 1 tied.
probs_tl_128 = model.predict(test_128) if len(test_128) else np.empty((0, 3))
# Raw outputs, kept as one (1, 3) array per image as before.
preds_tl_128_arr = [row[np.newaxis, :] for row in probs_tl_128]
# Predicted class index per image.
preds_tl_128 = np.argmax(probs_tl_128, axis=1).tolist()

In [None]:

# Draw all 128x128 test images in ONE figure, labelled with the class predicted
# by the transfer-learning model. Creating the figure inside the loop put
# every image in its own figure, so the subplot grid never formed.
tl_128_class_names = ('Beisbol', 'Cricket', 'Football')  # indexed by predicted label
tl_128_n_cols = 5
tl_128_n_rows = max(9, -(-len(test_128) // tl_128_n_cols))  # 9 rows as before; more if needed
plt.figure(figsize=(30,20))
for i, (img, label) in enumerate(zip(test_128, preds_tl_128)):
    plt.subplot(tl_128_n_rows, tl_128_n_cols, i+1)
    plt.xticks([])
    plt.yticks([])
    plt.xlabel(tl_128_class_names[label])
    plt.imshow(img)
plt.show()

In [None]:
# Save the 128x128 transfer-learning predictions as a one-column CSV.
# The output folder is created first so to_csv does not fail when it is missing.
os.makedirs('predictions', exist_ok=True)
pd.DataFrame(preds_tl_128, columns = ['predictions']).to_csv('predictions/predictions_tfl_model_128_x_128.csv', index=False)

In [None]:
# Hand-entered ground-truth class indices for the test images, compared
# position by position with the prediction lists below.
# NOTE(review): presumably listed in the order os.listdir('img/test/') returned
# the files when the notebook was run - confirm, since that order is not fixed.
true = [1,0,2,2,1,1,0,2,2,1,0,0,1,1,2,1,1,2,0,2,0,0,2,0,1,0,1,2,1,0,2,2,0,2,1,2,1,1,0,1,0]

In [None]:
# Count, for each model, how many test predictions match the ground truth.
# The original loop had four `if` statements whose only body was a commented-out
# increment, which is an IndentationError and left those counters at 0.
preds_conv_32 = sum(int(p == t) for p, t in zip(preds_32, true))
preds_conv = sum(int(p == t) for p, t in zip(preds, true))
preds_conv_128 = sum(int(p == t) for p, t in zip(preds_128, true))
pred_res = sum(int(p == t) for p, t in zip(tl1_preds, true))
pred_128_res = sum(int(p == t) for p, t in zip(preds_tl_128, true))

print('Preds convolutional network 32 x 32: ', preds_conv_32,'\nPreds convolutional network: ', preds_conv,'\nPreds for conv 128x128: ', preds_conv_128,
      '\nPreds for tfl1 64x64: ', pred_res, '\nPreds for tfl2 128x128: ', pred_128_res
     )

In [None]:
from sklearn.metrics import confusion_matrix

In [None]:
# Confusion matrix of the 128x128 transfer-learning predictions against the
# hand-entered ground-truth labels.
confusion_matrix(true, preds_tl_128)