In [43]:
# Importing the relevant Python libraries

%reset -f

# General libraries
import numpy as np
import pandas as pd
import scipy
import os
import albumentations as A
import cv2

# ML libraries
import sklearn
from sklearn import preprocessing
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers.experimental import preprocessing
import keras_tuner as kt

# Changing a few default options
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
np.set_printoptions(threshold = 1e6)

In [44]:
# Declaring an augmentation pipeline

l=50 # Resizing parameter

transform = A.Compose(
    [
        A.Resize(l, l, p=1), # Resizing the picture
        A.Normalize( # Normalising the picture
            mean=[0, 0, 0], # Zero mean
            std=[1, 1, 1], # Unit variance
            p=1 # All pictures fed to the pipeline undergo this step
        ),
        A.HorizontalFlip(p=0.5), # 50% of the pictures fed to the pipeline are flipped horizontally
        A.MedianBlur(blur_limit=3, always_apply=False, p=0.5)  # 50% of the pictures fed to the pipeline are blurred
    ]
)

In [45]:
# Extracting features from all pictures

# # Kaggle directory
# dir_gen='/kaggle/input/petfinder-pawpularity-score'
# dir_train='/kaggle/input/petfinder-pawpularity-score/train'
# dir_test='/kaggle/input/petfinder-pawpularity-score/test'

# Local directory
dir_gen='../../../data'
dir_train='../../../data\\train'
dir_test='../../../data\\test'

data_train=pd.read_csv(os.path.join(dir_gen, 'train.csv')) # Loading the initial data as provided by Kaggle
data_train.set_index('Id', inplace=True) # Setting the index

image_train, image_id_train, image_test, image_id_test, Y =([] for i in range(0,5)) # Initialising five lists

for dirname, _, filenames in os.walk(dir_gen):  # Listing all files and directories in dir_gen
    for filename in filenames: # Looping over the filenames
        if filename[-3:]=='jpg': # Checking whether the file is a jpg image
            img = cv2.imread(os.path.join(dirname, filename)) # Loading the image
            transformed_img = transform(image=img)["image"] # Transforming the image according to the transformation pipeline
            file_index=filename.replace(".jpg", "") # The index is the file name
            if dirname==dir_train: # If the image is in the training set
                image_train.append(transformed_img) # Appending the image to the corresponding list
                image_id_train.append(file_index) # Appending the image index to the corresponding list
                Y.append(data_train.loc[pd.Index([file_index])].iloc[0]["Pawpularity"]/100.0) # Appending the image score to the corresponding list
            elif dirname==dir_test: # If the image is in the testing set
                image_test.append(transformed_img) # Appending the image to the corresponding list
                image_id_test.append(file_index) # Appending the image index to the corresponding list

In [46]:
# Defining the feature matrices and output vector
Y=np.asarray(Y)
X=np.asarray(image_train)
X_test=np.asarray(image_test)

# Checking the shape of the resulting matrices
print('Shape of Y:', Y.shape)
print('Shape of X:', X.shape)
print('Shape of X_test:',X_test.shape)

Shape of Y: (9912,)
Shape of X: (9912, 50, 50, 3)
Shape of X_test: (8, 50, 50, 3)


In [47]:
# Training the Convolutional Neural Network

def model_builder(hp):
    model = keras.Sequential()
    
    # A typical CNN architecture (Convolution layer, Relu activation function, MaxPooling layer repeated several times) is adopted
    model.add(layers.Conv2D(filters=100, kernel_size=(5,5), activation='relu', input_shape=(l, l, 3)))
    model.add(layers.MaxPooling2D(2,2))
    model.add(layers.Conv2D(filters=100, kernel_size=(5,5), activation='relu'))
    model.add(layers.MaxPooling2D(2,2))
    model.add(layers.Flatten())
    model.add(keras.layers.Dense(50, activation='relu')) # Linear layer
    model.add(layers.Dropout(0.2)) # Dropout layer
    model.add(keras.layers.Dense(1, activation='linear')) # Linear layer

    # The learning rate will be tuned using keras_tuner
    hp_learning_rate = hp.Choice('learning_rate', values=[3e-1, 1e-1, 3e-2, 1e-2, 3e-3, 1e-3, 3e-4, 1e-4])
    
    # Compiling the model
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=hp_learning_rate),
                loss='mse',
                metrics=[tf.keras.metrics.RootMeanSquaredError()])

    return model

In [48]:
# Defining the Hyperband tuner (from the keras_tuner library) properties

tuner = kt.Hyperband(model_builder,
                     objective='val_loss',
                     max_epochs=10,
                     overwrite=True)

stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)

In [49]:
try: # Removing the old folder if it exists
    os.remove("/untitled_project")
except:
    pass

# Looking for the optimal hyperparameters values
tuner.search(X, Y, validation_split=0.2, callbacks=[stop_early])

# Getting the optimal hyperparameters
best_hps=tuner.get_best_hyperparameters(num_trials=1)[0]

print('\n\n',tuner.results_summary(num_trials=1))

Trial 8 Complete [00h 01m 25s]
val_loss: 0.04444742202758789

Best val_loss So Far: 0.04175742343068123
Total elapsed time: 00h 11m 04s
INFO:tensorflow:Oracle triggered exit
Results summary
Results in .\untitled_project
Showing 1 best trials
Objective(name='val_loss', direction='min')
Trial summary
Hyperparameters:
learning_rate: 0.001
tuner/epochs: 2
tuner/initial_epoch: 0
tuner/bracket: 2
tuner/round: 0
Score: 0.04175742343068123


 None


In [50]:
# Building the model with the best hyperparameters and training it on the data for 30 epochs. The optimal number of epochs is then selected

model = tuner.hypermodel.build(best_hps)
history = model.fit(X, Y, epochs=30, validation_split=0.2)

val_loss_per_epoch = history.history['val_loss']
best_epoch = val_loss_per_epoch.index(min(val_loss_per_epoch)) + 1
print('Best epoch: %d' % (best_epoch,))

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Best epoch: 4


In [51]:
# Retraining the model with the best hyperparameters and optimal number of epochs

hypermodel = tuner.hypermodel.build(best_hps)
hypermodel.fit(X, Y, epochs=best_epoch, validation_split=0.2)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.History at 0x1c03af126a0>

In [52]:
Y_test = hypermodel.predict(X_test).flatten() # Predicting the output

# Writing the results to a file
final_data=np.column_stack((np.transpose(image_id_test), np.transpose(Y_test).astype(np.object)*100.0))
np.savetxt("submission.csv", final_data, delimiter=",", header='Id,Pawpularity', fmt='%s,%f', comments='')

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  final_data=np.column_stack((np.transpose(image_id_test), np.transpose(Y_test).astype(np.object)*100.0))
