In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import Sequential, Model
from tensorflow.keras.layers import Dense, Flatten, Dropout, Concatenate, Input
from tensorflow.keras.callbacks import Callback, EarlyStopping, ModelCheckpoint
from sklearn.model_selection import train_test_split
from PIL import Image
import matplotlib.pyplot as plt
import os
import random
import math
import sys

%matplotlib inline

In [2]:
from tensorflow.keras.applications import MobileNet
from tensorflow.keras.applications.mobilenet import preprocess_input

In [3]:
# Name of the backbone used in this notebook.
model_name = 'MobileNet'

# Output directory for this run. NOTE(review): the variable is still called
# save_VGG16 although it points at the MobileNet folder; the name is kept so
# that any code referring to it keeps working.
save_VGG16 = './MobileNet/'

# Create the directory from Python instead of `!mkdir`: exist_ok=True makes the
# cell safe to re-run (mkdir reports an error when the folder already exists)
# and removes the dependency on a shell.
os.makedirs(save_VGG16, exist_ok=True)

### Load and Preprocess Data

In [4]:
# Side length (pixels) and colour channels of the images fed to the network.
img_size = 224
channels = 3

# Number of samples per batch.
Batch_size = 16

# Let tf.data pick the degree of parallelism / prefetch depth at runtime.
AUTOTUNE = tf.data.experimental.AUTOTUNE

# Image directories of the Kaggle competition dataset.
train_dir = "/kaggle/input/petfinder-pawpularity-score/train/"
test_dir = "/kaggle/input/petfinder-pawpularity-score/test/"

def seed_everything(seed=123):
    """Seed the Python, NumPy and TensorFlow random number generators.

    Args:
        seed: Integer seed applied to every generator. Defaults to 123, the
            value that used to be hard-coded, so existing calls behave as
            before.

    Side effects:
        Sets the ``PYTHONHASHSEED`` and ``TF_CPP_MIN_LOG_LEVEL`` environment
        variables of the current process.
    """
    # The running interpreter fixed its hash seed at start-up, so this value
    # is only picked up by processes started afterwards.
    os.environ['PYTHONHASHSEED'] = str(seed)
    # '2' asks TensorFlow's native logger to hide INFO and WARNING messages.
    os.environ["TF_CPP_MIN_LOG_LEVEL"] = '2'

    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)

# Seed every random number generator once, before any dataset or model is built.
seed_everything()

In [5]:
# Load the competition tables: `df` holds the training rows, `df_test` the test rows.
df = pd.read_csv("/kaggle/input/petfinder-pawpularity-score/train.csv")
df_test = pd.read_csv("/kaggle/input/petfinder-pawpularity-score/test.csv")

# Keep the bare test ids for the submission file, because the "Id" column is
# rewritten into file paths just below.
Id = df_test["Id"].copy()

# Turn every id into the path of its JPEG. The directories come from the
# train_dir / test_dir constants instead of being typed out a second time, and
# the concatenation is vectorised rather than applied row by row.
df["Id"] = train_dir + df["Id"] + ".jpg"
df_test["Id"] = test_dir + df_test["Id"] + ".jpg"

In [6]:
def image_preprocess(is_labelled):
    """Build the random-augmentation function used by the image pipeline.

    Args:
        is_labelled: When truthy, the returned function takes ``(img, label)``
            and returns the augmented image together with the untouched
            label. Otherwise it takes and returns the image alone.

    Returns:
        A callable that can be passed to ``Dataset.map``.
    """
    def augment(image):
        # Random horizontal flip, then random vertical flip, then a small
        # random contrast change.
        flipped = tf.image.random_flip_up_down(
            tf.image.random_flip_left_right(image)
        )
        return tf.image.random_contrast(flipped, 0.95, 1.05)

    if not is_labelled:
        return augment

    def can_be_augmented(img, label):
        # The second element is passed through unchanged.
        return augment(img), label

    return can_be_augmented

def image_read(is_labelled):
    """Build the function that loads one JPEG and prepares it for MobileNet.

    Args:
        is_labelled: When truthy, the returned function takes ``(path, label)``
            and returns the decoded image together with the untouched label.
            Otherwise it takes the path alone and returns only the image.

    Returns:
        A callable that can be passed to ``Dataset.map``.
    """
    def decode(path):
        raw = tf.io.read_file(path)
        image = tf.image.decode_jpeg(raw, channels=channels)
        image = tf.cast(image, tf.float32)
        image = tf.image.resize(image, (img_size, img_size))
        # Use MobileNet's own preprocessing, which rescales pixels from
        # 0..255 to -1..1. The EfficientNet variant used previously is a
        # pass-through, so the MobileNet backbone was being fed raw 0..255
        # values.
        return tf.keras.applications.mobilenet.preprocess_input(image)

    def can_be_decoded(path, label):
        # The second element is passed through unchanged.
        return decode(path), label

    return can_be_decoded if is_labelled else decode


# Creating the dataset
def create_dataset(df, df_meta, batch_size, is_labelled = False, augment = False, shuffle = False):
    """Build an unbatched ``tf.data`` pipeline of decoded images.

    Args:
        df: DataFrame whose ``"Id"`` column holds the image file paths.
        df_meta: DataFrame whose rows are paired with the images; only read
            when ``is_labelled`` is truthy.
        batch_size: Accepted for interface compatibility; not used here.
        is_labelled: When truthy, every element is ``(image, df_meta row)``;
            otherwise every element is just the image.
        augment: When truthy, random augmentation is applied after decoding.
        shuffle: Accepted for interface compatibility; not used here.

    Returns:
        A prefetching dataset. It is neither shuffled nor batched by this
        function.
    """
    if is_labelled:
        sources = (df["Id"].values, df_meta.values)
    else:
        sources = df["Id"].values

    pipeline = tf.data.Dataset.from_tensor_slices(sources)
    pipeline = pipeline.map(image_read(is_labelled), num_parallel_calls=AUTOTUNE)
    if augment:
        pipeline = pipeline.map(image_preprocess(is_labelled), num_parallel_calls=AUTOTUNE)
    return pipeline.prefetch(AUTOTUNE)

def create_metadata_dataset(df, batch_size):
    """Wrap the ``"Pawpularity"`` column of ``df`` in an unbatched dataset.

    Args:
        df: DataFrame containing a ``"Pawpularity"`` column.
        batch_size: Accepted for interface compatibility; not used here.

    Returns:
        A prefetching dataset yielding one value per row, in row order.
    """
    targets = df["Pawpularity"].values
    return tf.data.Dataset.from_tensor_slices(targets).prefetch(AUTOTUNE)

def create_metadata_dataset_test(df, batch_size):
    """Wrap all values of ``df`` in an unbatched dataset.

    Args:
        df: DataFrame whose ``.values`` array is sliced row by row.
        batch_size: Accepted for interface compatibility; not used here.

    Returns:
        A prefetching dataset yielding one row per element, in row order.
    """
    rows = df.values
    return tf.data.Dataset.from_tensor_slices(rows).prefetch(AUTOTUNE)

def create_metadata(df, lab,batch_size):
    """Build a batched dataset of ``(feature row, Pawpularity)`` pairs.

    Args:
        df: DataFrame of features; its ``.values`` rows become the inputs.
        lab: DataFrame providing the ``"Pawpularity"`` column used as target.
        batch_size: Number of rows per batch.

    Returns:
        A batched, prefetching dataset in row order.
    """
    features = df.values
    targets = lab['Pawpularity'].values
    return (
        tf.data.Dataset.from_tensor_slices((features, targets))
        .batch(batch_size)
        .prefetch(AUTOTUNE)
    )

def create_metadata_test(df,batch_size):
    """Build a batched dataset from the rows of ``df`` (no targets).

    Args:
        df: DataFrame whose ``.values`` rows become the elements.
        batch_size: Number of rows per batch.

    Returns:
        A batched, prefetching dataset in row order.
    """
    rows = df.values
    return (
        tf.data.Dataset.from_tensor_slices(rows)
        .batch(batch_size)
        .prefetch(AUTOTUNE)
    )

In [7]:
# Positional hold-out split: the first 9000 rows train, all remaining rows validate.
trn = df.iloc[:9000]
# Start at 9000 (not 9001) so that row 9000 is not silently dropped from both splits.
val = df.iloc[9000:]

# The test pipeline is zipped with a target stream like the other two, so it
# gets one placeholder zero per test row; predict() does not use these values.
tes = pd.DataFrame(np.zeros((df_test.shape[0],), dtype='float32'))

# Tabular features that accompany each image. astype() returns a new frame,
# so the result has to be assigned for the float32 conversion to take effect.
xtrain_att_trn = trn.drop(["Id", "Pawpularity"], axis=1).astype('float32')
xtrain_att_val = val.drop(["Id", "Pawpularity"], axis=1).astype('float32')
test_meta = df_test.drop(["Id"], axis=1).astype('float32')

# Unbatched (image, feature row) streams.
train = create_dataset(trn, xtrain_att_trn, Batch_size, is_labelled=True, augment=False, shuffle=False)
validation = create_dataset(val, xtrain_att_val, Batch_size, is_labelled=True, augment=False, shuffle=False)
test = create_dataset(df_test, test_meta, Batch_size, is_labelled=True, augment=False, shuffle=False)

# Feature-only datasets. They are not consumed by any cell visible in this
# notebook; the names are kept in case other code relies on them. The test
# features now live in `test_meta`, so `test_att` no longer changes type from
# DataFrame to Dataset half-way through the cell.
train_att = create_metadata(xtrain_att_trn, trn, Batch_size)
test_att = create_metadata_test(test_meta, Batch_size)

# Target streams, in the same row order as the image streams above.
ytrain_trn = create_metadata_dataset(trn, Batch_size)
ytrain_val = create_metadata_dataset(val, Batch_size)
test_val = create_metadata_dataset_test(tes, Batch_size)

# Pair inputs with targets element by element, then batch:
# each batch is ((images, features), targets).
train = tf.data.Dataset.zip((train, ytrain_trn)).batch(Batch_size)
validation = tf.data.Dataset.zip((validation, ytrain_val)).batch(Batch_size)
test = tf.data.Dataset.zip((test, test_val)).batch(Batch_size)

### Model training

In [8]:
# Image branch: MobileNet convolutional base without its classification head.
mobileNet_base = MobileNet(include_top=False, pooling=None, input_shape=(img_size, img_size, channels))

# Flatten the feature maps and compress them into a 256-unit representation.
inner = mobileNet_base.output
inner = Flatten()(inner)
inner = Dense(units=256, activation='relu')(inner)
inner = Dropout(0.2)(inner)

# Tabular branch: 12 values per sample. The shape must be a tuple; `(12)` is
# just the integer 12, which newer Keras versions refuse to treat as a shape.
inp2 = Input(shape=(12,))

# Merge both branches and regress a single scalar.
con = Concatenate()([inner, inp2])
x = Dense(units = 64, activation="relu")(con)
out = Dense(units = 1)(x)

# Two-input model: [image, tabular features] -> predicted score.
model = Model([mobileNet_base.input, inp2], out)

In [9]:
# Optimise mean squared error with Adam and additionally track RMSE, which is
# logged as "root_mean_squared_error" / "val_root_mean_squared_error".
model.compile(
    optimizer='adam',
    loss="mse",
    metrics=[tf.keras.metrics.RootMeanSquaredError()],
)

# Stop once the monitored quantity has not improved for 5 epochs and roll the
# model back to the weights of its best epoch.
early_stopping = EarlyStopping(restore_best_weights=True, patience=5)

In [10]:
# Train for at most 25 epochs, evaluating on the validation set after every
# epoch; early stopping may end training sooner.
history = model.fit(
    train,
    validation_data=validation,
    epochs=25,
    callbacks=[early_stopping],
)

In [11]:
# Write the trained model to a single file in the current working directory.
model.save('petfinder_mobileNet.h5')

### Model evaluation

In [12]:
# One row per epoch, one column per logged loss/metric.
train_stats = pd.DataFrame(history.history)

# Show the epochs ordered from best to worst validation RMSE
# (returns a sorted copy; train_stats itself keeps epoch order).
train_stats.sort_values('val_root_mean_squared_error')

In [13]:
# Training vs. validation loss per epoch.
train_stats[['loss', 'val_loss']].plot.line()

In [14]:
# Training vs. validation RMSE per epoch.
train_stats[['root_mean_squared_error', 'val_root_mean_squared_error']].plot.line()

In [15]:
# Predict a score for every test image.
pred = model.predict(test)

# The model has a single output unit, so flatten the predictions to one value
# per row and pair them with the original (un-prefixed) test ids.
final = pd.DataFrame({'Id': Id, 'Pawpularity': pred.ravel()})
final.to_csv('submission.csv', index=False)