# Import Dependencies

In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import tensorflow as tf
import random
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt

2021-10-12 02:24:48.879336: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0


# Initialize

In [2]:
# --- Image / batching configuration ---------------------------------------
IMAGE_SIZE = 224   # images are resized to IMAGE_SIZE x IMAGE_SIZE
BATCH_SIZE = 64    # samples per batch in every tf.data pipeline
CHANNEL = 3        # colour channels requested from the JPEG decoder

# --- Dataset locations (Kaggle input mount) --------------------------------
root_dir = "/kaggle/input/petfinder-pawpularity-score/"
train_dir = f"{root_dir}train/"
test_dir = f"{root_dir}test/"
def seed_everything(seed=42):
    """Seed every random number generator used by the notebook.

    Seeds Python's ``random`` module, NumPy's global generator and
    TensorFlow's global generator with the same value, and sets two
    environment variables (``TF_CPP_MIN_LOG_LEVEL`` and ``PYTHONHASHSEED``).

    Args:
        seed: Integer seed shared by all generators. Defaults to 42, the
            value that was previously hard-coded.
    """
    # Ask TensorFlow's native logger to hide INFO and WARNING messages.
    os.environ["TF_CPP_MIN_LOG_LEVEL"] = '2'
    # NOTE(review): the interpreter reads PYTHONHASHSEED at start-up, so this
    # assignment presumably only influences child processes - confirm.
    os.environ['PYTHONHASHSEED'] = str(seed)

    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)

# Seed all generators once, before any shuffling, splitting or model building.
seed_everything()

# Preprocessing

## Import dataset

In [3]:
# Read the metadata tables that ship with the competition data.
train_df = pd.read_csv(f"{root_dir}train.csv")
test_df = pd.read_csv(f"{root_dir}test.csv")

# Keep the untouched test ids: the submission file needs them as-is.
Id = test_df["Id"].copy()

# Replace each bare id with the full path of its JPEG so the tf.data
# pipeline can read the image straight from the "Id" column.
train_df = train_df.assign(Id=train_dir + train_df["Id"] + ".jpg")
test_df = test_df.assign(Id=test_dir + test_df["Id"] + ".jpg")


In [4]:
# Sentinel passed to tf.data (map parallelism and prefetch depth below) so
# that TensorFlow chooses those values itself at runtime.
AUTOTUNE = tf.data.experimental.AUTOTUNE
def image_preprocess(is_labelled):
    """Build the augmentation function for a ``Dataset.map`` call.

    The augmentation is a random horizontal flip followed by random
    saturation and random contrast changes, both drawn from [0.90, 1.10].

    Args:
        is_labelled: Truthy when dataset elements are ``(image, label)``
            pairs; falsy when they are bare images.

    Returns:
        A callable taking ``(image, label)`` and returning
        ``(augmented_image, label)`` when ``is_labelled`` is truthy,
        otherwise a callable taking and returning a single image.
    """
    def augment(image):
        flipped = tf.image.random_flip_left_right(image)
        recoloured = tf.image.random_saturation(flipped, 0.90, 1.10)
        return tf.image.random_contrast(recoloured, 0.90, 1.10)

    if not is_labelled:
        return augment

    def can_be_augmented(img, label):
        # Only the image is altered; the label passes through unchanged.
        return augment(img), label

    return can_be_augmented

def image_read(is_labelled):
    """Build the file-to-tensor loading function for a ``Dataset.map`` call.

    The loader reads the file at ``path``, decodes it as a JPEG with
    ``CHANNEL`` channels, casts it to float32, resizes it to
    ``IMAGE_SIZE x IMAGE_SIZE`` and passes it through
    ``efficientnet.preprocess_input``.

    NOTE(review): the model built later in this notebook is VGG16, not
    EfficientNet - confirm that this preprocessing function is the intended one.

    Args:
        is_labelled: Truthy when dataset elements are ``(path, label)``
            pairs; falsy when they are bare paths.

    Returns:
        A callable taking ``(path, label)`` and returning ``(image, label)``
        when ``is_labelled`` is truthy, otherwise a callable taking a path
        and returning the image.
    """
    def decode(path):
        raw_bytes = tf.io.read_file(path)
        pixels = tf.image.decode_jpeg(raw_bytes, channels=CHANNEL)
        pixels = tf.cast(pixels, tf.float32)
        resized = tf.image.resize(pixels, (IMAGE_SIZE, IMAGE_SIZE))
        return tf.keras.applications.efficientnet.preprocess_input(resized)

    if not is_labelled:
        return decode

    def can_be_decoded(path, label):
        # Only the path is turned into an image; the label passes through.
        return decode(path), label

    return can_be_decoded

def create_dataset(df, batch_size, is_labelled = False, augment = False, shuffle = False):
    """Build a batched, prefetched ``tf.data`` pipeline from a DataFrame.

    Args:
        df: DataFrame whose ``"Id"`` column holds image file paths and, when
            ``is_labelled`` is true, whose ``"Pawpularity"`` column holds the
            targets.
        batch_size: Number of elements per batch.
        is_labelled: When true, elements are ``(image, label)`` pairs;
            otherwise bare images.
        augment: When true, apply the random augmentation returned by
            ``image_preprocess`` after decoding.
        shuffle: When true, reshuffle the elements on every iteration.

    Returns:
        The resulting ``tf.data.Dataset``.
    """
    if is_labelled:
        tensors = (df["Id"].values, df["Pawpularity"].values)
    else:
        tensors = df["Id"].values
    dataset = tf.data.Dataset.from_tensor_slices(tensors)

    if shuffle:
        # Shuffle before decoding: the buffer then holds path/label records
        # instead of decoded float32 images, so it can span the whole frame
        # (a uniform shuffle) at negligible memory cost. shuffle() rejects a
        # buffer of 0, hence the floor of 1 for an empty frame.
        dataset = dataset.shuffle(max(len(df), 1), reshuffle_each_iteration=True)

    dataset = dataset.map(image_read(is_labelled), num_parallel_calls=AUTOTUNE)
    if augment:
        dataset = dataset.map(image_preprocess(is_labelled), num_parallel_calls=AUTOTUNE)

    # Batch, then prefetch so input preparation overlaps with model execution.
    return dataset.batch(batch_size).prefetch(AUTOTUNE)

In [5]:
# Fraction of the labelled rows used for training; the rest is validation.
train_percent = 0.75
train_size = int(train_percent * len(train_df))

# Shuffle with an explicit random_state so the split does not depend on the
# state of the global NumPy generator: re-running this cell (or running it
# after other random draws) yields the same train/validation partition.
shuffle_df = train_df.sample(frac=1, random_state=42)

# Positional slices: first `train_size` shuffled rows train, remainder validates.
train_set = shuffle_df.iloc[:train_size]
val_set = shuffle_df.iloc[train_size:]

In [6]:
# Training data: labelled, augmented and shuffled.
train = create_dataset(
    train_set, BATCH_SIZE,
    is_labelled=True, augment=True, shuffle=True,
)
# Validation data: labelled, but neither augmented nor shuffled.
validate = create_dataset(
    val_set, BATCH_SIZE,
    is_labelled=True, augment=False, shuffle=False,
)
# Test data: unlabelled images in file order.
test = create_dataset(
    test_df, BATCH_SIZE,
    is_labelled=False, augment=False, shuffle=False,
)

2021-10-12 02:24:53.037049: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2021-10-12 02:24:53.040263: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcuda.so.1
2021-10-12 02:24:53.078431: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:941] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-10-12 02:24:53.079200: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties: 
pciBusID: 0000:00:04.0 name: Tesla P100-PCIE-16GB computeCapability: 6.0
coreClock: 1.3285GHz coreCount: 56 deviceMemorySize: 15.90GiB deviceMemoryBandwidth: 681.88GiB/s
2021-10-12 02:24:53.079290: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0
2021-10-12 02:24:53.104989: I tensorflow/stream_executor/platform/def

# Construct Model

In [7]:
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import BatchNormalization, Dense, Flatten
from tensorflow.keras import Model
# VGG16 convolutional base without its classifier head; weights are read
# from a local .h5 file rather than downloaded.
vgg = VGG16(
    include_top=False,
    weights="/kaggle/input/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5",
    input_shape=[IMAGE_SIZE, IMAGE_SIZE, CHANNEL],
)

# Freeze every layer of the base so that only the new head is trained.
for layer in vgg.layers:
    layer.trainable = False

In [8]:
# Regression head on top of the frozen VGG16 base.
model = tf.keras.models.Sequential([
    tf.keras.Input(shape=(IMAGE_SIZE, IMAGE_SIZE, CHANNEL)),
    # The input pipeline only calls efficientnet.preprocess_input, which Keras
    # documents as a pass-through, so images reach the model as float RGB in
    # roughly [0, 255]. The VGG16 weights expect VGG16 preprocessing
    # (RGB -> BGR plus per-channel mean subtraction); doing it inside the model
    # keeps the colour augmentations operating on ordinary RGB images.
    tf.keras.layers.Lambda(
        tf.keras.applications.vgg16.preprocess_input, name="vgg16_preprocess"),
    vgg,
    Flatten(),
    BatchNormalization(),
    Dense(units = 64, activation="relu"),
    # Linear output for regression: a ReLU here can output 0 for every sample,
    # where its gradient is zero and training cannot recover.
    Dense(units = 1, activation="linear")
])

In [9]:
# Train on mean squared error with Adam, and report RMSE alongside the loss.
model.compile(
    loss="mse",
    optimizer='adam',
    metrics=[tf.keras.metrics.RootMeanSquaredError()],
)

# Train Model

In [10]:
# Save the model to disk whenever the monitored validation metric improves.
model_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    "vgg_model.h5", save_best_only=True)

# Stop after 5 epochs without improvement and roll the in-memory model back to
# its best epoch. Without restore_best_weights the model keeps the weights from
# the last (5-epochs-past-best) epoch, and those are what predict() would use.
# NOTE(review): on some TensorFlow versions the roll-back presumably happens
# only when early stopping actually triggers - confirm for the version in use.
early_stopping = tf.keras.callbacks.EarlyStopping(
    patience=5, restore_best_weights=True)

In [11]:
# Train for at most 15 epochs, validating after each one; the callbacks
# handle checkpointing and early stopping.
history = model.fit(
    train,
    validation_data=validate,
    callbacks=[model_checkpoint, early_stopping],
    epochs=15,
)

Epoch 1/15


2021-10-12 02:24:58.177531: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
2021-10-12 02:24:58.186612: I tensorflow/core/platform/profile_utils/cpu_utils.cc:112] CPU Frequency: 2000150000 Hz
2021-10-12 02:24:58.324959: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.11
2021-10-12 02:24:59.253440: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublasLt.so.11
2021-10-12 02:25:06.749149: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudnn.so.8


Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15


# Model Prediction

In [12]:
# Predict a score for every test image (one row per image, in file order).
prediction = model.predict(test)

# Pair the original test ids with the flattened predictions and write the
# two-column submission file.
submission = pd.DataFrame({
    'Id': Id,
    'Pawpularity': prediction.ravel(),
})
submission.to_csv('submission.csv', index=False)