In [None]:
import os
import numpy as np 
import pandas as pd 
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from PIL import Image
import seaborn as sns
#import cuml
from sklearn.svm import SVR
import xgboost as xgb

# Name of the label column in train.csv.
TARGET = 'Pawpularity'
# Fraction of the labeled rows held out for validation in each run.
VAL_SIZE = 0.2
# NOTE(review): no seed is set, so splits and training differ between runs.
#SEED = 1

# TensorFlow settings and training parameters
AUTOTUNE = tf.data.experimental.AUTOTUNE
IMG_SIZE = 240          # images are resized (with padding) to IMG_SIZE x IMG_SIZE
BATCH_SIZE = 60
DROPOUT_RATE = 0.1      # rate of both Dropout layers in the regression head
LEARNING_RATE = 0.1     # initial learning rate of the exponential-decay schedule
DECAY_STEPS = 100
DECAY_RATE = 0.95
EPOCHS = 20
PATIENCE = 5            # early-stopping patience, in epochs

# The path gets its own name so that `base_model` only ever refers to the
# loaded Keras model, never to a string.
#BASE_MODEL_PATH = '../input/keras-pretrained-models/ResNet101_Top_ImageNet.h5'
BASE_MODEL_PATH = '../input/keras-pretrained-models/EfficientNetB1_Top_ImageNet.h5'
base_model = tf.keras.models.load_model(BASE_MODEL_PATH)
# Freeze weights in the original model
base_model.trainable = False

In [None]:
# Load the train and test metadata tables.
train_csv = pd.read_csv('../input/petfinder-pawpularity-score/train.csv')
test_csv = pd.read_csv('../input/petfinder-pawpularity-score/test.csv')

# Correlation matrix over every training column except the identifier,
# treating each column as one variable (rowvar=False).
train_no_id = train_csv.drop(columns=['Id'])
corr = np.corrcoef(train_no_id.to_numpy(), rowvar=False)
ax = sns.heatmap(corr)

In [None]:
# Image data directories
TRAIN_DIRECTORY = '../input/petfinder-pawpularity-score/train'
TEST_DIRECTORY = '../input/petfinder-pawpularity-score/test'

# Reconstruct the paths to train and test images.
train_csv['path'] = train_csv['Id'].apply(lambda x: os.path.join(TRAIN_DIRECTORY, f'{x}.jpg'))
test_csv['path'] = test_csv['Id'].apply(lambda x: os.path.join(TEST_DIRECTORY, f'{x}.jpg'))


def _show_image(axis, path, title):
    """Draw the image stored at ``path`` on ``axis`` with the given title.

    :param axis: Matplotlib axes to draw on
    :param path: Path to the image file
    :param title: Text shown above the image
    """
    # The context manager closes the file handle once the pixels are copied.
    with Image.open(path) as img:
        axis.imshow(np.asarray(img))
    axis.axis('off')
    axis.set_title(title, fontsize=12, fontfamily='monospace', fontweight='bold')


# Twelve randomly chosen training images with their scores. Positions are
# drawn from the actual table length rather than a hard-coded row count.
rand_pic = np.random.randint(len(train_csv), size=12)
fig, ax = plt.subplots(nrows=3, ncols=4, figsize=(14, 8))
for axis, pic in zip(ax.flat, rand_pic):
    label = train_csv['Pawpularity'].iloc[pic]
    _show_image(axis, train_csv['path'].iloc[pic], f'Pawpularity Score: {label}')

# Up to eight test images (their score is unknown). Panels without an image
# are blanked so a short test table does not raise.
fig, ax = plt.subplots(nrows=2, ncols=4, figsize=(14, 8))
n_test_preview = min(ax.size, len(test_csv))
for pic, axis in enumerate(ax.flat):
    if pic < n_test_preview:
        label = '?'
        _show_image(axis, test_csv['path'].iloc[pic], f'Pawpularity Score: {label}')
    else:
        axis.axis('off')

In [None]:
@tf.function
def get_image(path: str) -> tf.Tensor:
    """Read one JPEG file and turn it into a model-ready tensor.

    The file is decoded with three channels, resized with padding to
    IMG_SIZE x IMG_SIZE, cast to int32 and passed through the EfficientNet
    ``preprocess_input`` function.
    :param path: Path to image file
    :return: Tensor with preprocessed image
    """
    raw_bytes = tf.io.read_file(path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    padded = tf.image.resize_with_pad(decoded, IMG_SIZE, IMG_SIZE)
    as_int = tf.cast(padded, dtype=tf.int32)
    return tf.keras.applications.efficientnet.preprocess_input(as_int)

@tf.function
def get_image_wscore(path: str, score: int) -> tuple:
    """Load the image at ``path`` and pair it with its score.

    :param path: Path to image file
    :param score: Score belonging to the image; returned unchanged
    :return: Tuple of (preprocessed image tensor, score)
    """
    image = get_image(path)
    return image, score

def get_dataset(x, y=None) -> tf.data.Dataset:
    """Build a batched, prefetched image dataset from file paths.

    This function is intentionally not wrapped in ``tf.function``. It only
    assembles a ``tf.data`` pipeline, and every call receives a new Python
    object, so a traced version would be retraced on each call.
    :param x: Sequence of image file paths
    :param y: Optional sequence of scores aligned with ``x``. When given, the
        dataset yields (image, score) pairs; otherwise it yields images only
    :return: Dataset batched by BATCH_SIZE with prefetching enabled
    """
    if y is not None:
        source = tf.data.Dataset.from_tensor_slices((x, y))
        loader = get_image_wscore
    else:
        source = tf.data.Dataset.from_tensor_slices(x)
        loader = get_image
    # No shuffling here: element order follows the order of ``x``.
    return source.map(loader, num_parallel_calls=AUTOTUNE).batch(BATCH_SIZE).prefetch(buffer_size=AUTOTUNE)

In [None]:
# Number of independent train/validation resamples. The averaging cell further
# down reads `t` to find the per-run prediction columns, so the name is kept.
t = 10
best_props = []

# Blend weights of the three models in the per-run ensemble prediction.
NN_WEIGHT = 0.5
SVR_WEIGHT = 0.3
XGB_WEIGHT = 0.2


def _rmse(y_true, y_pred):
    """Return the root-mean-squared error between targets and predictions.

    :param y_true: Array-like of ground-truth scores
    :param y_pred: Array-like of predictions; an (n, 1) array is flattened
    :return: RMSE as a float
    """
    errors = np.asarray(y_true, dtype=float).ravel() - np.asarray(y_pred, dtype=float).ravel()
    return np.sqrt(np.mean(errors ** 2.0))


def _build_backbone():
    """Return a new Sequential model: input, horizontal random flip, base_model."""
    return tf.keras.models.Sequential(
        [tf.keras.layers.Input(shape=(IMG_SIZE, IMG_SIZE, 3)),
         tf.keras.layers.experimental.preprocessing.RandomFlip(mode='horizontal'),
         base_model])


# The test pipeline does not depend on the split, and the backbone is never
# trained (base_model.trainable is False), so the test features are computed
# once here rather than in every run. predict() needs no compile() call.
test_ds = get_dataset(x=test_csv['path'])
feature_extractor = _build_backbone()
base_pred_test = feature_extractor.predict(test_ds)

for i in range(t):

    # Keep a portion of the labeled data for validation.
    train_subset, valid_subset = train_test_split(
        train_csv[['path', TARGET]],
        test_size=VAL_SIZE, shuffle=True)

    # Create TensorFlow datasets
    train_ds = get_dataset(x=train_subset['path'], y=train_subset[TARGET])
    valid_ds = get_dataset(x=valid_subset['path'], y=valid_subset[TARGET])

    # Backbone outputs are the features for the classical regressors. The
    # datasets are not shuffled, so rows stay aligned with the subset frames.
    base_pred_train = feature_extractor.predict(train_ds)
    base_pred_valid = feature_extractor.predict(valid_ds)

    xgb_model = xgb.XGBRegressor(colsample_bytree=0.3, learning_rate=0.1,
                                 max_depth=5, alpha=5, n_estimators=100)
    xgb_model.fit(base_pred_train, train_subset[TARGET])

    svr_model = SVR(C=10)
    svr_model.fit(base_pred_train, train_subset[TARGET])

    # The neural regressor gets its own backbone wrapper, so adding the head
    # does not alter the shared feature extractor.
    nn_model = _build_backbone()
    nn_model.add(tf.keras.layers.BatchNormalization())
    nn_model.add(tf.keras.layers.Dropout(DROPOUT_RATE, name='top_dropout'))
    nn_model.add(tf.keras.layers.Dense(32, activation='relu'))
    nn_model.add(tf.keras.layers.Dropout(DROPOUT_RATE, name='top_dropout2'))
    nn_model.add(tf.keras.layers.Dense(1, name='score'))

    lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate=LEARNING_RATE,
        decay_steps=DECAY_STEPS, decay_rate=DECAY_RATE,
        staircase=True)

    # Compile the model
    nn_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr_schedule),
                     loss=tf.keras.losses.MeanSquaredError(),
                     metrics=[tf.keras.metrics.RootMeanSquaredError()])
    #nn_model.summary()
    early_stop = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss', patience=PATIENCE, restore_best_weights=True)

    # `workers` / `use_multiprocessing` are not passed: Keras documents them
    # as applying to generator / Sequence input only, and the inputs here are
    # tf.data datasets.
    history = nn_model.fit(train_ds, validation_data=valid_ds,
                           epochs=EPOCHS, callbacks=[early_stop])

    # predict() returns an (n, 1) array for the single-unit head; flatten it.
    nn_valid = nn_model.predict(valid_ds).ravel()
    svr_valid = svr_model.predict(base_pred_valid)
    xgb_valid = xgb_model.predict(base_pred_valid)

    # Performance on the validation split
    print('NN RSME =', _rmse(valid_subset[TARGET], nn_valid), '\n')
    print('SVR RSME =', _rmse(valid_subset[TARGET], svr_valid), '\n')
    print('XGB RSME =', _rmse(valid_subset[TARGET], xgb_valid), '\n')

    # Find best weight
    # NOTE(review): the disabled search below averages the raw error without
    # squaring it, so it is not an RMSE; fix that before re-enabling.
#     rmse_valid = [0 for i in range(10)]
#     idx_min = 0
#     rmse_min = 100
#     for w in range(10):
#         prop = w/10
#         valid_pred = svr_valid*prop+np.array(nn_valid).reshape(svr_valid.shape)*(1-prop)
#         rmse_valid[w] = np.sqrt(np.mean(valid_subset[TARGET]-valid_pred))
#         if rmse_valid[w]<rmse_min:
#             rmse_min = rmse_valid[w]
#             idx_min = w
#     best_props.append(idx_min/10)

    # Prediction for test dataset: one column per model and run, plus the blend.
    nn_col = 'nn_Pawpularity' + str(i)
    svr_col = 'svr_Pawpularity' + str(i)
    xgb_col = 'xgb_Pawpularity' + str(i)
    test_csv[nn_col] = nn_model.predict(test_ds).ravel()
    test_csv[svr_col] = svr_model.predict(base_pred_test)
    test_csv[xgb_col] = xgb_model.predict(base_pred_test)
    test_csv[TARGET + str(i)] = (test_csv[svr_col] * SVR_WEIGHT
                                 + test_csv[nn_col] * NN_WEIGHT
                                 + test_csv[xgb_col] * XGB_WEIGHT)

In [None]:
# best_props

In [None]:
# Average the blended per-run predictions row by row into the final score column.
run_columns = [TARGET + str(run) for run in range(t)]
test_csv[TARGET] = test_csv[run_columns].mean(axis=1)
test_csv

In [None]:
# Write the Id and final score columns to the submission file, without the index.
submission = test_csv[['Id', TARGET]]
submission.to_csv('submission.csv', index=False)

In [None]:
# linear_model = tf.keras.Sequential([
#     tf.keras.layers.Normalization(axis=-1),
#     layers.Dense(32, activation='relu'),
#     layers.Dense(1)
# ])
# linear_model.compile(loss=tf.keras.losses.MeanSquaredError(),optimizer=tf.keras.optimizers.Adam(LEARNING_RATE))

In [None]:
# train_target = train_csv.pop(TARGET)
# train_csv.pop('path')
# train_csv.pop('Id')

In [None]:
# history = linear_model.fit(
#     train_csv,
#     train_target,
#     validation_split=0.2,
#     verbose=0, epochs=EPOCHS)

In [None]:
# test_csv.pop('path')
# image_pred = test_csv.pop('image_Pawpularity')
# test_id = test_csv.pop('Id')
# test_csv['meta_Pawpularity'] = linear_model.predict(test_csv)
# test_csv['Id'] = test_id
# test_csv['image_Pawpularity'] = image_pred

Reference: [Keras example — image classification via fine-tuning with EfficientNet](https://colab.research.google.com/github/keras-team/keras-io/blob/master/examples/vision/ipynb/image_classification_efficientnet_fine_tuning.ipynb#scrollTo=pTMdgllJaAiQ)