In [1]:
!pip install timm

In [3]:
import os
import gc
import random

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import lightgbm as lgb
import tensorflow as tf
from tensorflow.keras import Sequential, Model
from tensorflow.keras.layers import Dense, Conv2D, Flatten, Dropout, Input, Concatenate, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers.schedules import ExponentialDecay

from PIL import Image

from sklearn.model_selection import train_test_split

In [6]:
def set_seed(seed):
    """Seed every random number generator this notebook relies on.

    Seeds NumPy, Python's ``random`` module and TensorFlow, and exports two
    environment variables (TensorFlow log level and the Python hash seed).

    Args:
        seed: Integer seed applied to all generators.
    """
    np.random.seed(seed)
    random.seed(seed)
    tf.random.set_seed(seed)
    # '2' asks the TensorFlow C++ backend to hide INFO and WARNING log lines.
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    # The variable Python actually reads is PYTHONHASHSEED; the previous
    # spelling ('PYTHONHASESEED') was silently ignored.
    # NOTE(review): Python reads this variable at interpreter start-up, so
    # setting it here only affects processes spawned afterwards.
    os.environ['PYTHONHASHSEED'] = str(seed)
    
# Seed all random number generators with one fixed value.
set_seed(42)

In [4]:
# Configuration
AUTOTUNE = tf.data.experimental.AUTOTUNE  # AUTOTUNE asks tf.data to tune the value dynamically at runtime --> reduces computation time.
img_size = 224  # images are resized to img_size x img_size before entering the model
channels = 3  # number of colour channels requested when decoding the JPEGs
Batch_size = 32  # batch size passed to every create_dataset call

In [5]:
# Directories holding the training and test images; individual image paths
# are built from these as '<dir>/<Id>.jpg'.
train_dir = '../input/petfinder-pawpularity-score/train'
test_dir = '../input/petfinder-pawpularity-score/test'

# Data Processing

In [13]:
# Load the training and test tables.
df = pd.read_csv('../input/petfinder-pawpularity-score/train.csv')
df_test = pd.read_csv('../input/petfinder-pawpularity-score/test.csv')
# Keep an independent copy of the test Ids.
Id = df_test['Id'].copy()

def get_file_path(id):
    """Return the path of the training JPEG named after ``id``."""
    return '{}/{}.jpg'.format(train_dir, id)

# Add the image path of every training row as a new 'file_path' column.
df['file_path'] = df['Id'].apply(get_file_path)

def get_test_file_path(id):
    """Return the path of the test JPEG named after ``id``."""
    return '{}/{}.jpg'.format(test_dir, id)

# Add the image path of every test row as a new 'file_path' column.
df_test['file_path'] = df_test['Id'].apply(get_test_file_path)

In [10]:
# Augmentations
def image_processing(is_labelled):
    """Return the augmentation function matching the dataset's element shape.

    Args:
        is_labelled: Truthy when dataset elements are ``(image, label)`` pairs,
            falsy when they are bare images.

    Returns:
        A callable taking ``(image, label)`` and returning
        ``(augmented_image, label)`` when ``is_labelled`` is truthy, otherwise
        a callable taking and returning a single image.
    """
    def augment(image):
        # Random horizontal flip, then saturation and contrast factors drawn
        # from the range 0.95..1.05.
        flipped = tf.image.random_flip_left_right(image)
        recoloured = tf.image.random_saturation(flipped, 0.95, 1.05)
        return tf.image.random_contrast(recoloured, 0.95, 1.05)

    if not is_labelled:
        return augment

    def can_be_augmented(img, label):
        # Only the image is altered; the label passes through untouched.
        return augment(img), label

    # TODO(review): the original author questioned whether augmenting this way is correct.
    return can_be_augmented

# read image
def image_read(is_labelled):
    """Return the image-loading function matching the dataset's element shape.

    Args:
        is_labelled: Truthy when dataset elements are ``(path, label)`` pairs,
            falsy when they are bare paths.

    Returns:
        A callable taking ``(path, label)`` and returning ``(image, label)``
        when ``is_labelled`` is truthy, otherwise a callable mapping a path to
        an image.
    """
    def decode(path):
        raw_bytes = tf.io.read_file(path)
        pixels = tf.image.decode_jpeg(raw_bytes, channels=channels)
        pixels = tf.cast(pixels, tf.float32)
        pixels = tf.image.resize(pixels, (img_size, img_size))
        # preprocess_input() puts the image into the format the model expects.
        return tf.keras.applications.efficientnet.preprocess_input(pixels)

    if not is_labelled:
        return decode

    def can_be_decoded(path, label):
        # Only the path is turned into an image; the label passes through.
        return decode(path), label

    return can_be_decoded

# create dataset
def create_dataset(df, batch_size, is_labelled=False, augment=False, shuffle=False):
    """Build a batched, prefetched ``tf.data`` pipeline from a dataframe.

    Args:
        df: Dataframe with a ``file_path`` column and, when ``is_labelled`` is
            true, a ``Pawpularity`` column.
        batch_size: Number of elements per batch.
        is_labelled: When true, elements are ``(image, label)`` pairs;
            otherwise they are bare images.
        augment: When true, the augmentation returned by ``image_processing``
            is mapped over the decoded images.
        shuffle: When true, records are reshuffled on every iteration.

    Returns:
        The dataset after reading, optional augmentation, batching and
        prefetching.
    """
    image_read_fn = image_read(is_labelled)
    image_preprocess_fn = image_processing(is_labelled)

    paths = df['file_path'].values
    if is_labelled:
        dataset = tf.data.Dataset.from_tensor_slices((paths, df['Pawpularity'].values))
    else:
        dataset = tf.data.Dataset.from_tensor_slices(paths)

    if shuffle:
        # Shuffle the lightweight path/label records before decoding: the
        # buffer then holds strings instead of decoded images, and a buffer
        # covering the whole frame gives a uniform shuffle. Shuffling after
        # decoding with a fixed 1024 buffer held up to 1024 decoded images in
        # memory and only mixed elements within that window.
        dataset = dataset.shuffle(max(len(df), 1), reshuffle_each_iteration=True)

    dataset = dataset.map(image_read_fn, num_parallel_calls=AUTOTUNE)
    if augment:
        dataset = dataset.map(image_preprocess_fn, num_parallel_calls=AUTOTUNE)
    dataset = dataset.batch(batch_size)
    return dataset.prefetch(AUTOTUNE)

In [14]:
# Split the data: the first 9000 rows train the model, the rest validate it.
# The validation slice starts at 9000 (not 9001) so that row 9000 is no longer
# silently dropped from both sets.
trn = df.iloc[:9000]
val = df.iloc[9000:]

# Only the training pipeline is augmented and shuffled.
train = create_dataset(trn, Batch_size, is_labelled=True, augment=True, shuffle=True)
valid = create_dataset(val, Batch_size, is_labelled=True, augment=False, shuffle=False)
test = create_dataset(df_test, Batch_size, is_labelled=False, augment=False, shuffle=False)

# CNN Model

In [16]:
# Load the pretrained EfficientNet model saved as a Keras .h5 file.
img_mod = '../input/keras-applications-models/EfficientNetB0.h5'
efnet = tf.keras.models.load_model(img_mod)

# Freeze the loaded model: its layers will not be trained.
efnet.trainable = False

In [18]:
# Build the model: the frozen pretrained network followed by a small dense head.
model = Sequential()
model.add(Input(shape=(img_size, img_size, channels)))  # image input
model.add(efnet)  # pretrained model loaded earlier
model.add(BatchNormalization())
model.add(Dropout(0.6))
model.add(Dense(units=64, activation='relu'))
# NOTE(review): the single output unit uses ReLU; a regression head usually has
# a linear output, and a ReLU unit can get stuck at 0 - confirm this is intended.
model.add(Dense(units=1, activation='relu'))

In [23]:
# Early stopping: watch val_loss with a patience of 5 epochs and restore the
# best weights when training stops.
# (The variable keeps its original spelling because the training cell refers to it.)
ealry_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

# Learning-rate schedule: starts at 1e-3 and decays by a factor of 0.96 every
# 100 steps, in discrete (staircase) jumps.
lr_scheduler = ExponentialDecay(
    initial_learning_rate=1e-3, decay_steps=100, decay_rate=0.96, staircase=True
)

In [22]:
# Train with MSE loss and report RMSE; Adam follows the decaying learning-rate schedule.
model.compile(
    loss='mse',
    optimizer=tf.keras.optimizers.Adam(learning_rate=lr_scheduler),
    metrics=[tf.keras.metrics.RootMeanSquaredError()],
)

# Fit for up to 20 epochs, validating on `valid`; `predictor` holds the value returned by fit().
predictor = model.fit(
    train,
    epochs=20,
    validation_data=valid,
    callbacks=[ealry_stopping],
)

In [None]:
# Predict on the test pipeline.
pred = model.predict(test)

# Put the predictions into the sample-submission frame and display it.
# NOTE(review): this assumes the sample submission rows are in the same order
# as df_test - confirm, since the saved `Id` series is not used here.
submit = pd.read_csv(
    '../input/petfinder-pawpularity-score/sample_submission.csv'
).assign(Pawpularity=pred)
submit