# Data Loading

In [1]:
import os
import cv2
import numpy as np
import pandas as pd
from PIL import Image
from matplotlib import pyplot as plt
import seaborn as sns

In [2]:
# Test-set metadata shipped with the competition data (one row per image).
test = pd.read_csv(
    filepath_or_buffer='../input/open-your-eyes-for-the-beauty-around-you/test.csv',
)

def get_test_file_path(image_id):
    """Build the path of a test image from its ``image_id`` (the image file name)."""
    template = "../input/open-your-eyes-for-the-beauty-around-you/test_images/{}"
    return template.format(image_id)

# Attach the on-disk location of every image so the Dataset can read it by row.
test['file_path'] = test['image_id'].map(get_test_file_path)

display(test.head())

Unnamed: 0,image_id,brightness,contrast,colorfulness,sharpness,file_path
0,00439614c2.jpg,0.387638,0.379885,0.131153,11.679899,../input/open-your-eyes-for-the-beauty-around-...
1,0075aff7fa.jpg,0.665401,0.242671,0.246394,16.62509,../input/open-your-eyes-for-the-beauty-around-...
2,009ed311ee.jpg,0.416759,0.347511,0.165954,18.508671,../input/open-your-eyes-for-the-beauty-around-...
3,00c942fbe7.jpg,0.354192,0.280639,0.219046,21.409334,../input/open-your-eyes-for-the-beauty-around-...
4,011ef38420.jpg,0.458954,0.159165,0.305201,21.264408,../input/open-your-eyes-for-the-beauty-around-...


# Directory settings

In [3]:
# ====================================================
# Directory settings
# ====================================================
import os

# Folder that receives notebook artifacts. The value has no trailing slash, so
# paths below it must be built with os.path.join, not plain string concatenation.
OUTPUT_DIR = '/kaggle/working'
# MODEL_DIR = '../input/petfinder-efficientnet-b0-starter-training/'
# LGB_MODEL_DIR = '../input/petfinder-efficientnet-b0-lgb-training/'
# if not os.path.exists(OUTPUT_DIR):
#     os.makedirs(OUTPUT_DIR)

# CFG

In [4]:
# ====================================================
# CFG
# ====================================================
class CFG:
    """Static configuration shared by the inference cells of this notebook."""

    # DataLoader settings
    num_workers = 4
    # size = 512  (disabled; get_transforms hard-codes the 384 x 512 resize)
    batch_size = 64

    # Backbone architecture name handed to timm.create_model
    model_name = 'tf_efficientnet_b3_ns'

    # Seed passed to seed_torch
    seed = 42

    # Output width of the regression head and the name of the target column
    target_size = 1
    target_col = 'MOS'

    # Number of cross-validation folds whose checkpoints are ensembled
    n_fold = 5

# Library

In [5]:
# ====================================================
# Library
# ====================================================
import os
import gc
import sys
import math
import time
import pickle
import random
import shutil
from pathlib import Path
from contextlib import contextmanager
from collections import defaultdict, Counter

import scipy as sp
import numpy as np
import pandas as pd

from sklearn import preprocessing
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import StratifiedKFold, GroupKFold, KFold

from tqdm.auto import tqdm
from functools import partial

import cv2
from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam, SGD
import torchvision.models as models
from torch.nn.parameter import Parameter
from torch.utils.data import DataLoader, Dataset
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, CosineAnnealingLR, ReduceLROnPlateau

import albumentations as A
from albumentations.pytorch import ToTensorV2
from albumentations import ImageOnlyTransform

sys.path.append('/kaggle/input/timm-pytorch-image-models/pytorch-image-models-master')
import timm

import lightgbm as lgb

import warnings
warnings.filterwarnings('ignore')

# Run on the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Utils

In [6]:
# ====================================================
# Utils
# ====================================================
def get_score(y_true, y_pred):
    """Return the root-mean-squared error (RMSE) between targets and predictions.

    Args:
        y_true: Ground-truth target values.
        y_pred: Predicted values, aligned with ``y_true``.

    Returns:
        The RMSE as a float.
    """
    # The `squared=False` keyword of mean_squared_error was deprecated in
    # scikit-learn 1.4 and removed in 1.6. Taking the square root of the plain
    # MSE gives the same value for single-output targets on every version.
    return np.sqrt(mean_squared_error(y_true, y_pred))


def init_logger(log_file=None):
    """Create (or reconfigure) the notebook logger writing to stderr and a file.

    Args:
        log_file: Path of the log file. When omitted, ``train.log`` inside
            ``OUTPUT_DIR`` is used.

    Returns:
        The ``logging.Logger`` registered under this module's ``__name__``,
        set to INFO level with exactly one stream handler and one file handler.
    """
    from logging import getLogger, INFO, FileHandler, Formatter, StreamHandler

    if log_file is None:
        # Plain concatenation (OUTPUT_DIR + 'train.log') dropped the path
        # separator and produced '/kaggle/workingtrain.log'.
        log_file = os.path.join(OUTPUT_DIR, 'train.log')

    logger = getLogger(__name__)
    logger.setLevel(INFO)

    # getLogger returns the same object on every call, so re-running the cell
    # would otherwise stack handlers and print each message several times.
    for stale in list(logger.handlers):
        logger.removeHandler(stale)
        stale.close()

    for handler in (StreamHandler(), FileHandler(filename=log_file)):
        handler.setFormatter(Formatter("%(message)s"))
        logger.addHandler(handler)
    return logger

# Notebook-wide logger, created with init_logger's default log file.
LOGGER = init_logger()


def seed_torch(seed=42):
    """Seed Python, NumPy and PyTorch (CPU and CUDA) random generators with ``seed``.

    Also exports ``PYTHONHASHSEED`` and asks cuDNN for deterministic kernels.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    # Each library keeps its own generator, so every one is seeded explicitly.
    for seeder in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        seeder(seed)
    torch.backends.cudnn.deterministic = True

# Seed every random generator once, using the seed from the config class.
seed_torch(seed=CFG.seed)

# Dataset

In [7]:
# ====================================================
# Dataset
# ====================================================
class TestDataset(Dataset):
    """Dataset yielding unlabeled images listed in a DataFrame's ``file_path`` column.

    Args:
        df: DataFrame with a ``file_path`` column holding one image path per row.
        transform: Optional callable invoked as ``transform(image=img)`` that
            returns a mapping with an ``'image'`` entry (the albumentations
            calling convention).
    """

    def __init__(self, df, transform=None):
        self.df = df
        self.file_names = df['file_path'].values
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Load image ``idx`` as RGB and apply the transform, if any.

        Raises:
            FileNotFoundError: If OpenCV cannot read the file.
        """
        file_path = self.file_names[idx]
        image = cv2.imread(file_path)
        if image is None:
            # cv2.imread signals a missing or undecodable file by returning None;
            # without this check the failure surfaces later as an opaque
            # cvtColor error that does not mention the offending path.
            raise FileNotFoundError(f"Could not read image file: {file_path}")
        # OpenCV decodes to BGR channel order; convert to RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.transform:
            image = self.transform(image=image)['image']
        return image

# Transforms

In [8]:
# ====================================================
# Transforms
# ====================================================
def get_transforms(*, data):
    """Build the albumentations pipeline for the requested split.

    Args:
        data: ``'train'`` for a random resized crop, or ``'valid'`` for a plain
            resize. Both use the size arguments ``384, 512`` and are followed
            by normalization and tensor conversion.

    Returns:
        An ``A.Compose`` pipeline.

    Raises:
        ValueError: If ``data`` is neither ``'train'`` nor ``'valid'``.
    """
    if data not in ('train', 'valid'):
        # Falling through used to return None, which silently disables all
        # preprocessing in the Dataset instead of failing.
        raise ValueError(f"Unknown data split {data!r}; expected 'train' or 'valid'")

    if data == 'train':
        spatial = A.RandomResizedCrop(384, 512, scale=(0.85, 1.0))
    else:
        spatial = A.Resize(384, 512)

    # Steps shared by both splits.
    return A.Compose([
        spatial,
        A.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
        ToTensorV2(),
    ])

# MODEL

In [9]:
# ====================================================
# MODEL
# ====================================================
class CustomModel(nn.Module):
    """timm backbone with its classifier removed, plus a separate linear head.

    Args:
        cfg: Config object providing ``model_name`` and ``target_size``.
        pretrained: Forwarded to ``timm.create_model``.
    """

    def __init__(self, cfg, pretrained=False):
        super().__init__()
        self.cfg = cfg
        backbone = timm.create_model(cfg.model_name, pretrained=pretrained)
        # Record the classifier's input width, then swap the classifier for a
        # pass-through so the backbone returns the features that used to feed it.
        self.n_features = backbone.classifier.in_features
        backbone.classifier = nn.Identity()
        self.model = backbone
        self.fc = nn.Linear(self.n_features, cfg.target_size)

    def feature(self, image):
        """Return the backbone output (classifier replaced by identity)."""
        return self.model(image)

    def forward(self, image):
        """Return the linear head applied to the backbone features."""
        return self.fc(self.feature(image))

# Helper functions

In [10]:
# ====================================================
# Helper functions
# ====================================================
def get_features(test_loader, model, device):
    """Run ``model.feature`` over every batch and stack the results.

    Args:
        test_loader: Sized iterable yielding batches of image tensors.
        model: Object exposing ``eval()`` and ``feature(images)``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        A NumPy array with the per-batch features concatenated along axis 0,
        in loader order.

    Raises:
        ValueError: If the loader yields no batches.
    """
    model.eval()
    features = []
    # Inference only: a single no_grad scope covers the whole loop.
    with torch.no_grad():
        for images in tqdm(test_loader, total=len(test_loader)):
            feature = model.feature(images.to(device))
            features.append(feature.to('cpu').numpy())
    if not features:
        # np.concatenate([]) raises a ValueError that does not name the cause.
        raise ValueError("test_loader yielded no batches; nothing to extract features from")
    return np.concatenate(features)

In [11]:
# Extract backbone features for the test set with every fold's checkpoint.
# IMG_FEATURES[fold] holds the array returned by get_features for that fold.
IMG_FEATURES = []
test_dataset = TestDataset(test, transform=get_transforms(data='valid'))
test_loader = DataLoader(test_dataset, 
                         batch_size=CFG.batch_size * 2, 
                         shuffle=False, 
                         num_workers=CFG.num_workers, pin_memory=True, drop_last=False)
for fold in range(CFG.n_fold):
    model = CustomModel(CFG, pretrained=False)
    # NOTE(security): torch.load unpickles the checkpoint; only load trusted files.
    state = torch.load('../input/tf-efficientnet-b3-ns/'+f'{CFG.model_name}_fold{fold}_best.pth', 
                       map_location=torch.device('cpu'))['model']
    model.load_state_dict(state)
    model.to(device)
    fold_features = get_features(test_loader, model, device)
    IMG_FEATURES.append(fold_features)
    # Drop the model as well as the state dict: while `model` is still bound its
    # weights stay allocated on the device and empty_cache() cannot release them.
    del model, state; gc.collect()
    torch.cuda.empty_cache()

  0%|          | 0/13 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

# LGB

In [12]:
# Columns fed to LightGBM: the four image statistics from test.csv followed by
# the img_0 .. img_1535 columns that inference_single_lightgbm fills per fold.
features = ['brightness', 'contrast', 'colorfulness', 'sharpness']
features.extend(f"img_{i}" for i in range(1536))

In [13]:
def inference_single_lightgbm(test, features, model_path, fold):
    """Predict with one fold's pickled model on that fold's image features.

    Args:
        test: DataFrame to predict on. It is modified in place: the ``img_<i>``
            columns are (over)written with ``IMG_FEATURES[fold]``.
        features: Column names passed to the model, in order.
        model_path: Path of the pickled model for this fold.
        fold: Index into the module-level ``IMG_FEATURES`` list.

    Returns:
        The output of ``clf.predict`` for ``test[features]``, using the model's
        ``best_iteration``.
    """
    fold_embeddings = IMG_FEATURES[fold]
    # Derive the number of img_ columns from the array instead of hard-coding
    # 1536, so a backbone with a different feature width also works.
    img_cols = [f"img_{i}" for i in range(fold_embeddings.shape[1])]
    test[img_cols] = fold_embeddings
    # NOTE(security): pickle.load can execute arbitrary code; only load model
    # files from a trusted source.
    with open(model_path, 'rb') as fin:
        clf = pickle.load(fin)
    return clf.predict(test[features], num_iteration=clf.best_iteration)

In [14]:
# One pickled LightGBM model per fold. The fold count comes from CFG.n_fold so
# it always matches the number of entries the extraction loop put in IMG_FEATURES.
model_paths = [
    (fold, '../input/lgbm-kaggle/'+f'kaggle_weightslightgbm_fold{fold}.pkl')
    for fold in range(CFG.n_fold)
]
fold_predictions = [
    inference_single_lightgbm(test, features, model_path, fold)
    for fold, model_path in model_paths
]
# Ensemble: average the per-fold predictions row by row.
predictions = np.mean(fold_predictions, axis=0)

In [15]:
# Store the averaged predictions under the configured target column, then write
# and preview the two-column submission file.
test[CFG.target_col] = predictions
submission = test[['image_id', CFG.target_col]]
submission.to_csv('submission.csv', index=False)
display(submission.head())

Unnamed: 0,image_id,MOS
0,00439614c2.jpg,3.275537
1,0075aff7fa.jpg,3.881359
2,009ed311ee.jpg,3.357182
3,00c942fbe7.jpg,3.464328
4,011ef38420.jpg,3.346842
