In [None]:
from timm import create_model
import gc
import time
from fastai.vision.all import *
from torch.utils.data import Dataset, DataLoader
import torch
import torchvision
import timm
import torch.nn as nn
import torch.nn.functional as F
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold

# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

print(f'Using device: {device}')

In [None]:
# Single source of truth for every RNG seeded in this notebook.
seed = 365
torch.cuda.empty_cache()

# fastai helper; seeded once (the original called it twice with the same value).
set_seed(seed, reproducible=True)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.backends.cudnn.deterministic = True
# `use_deterministic_algorithms` is a function: assigning True to it (as the
# original did) enables nothing and overwrites the API for the rest of the
# session. warn_only=True (torch >= 1.11) reports ops that lack a deterministic
# implementation instead of raising in the middle of training.
torch.use_deterministic_algorithms(True, warn_only=True)

# Training constants used directly by the cells below.
BATCH_SIZE = 32
Image_size = 224
epoch = 8
Model_name = "swin_large_patch4_window7_224"
META_FEATURES = ['is_cat','n_pets','pet_ratio']

# Dictionary view of the run configuration.
# NOTE(review): 'seed' here is 42 while the module-level `seed` is 365 -- confirm
# which one downstream code is meant to use.
params = dict(
    model='swin_large_patch4_window7_224',
    dense_features=['is_cat','n_pets','pet_ratio'],
    pretrained=True,
    inp_channels=3,
    im_size=224,
    device=device,
    batch_size=32,
    num_workers=8,
    out_features=1,
    seed=42,
    N_FOLDS=10,
)

In [None]:
if not os.path.exists('../petfin/output/checkpoints/'):
    os.makedirs('../petfin/output/checkpoints/')
#!cp '../petfin/swin_large_patch4_window12_384_22kto1k.pth' '../petfin/output/checkpoints/swin_large_patch4_window12_384_22kto1k.pth'
!cp '../petfin/swin_large_patch4_window7_224_22kto1k.pth' '../petfin/output/checkpoints/swin_large_patch4_window7_224_22kto1k.pth'

In [None]:
# Root folder of the competition data; every other data path is derived from it.
dataset_path = Path('../petfin/petfinder-pawpularity-score/')
# Training table read from train_add_f.csv inside the dataset folder.
train_df = pd.read_csv(dataset_path.joinpath('train_add_f.csv'))

In [None]:
# Derive each image's file path from its Id, then drop Id (it is only needed
# for the path).
train_df['path'] = train_df['Id'].map(lambda x: str(dataset_path/'train'/x) + '.jpg')
train_df = train_df.drop(columns=['Id'])
# Shuffle the rows. random_state pins the permutation to `seed` so it no longer
# depends on whatever the global NumPy RNG state happens to be at this point.
train_df = train_df.sample(frac=1, random_state=seed).reset_index(drop=True)

In [None]:
# Rescale the Pawpularity column by 1/100 to form the regression target.
train_df['norm_score'] = train_df['Pawpularity'].div(100)

In [None]:
# Number of equal-width bins: ceil(2 * n^(1/3)) for n rows.
num_bins = int(np.ceil(2 * len(train_df) ** (1. / 3)))
# Integer bin index of every normalised score; used as the stratification label.
train_df['bins'] = pd.cut(
    train_df['norm_score'],
    bins=num_bins,
    labels=False,
)

In [None]:
# Assign every row to exactly one validation fold, stratified on the score bins.
N_FOLDS = params['N_FOLDS']
train_df['fold'] = -1
# Resolve the column position by name instead of assuming 'fold' is the last column.
fold_col = train_df.columns.get_loc('fold')

strat_kfold = StratifiedKFold(n_splits=N_FOLDS, random_state=seed, shuffle=True)
# split() yields (train_idx, test_idx); the second element is the held-out part,
# so those rows are the ones labelled with fold number i.
for i, (_, valid_index) in enumerate(strat_kfold.split(train_df.index, train_df['bins'])):
    train_df.iloc[valid_index, fold_col] = i

train_df['fold'] = train_df['fold'].astype('int')
# No row may keep the -1 placeholder.
assert train_df['fold'].between(0, N_FOLDS - 1).all(), "some rows were not assigned a fold"

In [None]:
def init_logger(log_file='train.log'):
    """Return this module's logger, writing INFO+ messages to the console and `log_file`.

    ``getLogger(__name__)`` returns the same logger object on every call, so
    handlers attached by an earlier call (e.g. when the notebook cell is re-run)
    are removed and closed first. Without that, every message would be emitted
    once per previous call and the old file handles would stay open.

    Args:
        log_file: Path of the file that receives a copy of every log message.

    Returns:
        The configured ``logging.Logger``.
    """
    from logging import getLogger, INFO, FileHandler, Formatter, StreamHandler

    logger = getLogger(__name__)
    logger.setLevel(INFO)

    # Iterate over a copy: removeHandler mutates logger.handlers.
    for stale in list(logger.handlers):
        logger.removeHandler(stale)
        stale.close()

    # Console first, then file; both print the bare message.
    for handler in (StreamHandler(), FileHandler(filename=log_file)):
        handler.setFormatter(Formatter("%(message)s"))
        logger.addHandler(handler)
    return logger


# Notebook-wide logger; messages go to the console and to train.log.
LOGGER = init_logger(log_file='train.log')

In [None]:
def petfinder_rmse(input,target):
    """RMSE between sigmoid(logits) and targets, scaled by 100.

    Args:
        input: Raw model outputs (logits); flattened before use.
        target: Targets on the same scale as sigmoid(input); flattened as well so
            that a ``(N, 1)`` target cannot broadcast against the ``(N,)``
            predictions into an ``(N, N)`` error matrix.

    Returns:
        A scalar tensor: ``100 * sqrt(mean((sigmoid(input) - target) ** 2))``.
    """
    # torch.sigmoid replaces the deprecated F.sigmoid.
    preds = torch.sigmoid(input.flatten())
    return 100*torch.sqrt(F.mse_loss(preds, target.flatten()))

In [None]:
def get_data(fold):
    """Build the fastai image DataLoaders for one cross-validation fold.

    Rows of ``train_df`` whose 'fold' column equals `fold` form the validation
    split; all other rows are used for training. ``train_df`` itself is not
    modified (the split flag is added to a copy).

    Args:
        fold: Fold number to hold out for validation.

    Returns:
        The ``ImageDataLoaders`` object built from the flagged copy.
    """
    train_df_f = train_df.copy()
    train_df_f['is_valid'] = (train_df_f['fold'] == fold)

    # Sizes, seed and worker count come from the notebook config instead of
    # being repeated here as literals.
    dls = ImageDataLoaders.from_df(train_df_f,
                               valid_col='is_valid',  # boolean column marking validation rows
                               seed=seed,
                               fn_col='path',  # column holding the image file path
                               label_col='norm_score',  # column holding the regression target
                               y_block=RegressionBlock,
                               bs=BATCH_SIZE,
                               num_workers=params['num_workers'],
                               item_tfms=Resize(Image_size),
                               batch_tfms=setup_aug_tfms([Brightness(), Contrast(), Hue(), Saturation()]))

    return dls

In [None]:
# Column names of the metadata features, taken from the frame itself so a
# missing column fails here.
meta_target = list(train_df[META_FEATURES].columns)
LOGGER.info(f"Meta features length:{len(meta_target)}")

In [None]:
the_data = get_data(0)
# Check item counts, not batch counts: batch counts are rounded per split, so
# comparing their sum with len(train_df)//batch_size only holds by coincidence.
assert len(the_data.train_ds) + len(the_data.valid_ds) == len(train_df)
# The validation split must be exactly the rows labelled fold 0.
assert len(the_data.valid_ds) == int((train_df['fold'] == 0).sum())
# Log success only after the checks have passed.
LOGGER.info('Checking of get_data function is finished')

In [None]:
def get_learner(fold_num):
    """Create a mixed-precision fastai Learner for one cross-validation fold.

    Args:
        fold_num: Fold number passed to ``get_data`` as the validation fold.

    Returns:
        A ``Learner`` (converted with ``to_fp16``) wrapping a timm model with
        ``data.c`` outputs, trained with ``BCEWithLogitsLossFlat`` and reporting
        ``petfinder_rmse``.
    """
    data = get_data(fold_num)

    # Architecture and pretrained flag come from the notebook config rather than
    # being hard-coded a second time here.
    model = create_model(Model_name, pretrained=params['pretrained'], num_classes=data.c)

    learn = Learner(data, model, loss_func=BCEWithLogitsLossFlat(), metrics=petfinder_rmse).to_fp16()

    return learn

In [None]:
# Build the test frame with the same columns as the training frame.
test_df = pd.read_csv(dataset_path/'test.csv')
# Placeholder target so the frame has the same label columns as train_df.
test_df['Pawpularity'] = 1
test_df['path'] = test_df['Id'].map(lambda x: str(dataset_path/'test'/x) + '.jpg')
test_df = test_df.drop(columns=['Id'])
# The original line recomputed train_df['norm_score'] here (a no-op repeat of
# the earlier cell); the column belongs on test_df.
test_df['norm_score'] = test_df['Pawpularity']/100

In [None]:
# Learning-rate range test on a throwaway fold-0 learner.
lr_probe = get_learner(fold_num=0)
suggested_lrs = lr_probe.lr_find(end_lr=3e-2)

# Release the probe model before the cross-validation loop builds its own
# learners; otherwise it can stay resident in (GPU) memory.
del lr_probe
gc.collect()
torch.cuda.empty_cache()

suggested_lrs

In [None]:
# Train one model per fold and collect its test-time-augmented test predictions.
all_preds = []

for i in range(N_FOLDS):

    print(f'Fold {i} results')

    learn = get_learner(fold_num=i)

    # NOTE(review): SaveModelCallback uses its default monitor while early
    # stopping watches petfinder_rmse -- confirm the two are meant to differ.
    learn.fit_one_cycle(
        5, 1e-5,
        cbs=[SaveModelCallback(fname=f'swinl224_{i}'),
             EarlyStoppingCallback(monitor='petfinder_rmse', comp=np.less, patience=2)])

    learn.recorder.plot_loss()

    # Reuse the learner's own DataLoaders for the test set instead of building a
    # second, randomly split ImageDataLoaders from train_df on every fold.
    dls = learn.dls
    test_dl = dls.test_dl(test_df)

    preds, _ = learn.tta(dl=test_dl, n=5, beta=0)

    all_preds.append(preds)

    # Was f'export_fold_{fold}.pkl': `fold` is not defined in this notebook, so
    # the export raised NameError after each fold's training.
    learn.export(f'export_fold_{i}.pkl')

    # Drop the learner and collect garbage first so the cache flush can actually
    # return the freed blocks.
    del learn

    gc.collect()

    torch.cuda.empty_cache()
