<a href="https://colab.research.google.com/github/O-Kpy/Kaggle/blob/main/notebookee00da7c23.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install git+https://github.com/rwightman/pytorch-image-models
!pip install --upgrade wandb

In [None]:
import os
import gc
import cv2
import copy
import time
import random
from PIL import Image

# for data manipulation
import numpy as np
import pandas as pd

# pytorch import
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from torch.cuda import amp

# utils
import joblib
from tqdm import tqdm
from collections import defaultdict

# sklearn import
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import StratifiedKFold, KFold

import timm

# Albumentations for augmentations
import albumentations as A
from albumentations.pytorch import ToTensorV2

# For colored terminal text
from colorama import Fore, Back, Style
c_ = Fore.CYAN  # colorama code: cyan foreground text
sr_ = Style.RESET_ALL  # colorama code: reset all terminal styling

# for descriptive error messages
# NOTE(review): this is set after `import torch`; presumably it still takes effect
# because no CUDA work has run yet at this point — confirm.
os.environ['CUDA_LAUNCH_BLOCKING']='1'

Weights & Biases (W&B) is a set of machine learning tools that helps you build better models faster. Kaggle competitions require fast-paced model development and evaluation. There are a lot of components: exploring the training data, training different models, combining trained models in different combinations (ensembling), and so on.

W&B can be useful for Kaggle competitions with its lightweight and interoperable tools:

* Quickly track experiments,
* Version and iterate on datasets,
* Evaluate model performance,
* Reproduce models,
* Visualize results and spot regressions,
* Share findings with colleagues.

To learn more about Weights and Biases check out this kernel.

In [None]:
import wandb

# Try to log in to W&B with a key stored as the Kaggle secret 'wandb_api'.
# On success `anony` is None; on any failure (presumably also when the notebook
# runs outside Kaggle and `kaggle_secrets` cannot be imported) fall back to
# anony='must' and tell the user how to provide a key.
try:
    from kaggle_secrets import UserSecretsClient
    user_secrets = UserSecretsClient()
    api_key = user_secrets.get_secret('wandb_api')
    wandb.login(key=api_key)
    anony = None
except Exception:
    # `except Exception` (not a bare `except:`) keeps the best-effort fallback
    # while letting KeyboardInterrupt / SystemExit propagate.
    anony = 'must'
    print('If you want to use your W&B account, go to Add-ons -> Secrets and provide your W&B access token. Use the Label name as wandb_api. \nGet your W&B access token from here: https://wandb.ai/authorize')

In [None]:
# Competition data locations; the train/test image folders live under ROOT_DIR.
ROOT_DIR = "../input/petfinder-pawpularity-score"
TRAIN_DIR = f"{ROOT_DIR}/train"
TEST_DIR = f"{ROOT_DIR}/test"

In [None]:
# Training configuration
# One dictionary holding every tunable value; it is also passed to wandb.init as the run config.
CONFIG = {
    # --- reproducibility ---
    'seed': 42,
    # --- model / input ---
    'model_name': 'tf_efficientnet_b4_ns',
    'train_batch_size': 16,
    'valid_batch_size': 32,
    'img_size': 512,
    # --- optimisation ---
    'epochs': 5,
    'learning_rate': 1e-4,
    # NOTE(review): scheduler values below are presumably consumed by a
    # scheduler factory outside this section — confirm which keys apply.
    'scheduler': 'CosineAnnealingLR',
    'min_lr': 1e-6,
    'T_max': 20,
    'T_0': 25,
    'warmup_epochs': 0,
    'weight_decay': 1e-6,
    'n_accumulate': 1,
    # --- cross-validation / output head ---
    'n_fold': 5,
    'num_classes': 1,
    # --- runtime ---
    'device': torch.device('cuda' if torch.cuda.is_available() else 'cpu'),
    # --- bookkeeping ---
    'competitions': 'PetFinder',
    '_wandb_kernel': 'deb',
}

In [None]:
# Set Seed for Reproducibility
def set_seed(seed=42):
    """Seed the Python, NumPy and PyTorch RNGs and make cuDNN deterministic.

    Args:
        seed: Integer seed applied to every generator (default 42).
    """
    # Python-side randomness (hash seed is exported as an environment variable).
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)

    # PyTorch generators: the default one and the CUDA one.
    for seed_fn in (torch.manual_seed, torch.cuda.manual_seed):
        seed_fn(seed)

    # Disable cuDNN autotuning and request deterministic kernels.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

# Apply the seed stored in CONFIG to all RNGs before any data or model work.
set_seed(CONFIG['seed'])

# Read Data

In [None]:
def get_train_file_path(id):
    """Return the path of the training image ``<TRAIN_DIR>/<id>.jpg``.

    Args:
        id: Value used as the file stem (the ``Id`` column of train.csv).
    """
    filename = f'{id}.jpg'
    return f'{TRAIN_DIR}/{filename}'

In [None]:
# Load the training metadata and attach the image path for every Id.
df = pd.read_csv(ROOT_DIR + '/train.csv')
df['file_path'] = df['Id'].map(get_train_file_path)

# Every column other than the identifier, the target and the derived path
# is treated as a metadata feature.
feature_cols = [name for name in df.columns
                if name not in {'Id', 'Pawpularity', 'file_path'}]

In [None]:
# Visualize images: start a W&B run used only to log a preview table.
# `anonymous` takes the `anony` value chosen by the login cell (None after a
# successful wandb.login, 'must' otherwise) instead of always forcing an
# anonymous run, which ignored a successfully provided API key.
run = wandb.init(project='Pawpularity',
                 config=CONFIG,
                 job_type='Visualization',
                 anonymous=anony)

In [None]:
# Log a random sample of training rows, together with their images, as a W&B table.
# The metadata columns are listed once and reused for both the table header and
# the row values so the two cannot drift apart.
meta_cols = ['Subject Focus', 'Eyes', 'Face', 'Near', 'Action', 'Accessory', 'Group',
             'Collage', 'Human', 'Occlusion', 'Info', 'Blur', 'Pawpularity']
preview_table = wandb.Table(columns=['Id', 'Image'] + meta_cols)

# Cap the sample size at the frame length so a frame with fewer than 1000 rows
# does not make DataFrame.sample raise.
tmp_df = df.sample(min(1000, len(df)), random_state=CONFIG['seed']).reset_index(drop=True)

for _, row in tqdm(tmp_df.iterrows(), total=len(tmp_df)):
    # The context manager closes the image file handle after each row instead
    # of leaving up to 1000 files open.
    with Image.open(row.file_path) as img:
        preview_table.add_data(row['Id'],
                               wandb.Image(img),
                               *[row[col] for col in meta_cols])

wandb.log({'Visualization': preview_table})
run.finish()

In [None]:
# Create Folds
def create_folds(df, n_s=5, n_grp=None, seed=None):
    """Return a copy of ``df`` with a ``kfold`` column holding each row's validation fold.

    Args:
        df: DataFrame containing a ``Pawpularity`` column.
        n_s: Number of folds.
        n_grp: If None, rows are split with a shuffled ``KFold``. Otherwise
            ``Pawpularity`` is cut into ``n_grp`` bins with ``pd.cut`` and the
            bins are used as strata for ``StratifiedKFold``.
        seed: Random state for the splitter; defaults to ``CONFIG['seed']``.

    Returns:
        A new DataFrame (the input is not modified) with an integer ``kfold``
        column in ``range(n_s)``. No helper ``grp`` column is added.
    """
    if seed is None:
        seed = CONFIG['seed']

    # Work on a copy so the caller's frame is not mutated as a side effect.
    df = df.copy()
    df['kfold'] = -1

    if n_grp is None:
        # shuffle=True is required: scikit-learn rejects random_state when shuffle is False.
        splitter = KFold(n_splits=n_s, shuffle=True, random_state=seed)
        target = df['Pawpularity']
    else:
        splitter = StratifiedKFold(n_splits=n_s, shuffle=True, random_state=seed)
        # Bin labels are kept in a local Series, so there is no temporary
        # column to drop afterwards (dropping it failed when n_grp was None).
        target = pd.cut(df['Pawpularity'], n_grp, labels=False)

    # split() yields positional indices, so assign by position; label-based
    # .loc would hit the wrong rows when the index is not 0..n-1.
    kfold_pos = df.columns.get_loc('kfold')
    for fold_no, (_, valid_idx) in enumerate(splitter.split(target, target)):
        df.iloc[valid_idx, kfold_pos] = fold_no

    return df

In [None]:
# Assign folds; n_grp=14 means Pawpularity is cut into 14 bins that are used
# as strata for the split.
df = create_folds(df, n_s=CONFIG['n_fold'], n_grp=14)
# Show the first rows of the returned frame.
df.head()

In [None]:
# Dataset Class
