# Libraries

In [1]:
# General libraries
import collections
import gc
import glob
import json
import math
import os
import pickle
import random
import re
import statistics
import sys
import time
import warnings
from contextlib import contextmanager

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy as sp
import seaborn as sns
import torch
import torch.cuda.amp as amp
import torch.nn as nn
import torch.nn.functional as F
import wandb
from box import Box
from cosine_annealing_warmup import CosineAnnealingWarmupRestarts
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold
from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.model_selection import GridSearchCV, KFold, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from torch.optim import SGD, Adam
from torch.optim.lr_scheduler import CosineAnnealingLR, CosineAnnealingWarmRestarts
from torch.utils.data import DataLoader, Dataset
from tqdm.notebook import tqdm

In [2]:
# Competition specific libraries
import albumentations as A
import cv2
import timm
from albumentations.pytorch import ToTensorV2

In [3]:
# Additional local libraries

# The CSWin-Transformer reference implementation is used from a local checkout:
# https://github.com/microsoft/CSWin-Transformer
CSWIN_REPO_DIR = "../input/CSWin-Transformer"

# Put the checkout at the front of the import path so that `import models` below
# searches it first. The guard keeps this cell idempotent: re-running it no
# longer stacks duplicate entries onto sys.path.
if not sys.path or sys.path[0] != CSWIN_REPO_DIR:
    sys.path.insert(0, CSWIN_REPO_DIR)
print(sys.path)

# NOTE(review): presumably this import is what makes the "CSWin_*" architecture
# names available to timm.create_model -- confirm against the checkout.
import models

['../input/CSWin-Transformer', '/data/jupyter/sugiyama/petfinder2/working', '/opt/miniconda3/envs/all-in-one/lib/python38.zip', '/opt/miniconda3/envs/all-in-one/lib/python3.8', '/opt/miniconda3/envs/all-in-one/lib/python3.8/lib-dynload', '', '/home/sugiyama/.local/lib/python3.8/site-packages', '/opt/miniconda3/envs/all-in-one/lib/python3.8/site-packages', '/opt/miniconda3/envs/all-in-one/lib/python3.8/site-packages/IPython/extensions', '/home/sugiyama/.ipython']


In [4]:
# Use the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Data

In [5]:
# Where the competition data is read from, and where this notebook writes to.
DATA_DIR = "../input/petfinder-pawpularity-score/"
OUTPUT_DIR = "./"
MODEL_DIR = "./models/"

# !rm -rf {MODEL_DIR}

# Create both output locations up front; directories that already exist are left as they are.
for _out_dir in (OUTPUT_DIR, MODEL_DIR):
    os.makedirs(_out_dir, exist_ok=True)

In [6]:
# Competition CSVs: the training table, the test table and the sample submission.
train, test, sub = (
    pd.read_csv(f"{DATA_DIR}{csv_name}")
    for csv_name in ("train.csv", "test.csv", "sample_submission.csv")
)

# Image folders that sit next to the CSVs (one folder per split).
TRAIN_IMAGE_PATH = f"{DATA_DIR}train/"
TEST_IMAGE_PATH = f"{DATA_DIR}test/"

# Config

In [7]:
# Fixed seed for this run; it is stored in the config and applied by seed_torch.
# Uncomment the next line (and drop the fixed value) to draw a random seed instead.
# seed = random.randrange(10000)
seed = 440
print(seed)

440


In [8]:
# Default hyper-parameters for the run.
# Within the visible part of the notebook: `seed` is passed to seed_torch,
# `n_class` sizes the model's final layer, `model_name` selects the timm
# backbone and `size` is the square resize target of the image pipeline.
# The remaining entries are presumably consumed by the training code further
# down -- TODO confirm.
config_defaults = dict(
    seed=seed,
    n_class=1,
    n_fold=10,
    epochs=10,
    es_patience=0,
    batch_size=64,
    gradient_accumulation_steps=1,
    max_grad_norm=1000,
    criterion="BCEWithLogitsLoss",
    optimizer="Adam",
    scheduler="CosineAnnealingWarmRestarts",
    lr=1e-5,
    min_lr=1e-6,
    weight_decay=1e-7,
    momentum=0.9,
    model_name="CSWin_144_24322_large_384",
    size=384,
    # "models": [
    #     "swin_large_patch4_window12_384_in22k:v14",
    # ],
    # "runs": [
    #     "34qor14i",  # swin large v14
    #     "tmbsq7j1",  # swin base v1
    # ],
)

In [9]:
# Wrap the plain dict in a Box so settings can be read as attributes (e.g. config.seed).
config = Box(config_defaults)

In [10]:
def seed_torch(seed=42):
    """Seed every random number generator used by the notebook.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU and all CUDA devices), exports ``PYTHONHASHSEED`` and configures
    cuDNN for repeatable results.

    Args:
        seed: Integer seed applied to all generators. Defaults to 42.
    """
    random.seed(seed)
    # Exported for child processes; the running interpreter's hash seed was
    # already fixed when it started.
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may select different kernels from run to run, so it
    # is switched off explicitly in case something enabled it earlier.
    torch.backends.cudnn.benchmark = False


# Apply the configured seed once, before any dataset or model is built.
seed_torch(seed=config.seed)

# Dataset

In [11]:
class BaseDataset(Dataset):
    """Image plus tabular-metadata dataset backed by a DataFrame.

    Every row of ``df`` names one JPEG through its ``Id`` column. All columns
    other than ``Id`` and ``Pawpularity`` are served as the metadata vector.

    Args:
        df: DataFrame with an ``Id`` column. It must also contain
            ``Pawpularity`` when ``label`` is true.
        transform: Optional callable invoked as ``transform(image=img)`` whose
            result is indexed with ``"image"``.
        label: When true, images are read from ``TRAIN_IMAGE_PATH`` and the
            target ``Pawpularity / 100`` is returned with each sample. When
            false, images are read from ``TEST_IMAGE_PATH`` and no target is
            returned.
    """

    def __init__(self, df, transform=None, label=True):
        self.df = df
        self.file_names = df["Id"].values
        # Unlabelled frames may not carry a "Pawpularity" column at all;
        # errors="ignore" lets the same drop work for both kinds of frame
        # instead of raising KeyError when label=False.
        self.features = df.drop(["Id", "Pawpularity"], axis=1, errors="ignore").values
        self.transform = transform

        self.use_label = label
        if self.use_label:
            self.path = TRAIN_IMAGE_PATH
            # Scale the score by 1/100 (a 0-100 score becomes a 0-1 target).
            self.labels = df["Pawpularity"].values / 100.0
        else:
            self.path = TEST_IMAGE_PATH

    def __len__(self):
        """Return the number of rows (samples) in the backing DataFrame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load one sample.

        Returns:
            ``(image, feature, label)`` when labels are enabled, otherwise
            ``(image, feature)``. ``image`` is the RGB image after the optional
            transform; ``feature`` is the metadata row as a tensor.

        Raises:
            FileNotFoundError: If the image file is missing or cannot be decoded.
        """
        file_name = self.file_names[idx]
        file_path = f"{self.path}/{file_name}.jpg"
        image = cv2.imread(file_path)
        if image is None:
            # cv2.imread signals failure by returning None rather than raising;
            # fail here with the offending path instead of inside cvtColor.
            raise FileNotFoundError(f"Could not read image: {file_path}")
        # OpenCV loads images in BGR channel order; convert to RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        feature = torch.tensor(self.features[idx])
        if self.transform:
            augmented = self.transform(image=image)
            image = augmented["image"]
        if self.use_label:
            label = torch.tensor(self.labels[idx])
            return image, feature, label
        return image, feature

In [12]:
def get_transforms(*, data):
    """Build the image preprocessing pipeline.

    Args:
        data: Split name such as "train" (keyword-only). It is accepted but
            not consulted: the same pipeline is returned for every value.

    Returns:
        An ``A.Compose`` that resizes to ``config.size`` x ``config.size``,
        normalises each channel with the mean/std below and converts the
        result with ``ToTensorV2``.
    """
    side = config.size
    steps = [
        A.Resize(side, side),
        # A.CenterCrop(config.size, config.size),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ]
    return A.Compose(steps)

# Model

In [13]:
class BaseModel(nn.Module):
    """timm image backbone combined with tabular metadata features.

    The backbone's classification layer is replaced by a linear projection to
    ``embed_dim`` values. In ``forward`` that embedding passes through dropout,
    is concatenated with the metadata vector and goes through two linear
    layers that produce ``config.n_class`` outputs per sample.

    Args:
        model_name: Architecture name handed to ``timm.create_model``. It must
            contain one of "resnext50_32x4d", "efficientnet", "vit", "swin" or
            "CSWin", which decides which attribute holds the classifier.
        pretrained: Forwarded to ``timm.create_model``.
        n_meta_features: Width of the metadata vector passed to ``forward``.
            The default of 12 keeps the first head layer at the previously
            hard-coded 140 inputs (128 + 12).
        embed_dim: Output width of the replaced backbone classifier.

    Raises:
        ValueError: If ``model_name`` matches none of the supported families.
    """

    def __init__(self, model_name, pretrained=True, n_meta_features=12, embed_dim=128):
        super().__init__()
        self.model_name = model_name

        # Resolve the classifier attribute before building the backbone so an
        # unsupported name fails immediately instead of surfacing later as a
        # shape mismatch in forward().
        if "resnext50_32x4d" in model_name:
            head_attr = "fc"
        elif "efficientnet" in model_name:
            head_attr = "classifier"
        elif any(key in model_name for key in ["vit", "swin", "CSWin"]):
            head_attr = "head"
        else:
            raise ValueError(
                f"Unsupported model_name {model_name!r}: expected a name containing one of "
                "'resnext50_32x4d', 'efficientnet', 'vit', 'swin' or 'CSWin'"
            )

        self.model = timm.create_model(model_name, pretrained=pretrained)

        # Swap the backbone's classifier for a projection to the embedding size.
        n_features = getattr(self.model, head_attr).in_features
        setattr(self.model, head_attr, nn.Linear(n_features, embed_dim))

        self.dropout = nn.Dropout(0.1)
        self.head1 = nn.Linear(embed_dim + n_meta_features, 64)
        self.head2 = nn.Linear(64, config.n_class)

    # @amp.autocast(enabled=Config.amp)
    def forward(self, x, feats):
        """Run the backbone and fuse its embedding with the metadata.

        Args:
            x: Image batch accepted by the backbone.
            feats: Metadata batch with ``n_meta_features`` columns; any
                numeric dtype is accepted.

        Returns:
            Tensor with ``config.n_class`` values per sample (no activation
            is applied here).
        """
        x = self.model(x)
        x = self.dropout(x)
        # Align the metadata dtype with the embedding so the concatenation
        # never promotes to a dtype the linear layers cannot consume.
        feats = feats.to(dtype=x.dtype)
        x = torch.cat([x, feats], dim=1)
        x = self.head1(x)
        x = self.head2(x)
        return x

In [None]:
# Smoke test: build the configured model and push one small batch through it.
model = BaseModel(config.model_name)
print(model)

train_ds = BaseDataset(train, transform=get_transforms(data="train"))
train_loader = DataLoader(
    train_ds,
    batch_size=4,
    shuffle=True,
    num_workers=4,
    drop_last=True,
)

# A single batch is enough to confirm the pieces fit together, so stop after the first one.
for image, feature, label in train_loader:
    output = model(image, feature)
    print(output)
    break

BaseModel(
  (model): CSWinTransformer(
    (stage1_conv_embed): Sequential(
      (0): Conv2d(3, 144, kernel_size=(7, 7), stride=(4, 4), padding=(2, 2))
      (1): Rearrange('b c h w -> b (h w) c', h=96, w=96)
      (2): LayerNorm((144,), eps=1e-05, elementwise_affine=True)
    )
    (stage1): ModuleList(
      (0): CSWinBlock(
        (qkv): Linear(in_features=144, out_features=432, bias=True)
        (norm1): LayerNorm((144,), eps=1e-05, elementwise_affine=True)
        (proj): Linear(in_features=144, out_features=144, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
        (attns): ModuleList(
          (0): LePEAttention(
            (get_v): Conv2d(72, 72, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=72)
            (attn_drop): Dropout(p=0.0, inplace=False)
          )
          (1): LePEAttention(
            (get_v): Conv2d(72, 72, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=72)
            (attn_drop): Dropout(p=0.0, inplace=False)
   