In [None]:
# Mount Google Drive at /content/drive so files stored there are reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Install the `signate` package, which provides the `signate` command used by later cells.
!pip install signate



In [None]:
!cp ./drive/MyDrive/MyStudy/MySIGNATE/signate.json /root/.signate/

In [None]:
# Run `signate list` (presumably prints the available competitions and their ids — confirm).
!signate list

In [None]:
# Download the files of competition 1106 (train.csv, train.zip, test.zip, ...) into the working directory.
!signate download --competition-id=1106

dataset_definition.md

train.csv

sample_submit.csv

test.zip

train.zip

[32m
Download completed.[0m


In [None]:
%%capture
# Extract train.zip into the working directory; %%capture hides the unzip output.
!unzip train.zip

In [None]:
%%capture
# Extract test.zip into the working directory; %%capture hides the unzip output.
!unzip test.zip

In [None]:
# Install rinna's japanese-clip package directly from its GitHub repository.
!pip install git+https://github.com/rinnakk/japanese-clip.git

In [None]:
import japanese_clip as ja_clip

In [None]:
import os
import io
from PIL import Image
import torch

from sklearn.model_selection import train_test_split
import pandas as pd
import os.path as osp

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

### データの前処理

In [None]:
# Load the training table; later cells read its `image_name` and `label` columns.
train_df = pd.read_csv('./train.csv')

In [None]:
# Load the rinna/japanese-cloob-vit-b-16 model together with its image
# preprocessing callable. `preprocess` is read as a global by
# ImgDataset.__getitem__. The tokenizer is loaded as well.
model, preprocess = ja_clip.load('rinna/japanese-cloob-vit-b-16', device=device)
tokenizer = ja_clip.load_tokenizer()

In [None]:
class ImgDataset(Dataset):
    """Dataset yielding ``(preprocessed image, label)`` pairs from image files.

    Args:
        img_dir: Directory that contains the image files.
        img_paths: Sequence of file names relative to ``img_dir``.
        labels: Sequence of labels aligned index-by-index with ``img_paths``.
        transform: Optional callable applied to the opened image before the
            module-level ``preprocess`` callable.
    """

    def __init__(self, img_dir, img_paths, labels, transform=None):
        self.img_dir = img_dir
        self.img_paths = img_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Return the number of samples, i.e. one per entry in ``img_paths``."""
        # The instance stores no dataframe; the path sequence defines the size.
        return len(self.img_paths)

    def __getitem__(self, idx):
        """Open, optionally augment, and preprocess the image at ``idx``.

        Returns:
            Tuple ``(image, label)`` where ``image`` is whatever ``preprocess``
            returns and ``label`` is ``labels[idx]``.
        """
        img_path = osp.join(self.img_dir, self.img_paths[idx])
        # Open the joined path itself, not the literal string 'img_path'.
        img = Image.open(img_path)
        if self.transform is not None:
            img = self.transform(img)
        # `preprocess` is the module-level callable returned by ja_clip.load().
        image = preprocess(img)

        return image, self.labels[idx]

In [None]:
from torchvision import transforms

# Augmentation pipeline handed to the training dataset as its `transform`.
trans = transforms.Compose(
    [
        # Automatic augmentation policy with torchvision's default arguments.
        transforms.AutoAugment(),
        # Gaussian blur (kernel size 11), applied with probability 0.2.
        transforms.RandomApply(
            transforms=nn.ModuleList([transforms.GaussianBlur(kernel_size=11)]),
            p=0.2,
        ),
        # Random flips along both axes, each with the default probability.
        transforms.RandomVerticalFlip(),
        transforms.RandomHorizontalFlip(),
        # transforms.RandomRotation((-180, 180))
    ]
)

In [None]:
# Hold out 20% of the labelled rows for validation, stratified on `label`.
train, valid = train_test_split(
    train_df,
    test_size=0.2,
    stratify=train_df['label'],
    random_state=42,
    shuffle=True,
)

# Only the training split receives the augmentation pipeline `trans`;
# the validation dataset is built without a transform.
train_dataset = ImgDataset(
    img_dir='./train',
    img_paths=train['image_name'].values,
    labels=train['label'].values,
    transform=trans,
)
valid_dataset = ImgDataset(
    img_dir='./train',
    img_paths=valid['image_name'].values,
    labels=valid['label'].values,
)

In [None]:
# Number of samples per batch, shared by the training and validation loaders.
BATCH_SIZE = 32

In [None]:
# Shuffle only the training batches; validation order stays fixed.
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
valid_dataloader = DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False)

### 分類ヘッドNN

In [None]:
class SimpleNN(nn.Module):
    """Classification head: a single linear layer from 512 features to 2 logits.

    Args:
        mid_dim: Stored on the instance but not used by the active layer; it
            only sized the hidden layer of a disabled two-layer variant.
    """

    def __init__(self, mid_dim):
        super().__init__()
        self.mid_dim = mid_dim
        # Disabled alternative kept for reference:
        #   Dropout(0.3) -> Linear(512, mid_dim) -> ReLU
        #   -> Dropout(0.3) -> Linear(mid_dim, 2)
        self.model = nn.Linear(in_features=512, out_features=2)

    def forward(self, x):
        """Return the logits produced by the linear layer for ``x``."""
        return self.model(x)

In [None]:
# Build the head (the 64 is stored as mid_dim but unused by the single linear
# layer) and move it to `device`.
classifier_head = SimpleNN(64)
classifier_head.to(device)

In [None]:
from torch.optim import Adam
from torch.optim.lr_scheduler import CosineAnnealingLR

# Optimisation setup for the classifier head: Adam starting at lr=1e-3,
# cosine-annealed towards 1e-6 over EPOCH_NUM scheduler steps, with
# cross-entropy as the loss.
EPOCH_NUM = 50
optimizer = Adam(classifier_head.parameters(), lr=1e-3)
scheduler = CosineAnnealingLR(optimizer, T_max=EPOCH_NUM, eta_min=1e-6)
criterion = nn.CrossEntropyLoss()

In [None]:
# TensorBoard writer; event files go to ./log_full_train.
writer = SummaryWriter(log_dir='./log_full_train')

### japanese cloob

In [None]:
# Reload the CLOOB backbone for fine-tuning. The second return value is
# discarded; the `preprocess` callable loaded earlier stays in use.
model, _ = ja_clip.load('rinna/japanese-cloob-vit-b-16', device=device)
model.train()
# Freeze every parameter, then re-enable gradients for `vision_model` only.
# The nn.Module method is `requires_grad_`; `required_grad_` does not exist
# and raises AttributeError.
model.requires_grad_(False)
model.vision_model.requires_grad_(True)
# Frozen parameters never receive a gradient, so Adam leaves them untouched.
optimizer_base = Adam(model.parameters(), lr=1e-4)
scheduler_base = CosineAnnealingLR(optimizer_base, T_max=EPOCH_NUM, eta_min=1e-6)

In [None]:
# Release cached, currently unused GPU memory held by PyTorch before training starts.
torch.cuda.empty_cache()

### training and validation

In [None]:
from tqdm import tqdm

prev_loss = float('inf')  # mean validation loss of the most recent epoch
best_loss = float('inf')  # lowest mean validation loss seen so far
pbar = tqdm(range(EPOCH_NUM))
l2_coef = 5e-4  # weight of the L2 penalty on the classifier head parameters

# Checkpoint targets; their folders are created up front so the first
# torch.save cannot fail on a missing directory after a full training epoch.
head_ckpt_path = '/content/drive/MyDrive/MyStudy/MySIGNATE/package-classification-comp/models/classifier_head/classifier_head_full.pt'
base_ckpt_path = '/content/drive/MyDrive/MyStudy/MySIGNATE/package-classification-comp/models/ja_clip/ja_clip_full.pt'
for ckpt_path in (head_ckpt_path, base_ckpt_path):
    os.makedirs(osp.dirname(ckpt_path), exist_ok=True)

for epoch in pbar:
    # ---- training pass: put both networks into training mode ----
    classifier_head.train()
    model.train()
    for iter_num, (X, y) in enumerate(train_dataloader):
        X = X.to(device)
        y = y.to(device)

        optimizer.zero_grad()
        optimizer_base.zero_grad()

        # NOTE(review): autocast is used without a gradient scaler; with fp16
        # small gradients can underflow — consider torch.cuda.amp.GradScaler.
        with torch.autocast('cuda'):
            image_features = model.get_image_features(X)
            pred_y = classifier_head(image_features)

            # Explicit L2 penalty over the head's parameters only.
            l2_loss = 0.0
            for param in classifier_head.parameters():
                l2_loss += torch.norm(param)**2
            loss = criterion(pred_y, y) + l2_coef*l2_loss

        loss.backward()
        optimizer.step()
        optimizer_base.step()
        pbar.set_description(f'Epoch/Iter {epoch}/{iter_num}: loss{loss.item():.4f} / {prev_loss:.4f}')
        writer.add_scalar('train/loss', loss.item(), epoch*len(train_dataloader)+iter_num)
    # Both schedulers advance once per epoch.
    scheduler.step()
    scheduler_base.step()

    # ---- validation pass ----
    val_loss = 0
    classifier_head.eval()
    model.eval()

    for item_num, (X, y) in enumerate(valid_dataloader):
        X = X.to(device)
        y = y.to(device)

        with torch.no_grad(), torch.autocast('cuda'):
            image_features = model.get_image_features(X)
            pred_y = classifier_head(image_features)
            # The L2 penalty is not part of the validation loss.
            loss = criterion(pred_y, y)
            val_loss += loss.item()
        # `add_scalar` is the SummaryWriter method. The step index uses this
        # loop's own counter so every validation batch gets a distinct step.
        writer.add_scalar('val/loss', loss.item(), epoch*len(valid_dataloader)+item_num)

    # Mean validation loss; checkpoint both networks whenever it improves.
    prev_loss = val_loss / len(valid_dataloader)
    if best_loss > prev_loss:
        best_loss = prev_loss
        torch.save(classifier_head.state_dict(), head_ckpt_path)
        torch.save(model.state_dict(), base_ckpt_path)
    print(prev_loss)