In [1]:
import shutil
import warnings
warnings.filterwarnings('ignore')

from glob import glob
import pandas as pd
import numpy as np 
from tqdm import tqdm
import cv2
import gc
from PIL import Image

import os
import timm
import random

import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torchvision.transforms as transforms
from sklearn.metrics import f1_score, accuracy_score
import time
from torch.optim import lr_scheduler
import GPUtil


# Seed every RNG the pipeline draws from (Python, NumPy, PyTorch) so that
# shuffling, augmentation and weight initialisation are repeatable across runs.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Prefer the GPU, but fall back to the CPU so the notebook still runs on a
# machine without CUDA instead of failing at the first `.to(device)`.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [2]:
# Collect the PNG paths of both splits. Sorting in place gives a deterministic
# (lexicographic) order that does not depend on the directory listing order.
train_png = glob('train/*.png')
train_png.sort()

test_png = glob('test/*.png')
test_png.sort()

In [3]:
# Training metadata; its "label" column is the classification target.
# NOTE(review): labels are taken in CSV row order while images are taken in
# sorted-filename order — presumably these line up; verify against the CSV.
train_y = pd.read_csv("open/train_df.csv")

# Map each distinct label value to an integer id, numbered in sorted order.
label_unique = {name: idx for idx, name in enumerate(sorted(np.unique(train_y["label"])))}

# Encode the label column as integer ids, keeping the CSV row order.
train_labels = [label_unique[name] for name in train_y["label"]]

In [4]:
def img_load(path):
    """Read an image file and return it as a 224x224 array with reversed channels.

    Args:
        path: Path of the image file to read.

    Returns:
        The image with its channel axis reversed (OpenCV's BGR order becomes
        RGB) and resized to 224x224.

    Raises:
        FileNotFoundError: If OpenCV could not read the file.
    """
    img = cv2.imread(path)
    # cv2.imread signals failure by returning None rather than raising; fail
    # here with the offending path instead of an opaque TypeError on the slice.
    if img is None:
        raise FileNotFoundError(f"cv2.imread could not read image: {path}")
    img = img[:, :, ::-1]
    img = cv2.resize(img, (224, 224))
    return img

In [5]:
# Decode every image up front so later cells work on in-memory arrays;
# tqdm only wraps each path list to show a progress bar.
train_imgs = list(map(img_load, tqdm(train_png)))
test_imgs = list(map(img_load, tqdm(test_png)))

100%|██████████| 4277/4277 [01:53<00:00, 37.53it/s]
100%|██████████| 2154/2154 [00:53<00:00, 40.06it/s]


In [11]:
# Per-image channel statistics of the training set: each entry is a length-3
# vector obtained by reducing over the two spatial axes.
meanRGB = [img.mean(axis=(0, 1)) for img in train_imgs]
stdRGB = [img.std(axis=(0, 1)) for img in train_imgs]

# Average the per-image values channel by channel and rescale from the
# 0-255 pixel range to 0-1.
meanR, meanG, meanB = (np.mean([m[c] for m in meanRGB]) / 255 for c in range(3))
stdR, stdG, stdB = (np.mean([s[c] for s in stdRGB]) / 255 for c in range(3))

print("train 평균: {:.6f}, {:.6f}, {:.6f}".format(meanR, meanG, meanB))
print("train 표준편차: {:.6f}, {:.6f}, {:.6f}".format(stdR, stdG, stdB))

train 평균: 0.432652, 0.403058, 0.393747
train 표준편차: 0.182872, 0.175091, 0.164300


In [12]:
# Per-image channel statistics of the test set: each entry is a length-3
# vector obtained by reducing over the two spatial axes.
# Note: this reuses (overwrites) the variable names of the training-set cell.
meanRGB = [img.mean(axis=(0, 1)) for img in test_imgs]
stdRGB = [img.std(axis=(0, 1)) for img in test_imgs]

# Average the per-image values channel by channel and rescale from the
# 0-255 pixel range to 0-1.
meanR, meanG, meanB = (np.mean([m[c] for m in meanRGB]) / 255 for c in range(3))
stdR, stdG, stdB = (np.mean([s[c] for s in stdRGB]) / 255 for c in range(3))

print("test 평균: {:.6f}, {:.6f}, {:.6f}".format(meanR, meanG, meanB))
print("test 표준편차: {:.6f}, {:.6f}, {:.6f}".format(stdR, stdG, stdB))

test 평균: 0.417876, 0.392716, 0.386246
test 표준편차: 0.196169, 0.190939, 0.181401


In [13]:
class Custom_dataset(Dataset):
    """Dataset over in-memory images that applies mode-specific preprocessing.

    Args:
        img_paths: Indexable collection of images. Despite the name, the
            notebook passes decoded image arrays here, not file paths.
        labels: Indexable collection of labels aligned with ``img_paths``.
        mode: ``'train'`` (random rotation augmentation, then normalisation)
            or ``'test'`` (normalisation only).

    Raises:
        ValueError: If ``mode`` is neither ``'train'`` nor ``'test'``.
    """

    # Channel statistics measured on the training images (0-1 scale). The same
    # values are applied in both modes: the model is fitted on inputs
    # normalised with these numbers, so evaluation inputs must be normalised
    # identically rather than with statistics measured on the test set.
    NORM_MEAN = [0.432652, 0.403058, 0.393747]
    NORM_STD = [0.182872, 0.175091, 0.164300]

    def __init__(self, img_paths, labels, mode='train'):
        # Reject typos such as 'Train' early; previously an unknown mode
        # silently skipped all preprocessing and returned the raw array.
        if mode not in ('train', 'test'):
            raise ValueError(f"mode must be 'train' or 'test', got {mode!r}")
        self.img_paths = img_paths
        self.labels = labels
        self.mode = mode
        # Build the pipeline once instead of on every __getitem__ call.
        self.transform = self._build_transform(mode)

    def _build_transform(self, mode):
        """Compose the preprocessing pipeline for ``mode``."""
        steps = []
        if mode == 'train':
            # RandomAffine runs on a PIL image here, hence the conversion
            # first; (-45, 45) is the rotation range in degrees.
            steps.append(transforms.ToPILImage())
            steps.append(transforms.RandomAffine((-45, 45)))
        steps.append(transforms.ToTensor())
        steps.append(transforms.Normalize(mean=self.NORM_MEAN, std=self.NORM_STD))
        return transforms.Compose(steps)

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.img_paths)

    def __getitem__(self, idx):
        """Return ``(transformed_image, label)`` for position ``idx``."""
        img = self.transform(self.img_paths[idx])
        label = self.labels[idx]
        return img, label

In [14]:
class Network(nn.Module):
    """Thin ``nn.Module`` wrapper around a timm classification model.

    Args:
        model_name: Architecture name passed to ``timm.create_model``.
        num_classes: Value passed as ``num_classes`` to ``timm.create_model``.
        pretrained: Value passed as ``pretrained`` to ``timm.create_model``.

    The defaults reproduce the previously hard-coded configuration, so
    ``Network()`` builds the same model as before.
    """

    def __init__(self, model_name='vit_base_patch16_224', num_classes=88, pretrained=True):
        super().__init__()
        self.model = timm.create_model(model_name, pretrained=pretrained, num_classes=num_classes)

    def forward(self, x):
        """Run ``x`` through the wrapped model and return its output."""
        return self.model(x)

In [29]:
# Hyper-parameters shared by the loaders and the training loop.
batch_size, epochs = 64, 30

# Training split: 'train' mode preprocessing, reshuffled by the loader.
train_dataset = Custom_dataset(
    img_paths=np.array(train_imgs),
    labels=np.array(train_labels),
    mode='train',
)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

# Test split: every image gets the placeholder label "tmp"; shuffle=False keeps
# the loader order identical to the order of test_imgs.
test_dataset = Custom_dataset(
    img_paths=np.array(test_imgs),
    labels=np.array(["tmp"] * len(test_imgs)),
    mode='test',
)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [30]:
# Run Python's garbage collector first, then ask PyTorch to release its cached
# CUDA memory, before the model is created in the next cell.
gc.collect()
torch.cuda.empty_cache()

In [33]:
def score_function(real, pred):
    """Return the macro-averaged F1 score of ``pred`` measured against ``real``."""
    return f1_score(real, pred, average="macro")

# NOTE(review): the model is created directly from timm here
# ('vit_tiny_patch16_224_in21k'); the Network wrapper class is not used.
model = timm.create_model('vit_tiny_patch16_224_in21k', pretrained=True, num_classes=88).to(device)

optimizer = torch.optim.SGD(model.parameters(), lr=0.005)
criterion = nn.CrossEntropyLoss()
# StepLR configured with step_size=7 and gamma=0.1, stepped once per epoch below.
scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

best = 0  # NOTE(review): never updated in this cell; kept in case other cells read it
train_losses = []  # one accumulated (mean-per-batch) training loss per epoch
for epoch in range(epochs):
    GPUtil.showUtilization()
    start = time.time()
    train_loss = 0
    train_pred = []
    # Named train_true (not train_y) so the metadata DataFrame `train_y`
    # loaded earlier in the notebook is not overwritten by this loop.
    train_true = []
    model.train()
    for batch in train_loader:
        # The loader already yields tensors: move/cast them with .to() instead
        # of re-wrapping them in torch.tensor(), which makes an extra copy.
        x = batch[0].to(device=device, dtype=torch.float32)
        y = batch[1].to(device=device, dtype=torch.long)

        optimizer.zero_grad()
        # Gradients are enabled by default here, and emptying the CUDA cache on
        # every batch only slows training, so neither is done explicitly.
        pred = model(x)
        loss = criterion(pred, y)

        loss.backward()
        optimizer.step()

        # Each batch contributes loss / number_of_batches, so the running sum
        # ends the epoch as the mean batch loss.
        train_loss += loss.item() / len(train_loader)
        train_pred += pred.argmax(1).tolist()
        train_true += y.tolist()

    scheduler.step()
    train_f1 = score_function(train_true, train_pred)

    # Elapsed time for this epoch, and that time multiplied by the number of
    # epochs still to run as a rough estimate of the remaining time.
    TIME = time.time() - start
    print(f'epoch : {epoch+1}/{epochs}    time : {TIME:.0f}s/{TIME*(epochs-epoch-1):.0f}s')
    print(f'TRAIN    loss : {train_loss:.5f}    f1 : {train_f1:.5f}')
    train_losses.append(train_loss)

| ID | GPU | MEM |
------------------
|  0 |  8% | 86% |
epoch : 1/30    time : 39s/1119s
TRAIN    loss : 1.20171    f1 : 0.15353
| ID | GPU | MEM |
------------------
|  0 | 92% | 80% |
epoch : 2/30    time : 38s/1077s
TRAIN    loss : 0.73142    f1 : 0.19811
| ID | GPU | MEM |
------------------
|  0 | 87% | 80% |
epoch : 3/30    time : 38s/1037s
TRAIN    loss : 0.61883    f1 : 0.25742
| ID | GPU | MEM |
------------------
|  0 | 90% | 80% |
epoch : 4/30    time : 39s/1001s
TRAIN    loss : 0.54214    f1 : 0.32401
| ID | GPU | MEM |
------------------
|  0 | 86% | 80% |
epoch : 5/30    time : 39s/963s
TRAIN    loss : 0.49435    f1 : 0.37883
| ID | GPU | MEM |
------------------
|  0 | 87% | 80% |
epoch : 6/30    time : 38s/921s
TRAIN    loss : 0.44596    f1 : 0.41644
| ID | GPU | MEM |
------------------
|  0 | 87% | 80% |
epoch : 7/30    time : 38s/884s
TRAIN    loss : 0.42492    f1 : 0.45989
| ID | GPU | MEM |
------------------
|  0 | 91% | 80% |
epoch : 8/30    time : 38s/845s
TRAI

In [34]:
# Switch to evaluation mode and collect one predicted class id per test image,
# in loader order.
model.eval()
f_pred = []

with torch.no_grad():
    for batch in test_loader:
        # batch[0] is already a tensor: move/cast it with .to() instead of
        # copying it through torch.tensor(). The per-batch
        # torch.cuda.empty_cache() calls were dropped as they only add latency.
        x = batch[0].to(device=device, dtype=torch.float32)
        pred = model(x)
        f_pred.extend(pred.argmax(1).tolist())

In [35]:
# Invert the label -> id mapping so predicted ids can be turned back into labels.
label_decoder = dict(zip(label_unique.values(), label_unique.keys()))

# Decode every predicted id, preserving prediction order.
f_result = list(map(label_decoder.__getitem__, f_pred))

### 제출물 생성

In [36]:
# Load the sample submission and set its "label" column to the decoded predictions.
submission = pd.read_csv("open/sample_submission.csv").assign(label=f_result)

submission

Unnamed: 0,index,label
0,0,tile-glue_strip
1,1,grid-good
2,2,transistor-good
3,3,tile-gray_stroke
4,4,tile-good
...,...,...
2149,2149,tile-gray_stroke
2150,2150,screw-good
2151,2151,grid-good
2152,2152,cable-good


In [37]:
# Write the submission table to CSV; index=False omits the DataFrame index column.
submission.to_csv("vit_tiny_bs=64_epoch=30.csv", index = False)

In [38]:
# Save the trained model object to disk.
# NOTE(review): this serialises the whole module rather than model.state_dict(),
# so loading it presumably needs the same class definitions/library versions —
# confirm before relying on this file elsewhere. The f-prefix has no placeholders.
torch.save(model, f'vit_tiny_bs=64_epoch=30.pt')

In [39]:
import pickle

# Persist the per-epoch training losses collected by the training loop.
with open('train_losses_vit.p', 'wb') as f:
    pickle.dump(train_losses, f)