라이브러리 불러오기 (library imports)

In [530]:
import warnings
warnings.filterwarnings('ignore')

from glob import glob
import pandas as pd
import numpy as np 
from tqdm import tqdm
import cv2

import os
import timm
import random

import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torchvision.transforms as transforms
from torchvision import datasets
from PIL import Image
from sklearn.metrics import f1_score, accuracy_score

# Prefer the GPU, but fall back to the CPU so the notebook still runs
# (slowly) on machines without CUDA instead of failing at the first `.to(device)`.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

데이터 경로 설정

In [28]:
# Collect the PNG files of each split; sorting fixes the file order.
train_png = glob('./train/train/*.png')
train_png.sort()
test_png = glob('./test/test/*.png')
test_png.sort()

In [29]:
# Load the training annotations and encode the string labels as integer class ids.
# NOTE(review): this reads from the filesystem root ("/train_df.csv") while a later
# cell reads "./train_df.csv" - confirm which location is intended.
train_y = pd.read_csv("/train_df.csv")

train_labels = train_y["label"]

# Map every distinct label, taken in sorted order, to its position: {label: class_id}.
label_unique = {
    name: class_id
    for class_id, name in enumerate(sorted(np.unique(train_labels)))
}

# Replace each label by its integer class id (plain Python list).
train_labels = list(map(label_unique.__getitem__, train_labels))

In [30]:
def img_load(path, size=(512, 512)):
    """Read an image file and return it as a resized array with reversed channels.

    Args:
        path: Path of the image file to read with ``cv2.imread``.
        size: Target size handed to ``cv2.resize``. Defaults to ``(512, 512)``,
            the value that used to be hard-coded.

    Returns:
        The decoded image with its last (channel) axis reversed - OpenCV decodes
        colour images as BGR, so this yields RGB - resized to ``size``.

    Raises:
        FileNotFoundError: If ``cv2.imread`` could not read ``path``.
    """
    img = cv2.imread(path)
    if img is None:
        # cv2.imread reports a missing/unreadable file by returning None rather
        # than raising; without this check the slicing below fails with an
        # opaque "'NoneType' object is not subscriptable" error.
        raise FileNotFoundError(f"Could not read image: {path}")
    img = img[:, :, ::-1]  # reverse the channel axis (BGR -> RGB)
    img = cv2.resize(img, size)
    return img

In [217]:
# Eagerly decode every image of both splits into memory, with a progress bar each.
train_imgs = list(map(img_load, tqdm(train_png)))
test_imgs = list(map(img_load, tqdm(test_png)))

100%|██████████| 4277/4277 [01:06<00:00, 64.52it/s]
100%|██████████| 2154/2154 [00:35<00:00, 60.40it/s]


In [32]:
class Custom_dataset(Dataset):
    """Dataset over already-loaded images with optional random flips.

    Args:
        img_paths: Indexable collection of images. Despite the name, each
            element is used directly as an image array (it is sliced, copied
            and passed to ``ToTensor``), not as a file path.
        labels: Indexable collection of labels, aligned with ``img_paths``.
        mode: ``'train'`` enables the random flip augmentation; any other
            value returns the image unflipped.
    """

    def __init__(self, img_paths, labels, mode='train'):
        self.img_paths = img_paths
        self.labels = labels
        self.mode = mode

    def __len__(self):
        """Return the number of images."""
        return len(self.img_paths)

    def __getitem__(self, idx):
        """Return ``(image_tensor, label)`` for position ``idx``."""
        sample = self.img_paths[idx]

        if self.mode == 'train':
            # One draw per sample: 0 = keep, 1 = reverse axis 0, 2 = reverse axis 1.
            flip_code = random.randint(0, 2)
            if flip_code == 1:
                sample = sample[::-1].copy()
            elif flip_code == 2:
                sample = sample[:, ::-1].copy()

        # .copy() above gives ToTensor an array without negative strides.
        tensor = transforms.ToTensor()(sample)
        return tensor, self.labels[idx]
    
class Network(nn.Module):
    """Thin ``nn.Module`` wrapper around a timm classification model.

    Args:
        num_classes: Size of the classification head. Defaults to 88, the
            value that used to be hard-coded.
        model_name: Architecture name passed to ``timm.create_model``.
            Defaults to ``'efficientnet_b0'``.
        pretrained: Whether to ask timm for pretrained weights.
    """

    def __init__(self, num_classes=88, model_name='efficientnet_b0', pretrained=True):
        super().__init__()
        self.model = timm.create_model(
            model_name, pretrained=pretrained, num_classes=num_classes
        )

    def forward(self, x):
        """Return the wrapped model's output for the batch ``x``."""
        return self.model(x)

In [33]:
# Hyper-parameters shared by the training and inference cells.
batch_size, epochs = 32, 25

# Train split: in-memory images + integer labels, shuffled every epoch.
train_dataset = Custom_dataset(
    np.array(train_imgs),
    np.array(train_labels),
    mode='train',
)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Test split: there is no ground truth, so every image gets the placeholder
# label "tmp"; order is preserved so predictions line up with the file list.
test_dataset = Custom_dataset(
    np.array(test_imgs),
    np.array(["tmp"] * len(test_imgs)),
    mode='test',
)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [34]:
def score_function(real, pred):
    """Return the macro-averaged F1 score of ``pred`` against ``real``.

    Args:
        real: Ground-truth labels.
        pred: Predicted labels, aligned with ``real``.
    """
    return f1_score(real, pred, average="macro")

In [41]:
# Train the classifier with mixed precision and report loss / macro-F1 per epoch.
model = Network().to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()
scaler = torch.cuda.amp.GradScaler()

best = 0  # NOTE(review): never read or updated in this cell
for epoch in range(epochs):
    train_loss = 0
    train_pred = []
    # NOTE: this rebinds `train_y`, the name the label-loading cell used for
    # the annotations DataFrame; re-run that cell before reusing the DataFrame.
    train_y = []
    model.train()
    for batch in train_loader:
        optimizer.zero_grad()
        # The DataLoader already yields tensors; `.to()` moves/casts them
        # without the extra copy (and warning) of `torch.tensor(tensor)`.
        x = batch[0].to(device=device, dtype=torch.float32)
        y = batch[1].to(device=device, dtype=torch.long)
        with torch.cuda.amp.autocast():
            pred = model(x)
            # Keep the loss inside autocast so it is not evaluated directly
            # on the half-precision logits.
            loss = criterion(pred, y)

        # Scale the loss before backward, then let the scaler step/update.
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        # Running mean of the batch losses over the epoch.
        train_loss += loss.item() / len(train_loader)
        train_pred += pred.argmax(1).detach().cpu().numpy().tolist()
        train_y += y.detach().cpu().numpy().tolist()

    train_f1 = score_function(train_y, train_pred)

    print(f'epoch : {epoch+1}/{epochs}')
    print(f'TRAIN    loss : {train_loss:.5f}    f1 : {train_f1:.5f}')

epoch : 1/25
TRAIN    loss : 0.99978    f1 : 0.23646
epoch : 2/25
TRAIN    loss : 0.55692    f1 : 0.35786
epoch : 3/25
TRAIN    loss : 0.40298    f1 : 0.49326
epoch : 4/25
TRAIN    loss : 0.28930    f1 : 0.59968
epoch : 5/25
TRAIN    loss : 0.23761    f1 : 0.66999
epoch : 6/25
TRAIN    loss : 0.22199    f1 : 0.70257
epoch : 7/25
TRAIN    loss : 0.20076    f1 : 0.71952
epoch : 8/25
TRAIN    loss : 0.13472    f1 : 0.81569
epoch : 9/25
TRAIN    loss : 0.16176    f1 : 0.79467
epoch : 10/25
TRAIN    loss : 0.12903    f1 : 0.83167
epoch : 11/25
TRAIN    loss : 0.11012    f1 : 0.85520
epoch : 12/25
TRAIN    loss : 0.11003    f1 : 0.86250
epoch : 13/25
TRAIN    loss : 0.06751    f1 : 0.90157
epoch : 14/25
TRAIN    loss : 0.05496    f1 : 0.91693
epoch : 15/25
TRAIN    loss : 0.06610    f1 : 0.91968
epoch : 16/25
TRAIN    loss : 0.05265    f1 : 0.94289
epoch : 17/25
TRAIN    loss : 0.07879    f1 : 0.91712
epoch : 18/25
TRAIN    loss : 0.07088    f1 : 0.92733
epoch : 19/25
TRAIN    loss : 0.07873

In [42]:
# Predict a class id for every test image, in loader order.
model.eval()
f_pred = []

with torch.no_grad():
    for batch in test_loader:
        # The batch is already a tensor; `.to()` avoids the redundant copy
        # (and warning) caused by `torch.tensor(tensor)`.
        x = batch[0].to(device=device, dtype=torch.float32)
        with torch.cuda.amp.autocast():
            pred = model(x)
        f_pred.extend(pred.argmax(1).cpu().tolist())

In [43]:
# Invert the {label: class_id} mapping so predicted ids become label strings again.
label_decoder = {class_id: name for name, class_id in label_unique.items()}

f_result = list(map(label_decoder.__getitem__, f_pred))

In [50]:
# Fill the sample submission with the decoded predictions and write it out.
submission = pd.read_csv("./sample_submission.csv")

submission["label"] = f_result

# index=False: otherwise pandas prepends its RangeIndex as an extra unnamed
# column, so the file no longer has the same columns as the sample submission.
submission.to_csv('result.csv', index=False)

In [45]:
# Sanity check: number of test images held in memory.
len(test_imgs)

2154

## ViT (Vision Transformer)

In [46]:
from einops import rearrange, repeat
from einops.layers.torch import Rearrange

In [67]:
class AddGaussianNoise(object):
    """Transform that adds Gaussian noise to a tensor.

    Args:
        mean: Offset added to the noise.
        std: Factor the standard-normal noise is multiplied by.
    """

    def __init__(self, mean=0., std=1.):
        self.std = std
        self.mean = mean

    def __call__(self, tensor):
        """Return ``tensor + N(0, 1) * std + mean`` (a new tensor)."""
        # Sample the noise on the input's device so the transform also works
        # for tensors that do not live on the CPU.
        noise = torch.randn(tensor.size(), device=tensor.device)
        return tensor + noise * self.std + self.mean

    def __repr__(self):
        return self.__class__.__name__ + '(mean={0}, std={1})'.format(self.mean, self.std)

In [557]:
# Augmentation pipeline without resizing.
# ToTensor has to run BEFORE AddGaussianNoise: the noise transform calls
# `tensor.size()`, which fails on a PIL image (its `size` is a plain tuple).
# NOTE(review): RandomRotation takes degrees, so 0.02 is an almost
# imperceptible rotation - confirm this is the intended magnitude.
transform_function_set = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomRotation(0.02),
    transforms.ToTensor(),
    AddGaussianNoise(0., 1.),
])

# Pipeline used by the MyDataset training cell: resize, random flips, tiny
# rotation, conversion to tensor, then additive Gaussian noise.
# NOTE(review): Resize(512) given a single int keeps the aspect ratio, so
# batching presumably relies on the source images being square - confirm.
torchvision_transform = transforms.Compose([
    transforms.Resize(512),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomRotation(0.02),
    transforms.ToTensor(),
    AddGaussianNoise(0., 1.),
])

class MyDataset(Dataset):
    """Dataset that lazily loads PNG images from a directory.

    Args:
        path: Directory searched for ``*.png`` files (sorted by name).
        label_file_path: Optional CSV with a ``label`` column. Its distinct
            values are sorted and encoded as integer class ids. Rows are
            matched to images purely by position.
            NOTE(review): this assumes the CSV row order equals the sorted
            file order - confirm against the data.
        transform: Optional callable applied to the loaded RGB image.
        mode: ``'train'`` or ``'test'``; stored on the instance. Items are
            loaded the same way in both modes.
    """

    # Label returned when the dataset was built without a label file.
    NO_LABEL = -1

    def __init__(self, path, label_file_path=None, transform=None, mode='train'):
        super().__init__()

        self.img_list = sorted(glob(path + '/*.png'))

        # Always define these so __len__/__getitem__ work without a label file.
        self.labels = None
        self.label_unique = None
        if label_file_path is not None:
            label_column = pd.read_csv(label_file_path)["label"]
            distinct = sorted(np.unique(label_column))
            self.label_unique = {name: class_id for class_id, name in enumerate(distinct)}
            self.labels = [self.label_unique[name] for name in label_column]

        self.mode = mode
        self.transform = transform
        self.seed = 2022  # kept for compatibility; not used inside this class

    def __len__(self):
        """Return the number of labels, or of images when there are no labels."""
        if self.labels is not None:
            return len(self.labels)
        return len(self.img_list)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for position ``idx``.

        The image is opened as RGB and passed through ``transform`` when one
        was given. The label is the encoded class id, or ``NO_LABEL`` when the
        dataset has no labels.
        """
        # Close the file handle right after decoding instead of leaving it
        # to the garbage collector.
        with Image.open(self.img_list[idx]) as handle:
            img = handle.convert("RGB")

        if self.transform is not None:
            img = self.transform(img)

        label = self.labels[idx] if self.labels is not None else self.NO_LABEL
        return img, label

In [558]:
#train_imgs= [Image.fromarray(img) for img in np.array(train_imgs)]

In [559]:
# Rebuild the training set as a lazily-loading MyDataset (replaces the
# earlier in-memory `train_dataset`).
train_dataset = MyDataset(
    './train/train/',
    './train_df.csv',
    transform=torchvision_transform,
    mode='train',
)

In [560]:
# Shuffled loader over the current `train_dataset`, reusing the notebook-wide batch size.
train_loader = DataLoader(
    train_dataset,
    shuffle=True,
    batch_size=batch_size,
)

In [562]:
# Retrain a fresh model on the augmented loader, with mixed precision.
model = Network().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()
scaler = torch.cuda.amp.GradScaler()
best = 0  # NOTE(review): never read or updated in this cell

# This run uses its own epoch budget. The progress line used to print the
# notebook-wide `epochs` (25) as the total even though the loop ran 100 times.
num_epochs = 100
for epoch in range(num_epochs):
    train_loss = 0
    train_pred = []
    train_y = []
    model.train()
    for batch in train_loader:
        optimizer.zero_grad()
        # The DataLoader already yields tensors; `.to()` moves/casts them
        # without the extra copy (and warning) of `torch.tensor(tensor)`.
        x = batch[0].to(device=device, dtype=torch.float32)
        y = batch[1].to(device=device, dtype=torch.long)
        with torch.cuda.amp.autocast():
            pred = model(x)
            # Keep the loss inside autocast so it is not evaluated directly
            # on the half-precision logits.
            loss = criterion(pred, y)

        # Scale the loss before backward, then let the scaler step/update.
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        # Running mean of the batch losses over the epoch.
        train_loss += loss.item() / len(train_loader)
        train_pred += pred.argmax(1).detach().cpu().numpy().tolist()
        train_y += y.detach().cpu().numpy().tolist()

    train_f1 = score_function(train_y, train_pred)

    print(f'epoch : {epoch+1}/{num_epochs}')
    print(f'TRAIN    loss : {train_loss:.5f}    f1 : {train_f1:.5f}')




epoch : 1/25
TRAIN    loss : 1.19522    f1 : 0.15364
epoch : 2/25
TRAIN    loss : 0.81535    f1 : 0.17757
epoch : 3/25
TRAIN    loss : 0.74650    f1 : 0.18933


KeyboardInterrupt: 

In [None]:
# Predict a class id for every test image with the current `model`.
# NOTE(review): `test_loader` is not rebuilt in this section, so this
# presumably still iterates the in-memory test loader created earlier - confirm.
model.eval()
f_pred = []

with torch.no_grad():
    for batch in test_loader:
        # The batch is already a tensor; `.to()` avoids the redundant copy
        # (and warning) caused by `torch.tensor(tensor)`.
        x = batch[0].to(device=device, dtype=torch.float32)
        with torch.cuda.amp.autocast():
            pred = model(x)
        f_pred.extend(pred.argmax(1).cpu().tolist())

In [None]:
# Build the reverse lookup {class_id: label} and decode every predicted id.
label_decoder = dict(zip(label_unique.values(), label_unique.keys()))

f_result = [label_decoder[class_id] for class_id in f_pred]

<__main__.MyDataset at 0x14fc8398670>

In [None]:
# Fill the sample submission with the decoded predictions and write it out.
submission = pd.read_csv("./sample_submission.csv")

submission["label"] = f_result

# index=False: otherwise pandas prepends its RangeIndex as an extra unnamed
# column, so the file no longer has the same columns as the sample submission.
submission.to_csv('result.csv', index=False)

In [None]:
from vit_pytorch.regionvit import RegionViT

# RegionViT hyper-parameters, gathered in one mapping before construction.
regionvit_kwargs = {
    "dim": (64, 128, 256, 512),          # tuple of size 4: dimension at each stage
    "depth": (2, 2, 8, 2),               # depth of the region-to-local transformer at each stage
    "window_size": 14,                   # window size, which should be either 7 or 14
    "num_classes": len(label_unique),    # one output per distinct training label
    "tokenize_local_3_conv": False,      # False = single conv for local tokens (3-conv variant is for smaller models)
    "use_peg": False,                    # positional generating module, off here
}

# Rebinds `model`; note that it is not moved to `device` in this cell.
model = RegionViT(**regionvit_kwargs)


In [None]:
# (stray, incomplete token `la` removed: it raised NameError on Run All)

In [565]:
# Number of distinct classes in the label mapping.
len(label_unique)

88