## 0. Libarary 불러오기 및 경로설정

In [1]:
!pip install seaborn



In [2]:
import os
import pandas as pd
import numpy as np
import seaborn as sns
from PIL import Image
from pandas import DataFrame
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
import torch.optim as optim

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, random_split

from torchsummary import summary


from torchvision import transforms
from torchvision.transforms import Resize, ToTensor, Normalize
import torchvision.models as models
import timm

import wandb

In [3]:
model_list=['resnext50_32x4d','resnext101_32x8d','vit_base_patch16_224','my_model','vgg16']
wandb.init(project='img-classification-38', entity='zeus0007',config = {
    'learning_rate':0.01,
    'batch_size':64,
    'epoch':30,
    'model':'resnext50_32x4d',
})
config = wandb.config

[34m[1mwandb[0m: Currently logged in as: [33mzeus0007[0m (use `wandb login --relogin` to force relogin)


In [4]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [5]:
# 경로 설정
TRAIN_MASK_PATH = {'label':'/opt/ml/input/data/train/train.csv','images':'/opt/ml/input/data/train/images','new':'/opt/ml/input/data/train/new_train.csv'}
TEST_MASK_PATH = '/input/data/eval'

In [6]:
#make transforms
#넣을것 리사이즈, 좌우반전, shiftscalerotate, 가우시안 노이즈, 노말라이즈
# 내가 설정한것 : transform 종류 & 얼만큼빈도로 적용할건지, 노말라이즈 정도는 얼마로 할건지
# albumentations넣을건지
import albumentations as A
from albumentations.pytorch import ToTensorV2
def transforms(train=True, img_size=(256, 192), mean=(0.548, 0.504, 0.479), std=(0.237, 0.247, 0.246)):
    if train:
        transform = A.Compose([
            A.Resize(img_size[0], img_size[1], p=1.0),
            A.HorizontalFlip(p=0.5),
            A.ShiftScaleRotate(p=0.5),
            A.HueSaturationValue(hue_shift_limit=0.2, sat_shift_limit=0.2, val_shift_limit=0.2, p=0.5),
            A.RandomBrightnessContrast(brightness_limit=(-0.1, 0.1), contrast_limit=(-0.1, 0.1), p=0.5),
            A.GaussNoise(p=0.5),
            A.Normalize(mean=mean, std=std, max_pixel_value=255.0, p=1.0),
            ToTensorV2(p=1.0),
        ], p=1.0)
    else:
        transform = A.Compose([
            A.Resize(img_size[0], img_size[1]),
            A.Normalize(mean=mean, std=std, max_pixel_value=255.0, p=1.0),
            ToTensorV2(p=1.0),
        ], p=1.0)
    return transform

In [7]:
#TODOS:데이터셋 만들기
class MaskDataset(Dataset):
    def __init__(self, path,train=True):
        # TODOS:csv 가져오기
        data = pd.read_csv(path['new'])
        image_path = data['abs_path']
        
        self.classified_labels = data['class']
        self.images_full_path = image_path
        
#         self.images = np.array([Image.open(image_full_path) for image_full_path in tqdm(images_full_path)])

    def set_transform(self,transform):
        self.transform = transform
        
    def __len__(self):
        return self.images_full_path.shape[0]
    
    def __getitem__(self,idx):
        
        image_path = self.images_full_path[idx]
        image = Image.open('/opt/ml/'+image_path)
        y = self.classified_labels[idx]
        
        X = self.transform(image=np.array(image))['image']
        type(X)
        return X,y

In [8]:
train_mask_dataset = MaskDataset(TRAIN_MASK_PATH, train=True)

n_val = int(len(train_mask_dataset) * 0.2)
n_train = len(train_mask_dataset) - n_val
train_dataset, val_dataset = random_split(train_mask_dataset, [n_train, n_val])
print(train_dataset)


train_dataset.dataset.set_transform(transforms(train=True))
val_dataset.dataset.set_transform(transforms(train=False))

<torch.utils.data.dataset.Subset object at 0x7f94480eca00>


In [9]:
len(train_mask_dataset)

18899

In [10]:
val_dataset.dataset[0]

(tensor([[[ 0.8482,  0.8482,  0.8482,  ...,  1.0137,  1.0137,  1.0137],
          [ 0.8482,  0.8482,  0.8482,  ...,  1.0137,  1.0137,  1.0137],
          [ 0.8482,  0.8482,  0.8482,  ...,  1.0137,  1.0137,  1.0137],
          ...,
          [ 0.2360,  0.2360,  0.2029,  ...,  0.0208,  0.0705,  0.1201],
          [ 0.1863,  0.1863,  0.1532,  ...,  0.1036,  0.1532,  0.1698],
          [ 0.0870,  0.0870,  0.0539,  ...,  0.1367,  0.2194,  0.2360]],
 
         [[ 0.9761,  0.9761,  0.9761,  ...,  1.1349,  1.1349,  1.1349],
          [ 0.9761,  0.9761,  0.9761,  ...,  1.1349,  1.1349,  1.1349],
          [ 0.9761,  0.9761,  0.9761,  ...,  1.1349,  1.1349,  1.1349],
          ...,
          [-0.4211, -0.4211, -0.4528,  ..., -0.5957, -0.5481, -0.5163],
          [-0.4687, -0.4687, -0.5004,  ..., -0.5798, -0.5481, -0.5481],
          [-0.5639, -0.5639, -0.5957,  ..., -0.6116, -0.5481, -0.5322]],
 
         [[ 1.0020,  1.0020,  1.0020,  ...,  1.1614,  1.1614,  1.1614],
          [ 1.0020,  1.0020,

In [11]:
train_loader = DataLoader(
    train_dataset,
    batch_size=config.batch_size,
    num_workers=4,
    shuffle=True
)

val_loader = DataLoader(
    val_dataset,
    batch_size=config.batch_size,
    num_workers=4,
    shuffle=False
)

In [12]:
class BasicBlock(nn.Module):
    
    def __init__(self, in_dim, mid_dim,out_dim, stride=1):
        super().__init__()

        #residual function
        self.residual_function = nn.Sequential(
            nn.Conv2d(in_dim, out_dim, kernel_size=1, padding=0, bias=False, stride=1),
        )
        
        self.shortcut = nn.Sequential(
            nn.Conv2d(in_dim, mid_dim, kernel_size=1, padding=0, bias=False, stride=1),
            nn.BatchNorm2d(mid_dim),
            nn.ReLU(inplace= True),
            nn.Conv2d(mid_dim, out_dim, kernel_size=3, padding=1, bias=False, stride=1),
            nn.BatchNorm2d(out_dim)
        )

    def forward(self, x):
        return nn.ReLU(inplace=True)(self.residual_function(x) + self.shortcut(x))
    
def make_layer(in_dim, mid_dim, out_dim, repeats, starting=False):
        layers = []
        layers.append(BasicBlock(in_dim, mid_dim, out_dim))
        for _ in range(1, repeats):
            layers.append(BasicBlock(out_dim, mid_dim, out_dim))
        return nn.Sequential(*layers)


class ResNet(nn.Module):

    def __init__(self, num_classes=100):
        super().__init__()

        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1, bias=False, stride=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.Conv2d(32, 64, kernel_size=3, padding=1, bias=False, stride=2),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True))
        base_dim = 64
        self.conv2_x = make_layer(base_dim, 32, base_dim, 1)
        self.s_conv1 = self.simple_conv(base_dim, base_dim*2)
        self.conv3_x = make_layer(base_dim*2, base_dim, base_dim*2, 2)
        self.s_conv2 = self.simple_conv(base_dim*2, base_dim*4)
        self.conv4_x = make_layer(base_dim*4, base_dim*2, base_dim*4, 8)
        self.s_conv3 = self.simple_conv(base_dim*4, base_dim*8)
        self.conv5_x = make_layer(base_dim*8, base_dim*4, base_dim*8, 8)
        self.s_conv4 = self.simple_conv(base_dim*8, base_dim*16)
        self.conv6_x = make_layer(base_dim*16, base_dim*8, base_dim*16, 4)
        self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(1024, num_classes)
    
    def simple_conv(self, in_dim, out_dim):
        return nn.Sequential(
            nn.Conv2d(in_dim, out_dim, kernel_size=3, padding=1, bias=False, stride=2),
            nn.BatchNorm2d(out_dim),
            nn.ReLU(inplace= True))

    def forward(self, x):
        output = self.conv1(x)
        output = self.conv2_x(output)
        output = self.s_conv1(output)
        output = self.conv3_x(output)
        output = self.s_conv2(output)
        output = self.conv4_x(output)
        output = self.s_conv3(output)
        output = self.conv5_x(output)
        output = self.s_conv4(output)
        output = self.conv6_x(output)
        output = self.avg_pool(output)
        output = output.view(output.size(0), -1)
        output = self.fc(output)
        return output



In [13]:
def config_model(model):
    if model == 'resnext50_32x4d':
        return models.resnext50_32x4d(pretrained=True).to(device)
    elif model == 'my_model':
        return ResNet(18).to(device)
    elif model == 'resnext101_32x8d':
        return models.resnext101_32x8d(pretrained=True).to(device)
    elif model == 'vit_base_patch16_224':
        return timm.create_model('vit_base_patch16_224',pretrained=True).to(device)
    elif model == 'vgg16':
        return models.vgg16(pretrained=True).to(device)
    elif model == 'resnet156':
        return models.
        

In [14]:
model = config_model(config.model)
num_features = model.fc.in_features
print(num_features)
model.fc = nn.Linear(num_features, 18).to(device)

2048


In [15]:
lr = config.learning_rate
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=lr)
image_datasets = {
    'train':train_dataset,
    'validation':val_dataset}
dataloaders = {'train':train_loader, 'validation':val_loader}

In [16]:
def train_model(model, criterion, optimizer, num_epochs=3):
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch+1, num_epochs))
        print('-' * 10)

        for phase in ['train', 'validation']:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in tqdm(dataloaders[phase]):
                inputs = inputs.to(device)
                labels = labels.to(device)

                outputs = model(inputs)
                loss = criterion(outputs, labels)

                if phase == 'train':
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

                _, preds = torch.max(outputs, 1)
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / len(image_datasets[phase])
            epoch_acc = running_corrects.double() / len(image_datasets[phase])
            wandb.log({f"{phase}_acc":epoch_acc, f"{phase}_loss":epoch_loss})
            print('{} loss: {:.4f}, acc: {:.4f}'.format(phase,
                                                        epoch_loss,
                                                        epoch_acc))
    return model

In [18]:
class TestDataset(Dataset):
    def __init__(self, img_paths, transform):
        self.img_paths = img_paths
        self.transform = transform

    def __getitem__(self, index):
        image = Image.open(self.img_paths[index])

        if self.transform:
            image = self.transform(image)
        return image

    def __len__(self):
        return len(self.img_paths)

In [None]:
from torchvision import transforms
# meta 데이터와 이미지 경로를 불러옵니다.
test_dir = '/opt/ml/input/data/eval'
submission = pd.read_csv(os.path.join(test_dir, 'info.csv'))
image_dir = os.path.join(test_dir, 'images')

# Test Dataset 클래스 객체를 생성하고 DataLoader를 만듭니다.
image_paths = [os.path.join(image_dir, img_id) for img_id in submission.ImageID]
transform = transforms.Compose([
    Resize((512, 384), Image.BILINEAR),
    ToTensor(),
    Normalize(mean=(0.5, 0.5, 0.5), std=(0.2, 0.2, 0.2)),
])
dataset = TestDataset(image_paths, transform)

loader = DataLoader(
    dataset,
    shuffle=False
)

# 모델을 정의합니다. (학습한 모델이 있다면 torch.load로 모델을 불러주세요!)
model = train_model(model, criterion, optimizer, num_epochs=config.epoch)
model.eval()

print('done')

Epoch 1/30
----------


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=237.0), HTML(value='')))


train loss: 2.3973, acc: 0.2610


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=60.0), HTML(value='')))


validation loss: 2.0120, acc: 0.3289
Epoch 2/30
----------


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=237.0), HTML(value='')))


train loss: 1.4206, acc: 0.5006


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=60.0), HTML(value='')))


validation loss: 1.6418, acc: 0.4962
Epoch 3/30
----------


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=237.0), HTML(value='')))


train loss: 0.8681, acc: 0.6946


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=60.0), HTML(value='')))


validation loss: 0.8197, acc: 0.7184
Epoch 4/30
----------


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=237.0), HTML(value='')))


train loss: 0.6733, acc: 0.7555


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=60.0), HTML(value='')))


validation loss: 1.0951, acc: 0.6311
Epoch 5/30
----------


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=237.0), HTML(value='')))


train loss: 0.5589, acc: 0.7921


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=60.0), HTML(value='')))


validation loss: 0.6038, acc: 0.7835
Epoch 6/30
----------


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=237.0), HTML(value='')))


train loss: 0.4746, acc: 0.8250


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=60.0), HTML(value='')))


validation loss: 0.6586, acc: 0.7732
Epoch 7/30
----------


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=237.0), HTML(value='')))


train loss: 0.4011, acc: 0.8488


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=60.0), HTML(value='')))


validation loss: 0.6935, acc: 0.7753
Epoch 8/30
----------


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=237.0), HTML(value='')))


train loss: 0.3694, acc: 0.8631


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=60.0), HTML(value='')))


validation loss: 0.7066, acc: 0.7716
Epoch 9/30
----------


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=237.0), HTML(value='')))


train loss: 0.3198, acc: 0.8780


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=60.0), HTML(value='')))


validation loss: 0.9716, acc: 0.6827
Epoch 10/30
----------


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=237.0), HTML(value='')))


train loss: 0.2827, acc: 0.8935


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=60.0), HTML(value='')))


validation loss: 0.5944, acc: 0.8076
Epoch 11/30
----------


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=237.0), HTML(value='')))


train loss: 0.2702, acc: 0.8976


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=60.0), HTML(value='')))


validation loss: 0.5728, acc: 0.8185
Epoch 12/30
----------


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=237.0), HTML(value='')))


train loss: 0.2445, acc: 0.9086


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=60.0), HTML(value='')))


validation loss: 0.3926, acc: 0.8550
Epoch 13/30
----------


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=237.0), HTML(value='')))

In [None]:
# 모델이 테스트 데이터셋을 예측하고 결과를 저장합니다.
all_predictions = []

for images in tqdm(loader):
    with torch.no_grad():
        images = images.to(device)
        pred = model(images)
        pred = pred.argmax(dim=-1)
        all_predictions.extend(pred.cpu().numpy())
submission['ans'] = all_predictions

# 제출할 파일을 저장합니다.
submission.to_csv(os.path.join(test_dir, 'submission.csv'), index=False)
print('test inference is done!')