In [2]:
import torch
import torch.nn as nn
from tqdm.notebook import tqdm

from torch.utils.data import DataLoader
from torch.optim import Optimizer

import torch.nn.functional as F

# def loss_func(out, y):
#     return F.binary_cross_entropy_with_logits(out, y)

loss_func = nn.CrossEntropyLoss()

def train_one_epoch(model: nn.Module, optimizer: Optimizer, data_loader: DataLoader, device, epoch):
    model.train()
    model.zero_grad()
    tqdm_dataloader = tqdm(data_loader)
    total_batch = 0
    for targets in tqdm_dataloader:
        images = targets['image'].to(device)
        labels = targets['label'].to(device)
        total_batch += len(images)

        optimizer.zero_grad()
        out = model(images)

        loss = loss_func(out, labels)
        loss.backward()
        optimizer.step()

        tqdm_dataloader.set_description(
            f"Epoch {epoch + 1}, lr is {optimizer.param_groups[0]['lr']:.6f} loss {loss.item():.3f}")

In [3]:
from PIL import Image
from torchvision import transforms

from torch.utils.data import Dataset

class TestDataset(Dataset):
    def __init__(self, img_paths, transform):
        self.img_paths = img_paths
        self.transform = transform

    def __getitem__(self, index):
        image = Image.open(self.img_paths[index])

        if self.transform:
            image = self.transform(image)
        return image

    def __len__(self):
        return len(self.img_paths)


class TrainDataset_01(Dataset):
    def __init__(self, img_paths, labels, transform = None):
        self.img_paths = img_paths
        self.labels = labels
        if transform is None:
            self.transform = transforms.Compose([
                transforms.Resize((512, 384), Image.BILINEAR),
                transforms.ToTensor(),
                transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.2, 0.2, 0.2)),
            ])
        else:
            self.transform = transform

    def __getitem__(self, index):
        image = Image.open(self.img_paths[index])
        label = self.labels[index]

        if self.transform:
            image = self.transform(image)
        return {'image': image, 'label': label}
    
    def __len__(self):
        return len(self.img_paths)

In [4]:
import os
import pandas as pd

In [5]:
import torchvision

In [33]:
class PassLayer(nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, x):
        return x

In [41]:
class MyModel(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.imagenet_resnet18 = torchvision.models.resnet18(pretrained=True)
        self.imagenet_resnet18.fc = nn.Sequential()
        """
        1. 위와 같이 생성자의 parameter 에 num_claases 를 포함해주세요.
        2. 나만의 모델 아키텍쳐를 디자인 해봅니다.
        3. 모델의 output_dimension 은 num_classes 로 설정해주세요.
        """

    def forward(self, x):
        """
        1. 위에서 정의한 모델 아키텍쳐를 forward propagation 을 진행해주세요
        2. 결과로 나온 output 을 return 해주세요
        """
        x = self.imagenet_resnet18(x)
        return x

In [42]:
mymodel = MyModel(num_classes=18)

In [43]:
mymodel

MyModel(
  (imagenet_resnet18): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True,

In [2]:
imagenet_resnet152 = torchvision.models.resnet152()

In [5]:
imagenet_efficientnet_b7 = torchvision.models.efficientnet_b7(pretrained=True)

In [6]:
imagenet_efficientnet_b7

EfficientNet(
  (features): Sequential(
    (0): ConvNormActivation(
      (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(64, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
    (1): Sequential(
      (0): MBConv(
        (block): Sequential(
          (0): ConvNormActivation(
            (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=64, bias=False)
            (1): BatchNorm2d(64, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
            (2): SiLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(64, 16, kernel_size=(1, 1), stride=(1, 1))
            (fc2): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1))
            (activation): SiLU(inplace=True)
            (scale_activation): Sigmoid()
          )
          (2): ConvNormActivatio

In [7]:
imagenet_efficientnet_b7.classifier = nn.Sequential( nn.Dropout(p=0.5, inplace=True),
                                                        nn.Linear(in_features=2560, out_features=18, bias=True))
nn.init.xavier_uniform_(imagenet_efficientnet_b7.classifier[1].weight)
imagenet_efficientnet_b7.classifier[1].bias.data.uniform_(-0.01, 0.01)

tensor([-0.0055,  0.0004, -0.0085,  0.0055, -0.0056, -0.0019,  0.0058,  0.0100,
         0.0097,  0.0067,  0.0075, -0.0036,  0.0023,  0.0005, -0.0062, -0.0056,
         0.0088, -0.0045])

In [10]:
imagenet_resnet152.fc = nn.Linear(in_features=2048, out_features=18, bias=True)
nn.init.xavier_uniform_(imagenet_resnet152.fc.weight)
imagenet_resnet152.fc.bias.data.uniform_(-0.01, 0.01)

tensor([ 0.0007, -0.0011, -0.0045,  0.0058, -0.0082, -0.0089, -0.0065, -0.0090,
        -0.0093, -0.0073, -0.0004,  0.0010,  0.0091,  0.0097,  0.0046,  0.0023,
         0.0020, -0.0003])

In [13]:
EPOCH = 10
lr = 0.001
device = torch.device('cuda')

TRAIN_DIR = '/opt/ml/input/data/train'
train_info = pd.read_csv(os.path.join(TRAIN_DIR, 'train_info_01.csv'))

image_paths = train_info['path'].values
labels = train_info['label'].values

In [9]:
mymodel = MyModel(num_classes=18)

Downloading: "https://download.pytorch.org/models/resnet18-5c106cde.pth" to /opt/ml/.cache/torch/hub/checkpoints/resnet18-5c106cde.pth


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=46827520.0), HTML(value='')))




In [9]:
mymodel = imagenet_efficientnet_b7

In [27]:
import gc
gc.collect()
torch.cuda.empty_cache()


In [15]:
from torch.optim import Adam
optimizer = Adam(mymodel.parameters(), lr=lr)
train_dataset = TrainDataset_01(image_paths, labels)
train_loader = DataLoader(train_dataset, batch_size=9, shuffle=True)

In [25]:
train_dataset[0]['image'].unsqueeze(-1).shape

torch.Size([3, 512, 384, 1])

In [40]:
for tr in train_loader:
    print(tr['image'].shape)
    print(mymodel(tr['image']))
    print(mymodel(tr['image']).shape)
    break

torch.Size([9, 3, 512, 384])
tensor([[0.7654, 0.9422, 0.7972,  ..., 1.0083, 1.0092, 1.1355],
        [1.3683, 0.5675, 1.0643,  ..., 1.8683, 0.7425, 0.6504],
        [1.1697, 0.7018, 0.6267,  ..., 0.4234, 0.6042, 1.1641],
        ...,
        [1.2429, 0.5828, 0.4835,  ..., 1.5501, 1.0972, 1.1180],
        [0.8898, 1.2122, 1.7035,  ..., 0.9071, 0.9522, 1.0703],
        [0.7992, 0.5486, 0.3371,  ..., 0.6949, 0.8135, 0.4160]],
       grad_fn=<ReshapeAliasBackward0>)
torch.Size([9, 512])


In [44]:
for tr in train_loader:
    print(tr['image'].shape)
    print(mymodel(tr['image']))
    print(mymodel(tr['image']).shape)
    break

torch.Size([9, 3, 512, 384])
tensor([[0.8694, 0.8907, 0.7085,  ..., 1.2971, 0.6992, 1.1134],
        [1.0262, 0.7598, 0.2948,  ..., 1.3202, 0.9475, 1.2000],
        [1.1937, 1.2158, 1.7803,  ..., 0.6100, 1.5026, 0.9482],
        ...,
        [1.3307, 0.5121, 0.3866,  ..., 0.9738, 1.2589, 0.8199],
        [0.9508, 1.0866, 0.2812,  ..., 0.7073, 1.4014, 0.6117],
        [0.2478, 0.7312, 0.5776,  ..., 1.0971, 0.7107, 0.9167]],
       grad_fn=<ReshapeAliasBackward0>)
torch.Size([9, 512])


In [26]:
mymodel(train_dataset[0]['image'])

RuntimeError: Given groups=1, weight of size [64, 3, 7, 7], expected input[3, 512, 384, 1] to have 3 channels, but got 512 channels instead

In [12]:
mymodel.to(device)

EfficientNet(
  (features): Sequential(
    (0): ConvNormActivation(
      (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(64, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
    (1): Sequential(
      (0): MBConv(
        (block): Sequential(
          (0): ConvNormActivation(
            (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=64, bias=False)
            (1): BatchNorm2d(64, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
            (2): SiLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(64, 16, kernel_size=(1, 1), stride=(1, 1))
            (fc2): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1))
            (activation): SiLU(inplace=True)
            (scale_activation): Sigmoid()
          )
          (2): ConvNormActivatio

In [29]:
train_one_epoch(mymodel, optimizer, train_loader, device, 0)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=2100.0), HTML(value='')))




In [10]:
test_dir = '/opt/ml/input/data/eval'
submission = pd.read_csv(os.path.join(test_dir, 'info.csv'))
test_image_dir = os.path.join(test_dir, 'images')
test_image_paths = [os.path.join(test_image_dir, img_id) for img_id in submission.ImageID]

In [11]:
transform = transforms.Compose([
    transforms.Resize((512, 384), Image.BILINEAR),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.2, 0.2, 0.2)),
])
dataset = TestDataset(test_image_paths, transform)
test_loader = DataLoader(dataset, batch_size=32, shuffle=False)



In [32]:
mymodel.eval()
all_predictions = []
tqdm_test_dataloader = tqdm(test_loader)
for images in tqdm_test_dataloader:
    with torch.no_grad():
        images = images.to(device)
        pred = mymodel(images)
        pred = pred.argmax(dim=-1)
        all_predictions.extend(pred.cpu().numpy())
submission['ans'] = all_predictions

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=394.0), HTML(value='')))




In [33]:
submission

Unnamed: 0,ImageID,ans
0,cbc5c6e168e63498590db46022617123f1fe1268.jpg,13
1,0e72482bf56b3581c081f7da2a6180b8792c7089.jpg,1
2,b549040c49190cedc41327748aeb197c1670f14d.jpg,13
3,4f9cb2a045c6d5b9e50ad3459ea7b791eb6e18bc.jpg,13
4,248428d9a4a5b6229a7081c32851b90cb8d38d0c.jpg,12
...,...,...
12595,d71d4570505d6af8f777690e63edfa8d85ea4476.jpg,1
12596,6cf1300e8e218716728d5820c0bab553306c2cfd.jpg,4
12597,8140edbba31c3a824e817e6d5fb95343199e2387.jpg,9
12598,030d439efe6fb5a7bafda45a393fc19f2bf57f54.jpg,1


In [34]:
submission.to_csv('submission_efnet.csv', index=False)