## 데이터 압축 풀기

In [1]:
# import zipfile

# # 전체 데이터를 묶고 있는 압축파일 풀기
# zip_all = zipfile.ZipFile('C:\\Users\\pc\\Jupyter-workspace\\제2회컴퓨터비전학습경진대회\\data\\2차 배포.zip')
# zip_all.extractall('C:\\Users\\pc\\Jupyter-workspace\\제2회컴퓨터비전학습경진대회\\data')
# zip_all.close()

# # train data 압축풀기
# zip_train = zipfile.ZipFile('C:\\Users\\pc\\Jupyter-workspace\\제2회컴퓨터비전학습경진대회\\data\\dirty_mnist_2nd.zip')
# zip_train.extractall('C:\\Users\\pc\\Jupyter-workspace\\제2회컴퓨터비전학습경진대회\\data\\train')
# zip_train.close()

# # test data 압축풀기
# zip_test = zipfile.ZipFile('C:\\Users\\pc\\Jupyter-workspace\\제2회컴퓨터비전학습경진대회\\data\\test_dirty_mnist_2nd.zip')
# zip_test.extractall('C:\\Users\\pc\\Jupyter-workspace\\제2회컴퓨터비전학습경진대회\\data\\test')
# zip_test.close()

# # csv file 압축풀기
# zip_csv = zipfile.ZipFile('C:\\Users\\pc\\Jupyter-workspace\\제2회컴퓨터비전학습경진대회\\data\\mnist_data.zip')
# zip_csv.extractall('C:\\Users\\pc\\Jupyter-workspace\\제2회컴퓨터비전학습경진대회\\data')
# zip_csv.close()

In [2]:
# https://code.tutsplus.com/ko/tutorials/compressing-and-extracting-files-in-python--cms-26816

## 라이브러리 및 데이터 불러오기

In [3]:
import os
from typing import Tuple, Sequence, Callable
import sys
import csv
import cv2
import numpy as np
import pandas as pd
from PIL import Image
import torch
import torch.optim as optim
from torch import nn, Tensor
from torch.utils.data import Dataset, DataLoader
from torchinfo import summary

from torchvision import transforms, models
from torchvision.models import resnet50

In [4]:
class MnistDataset(Dataset):
    def __init__(
        self,
        dir: os.PathLike,
        image_ids: os.PathLike,
        transforms: Sequence[Callable]
    ) -> None:
        self.dir = dir
        self.transforms = transforms

        self.labels = {}
        with open(image_ids, 'r') as f:
            reader = csv.reader(f)
            next(reader)
            for row in reader:
                self.labels[int(row[0])] = list(map(int, row[1:]))

        self.image_ids = list(self.labels.keys())

    def __len__(self) -> int:
        return len(self.image_ids)

    def __getitem__(self, index: int) -> Tuple[Tensor]:
        image_id = self.image_ids[index]
        image = Image.open(
            os.path.join(
                self.dir, f'{str(image_id).zfill(5)}.png')).convert('RGB')
        target = np.array(self.labels.get(image_id)).astype(np.float32)

        if self.transforms is not None:
            image = self.transforms(image)

        return image, target

## image augmentation

In [5]:
transforms_train = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.ToTensor(),
    transforms.Normalize(
        [0.485, 0.456, 0.406],
        [0.229, 0.224, 0.225]
    )
])

transforms_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(
        [0.485, 0.456, 0.406],
        [0.229, 0.224, 0.225]
    )
])

In [6]:
trainset = MnistDataset('data/train', 'data/dirty_mnist_2nd_answer.csv', transforms_train)
testset = MnistDataset('data/test', 'data/sample_submission.csv', transforms_test)

train_loader = DataLoader(trainset, batch_size=256, num_workers=8)
test_loader = DataLoader(testset, batch_size=32, num_workers=4)

In [7]:
class MnistModel(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.resnet = resnet50(pretrained=True)
        self.classifier = nn.Linear(1000, 26)

    def forward(self, x):
        x = self.resnet(x)
        x = self.classifier(x)

        return x

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = MnistModel().to(device)
print(summary(model, input_size=(1, 3, 256, 256), verbose=0))

Layer (type:depth-idx)                   Output Shape              Param #
├─ResNet: 1-1                            [1, 1000]                 --
|    └─Conv2d: 2-1                       [1, 64, 128, 128]         9,408
|    └─BatchNorm2d: 2-2                  [1, 64, 128, 128]         128
|    └─ReLU: 2-3                         [1, 64, 128, 128]         --
|    └─MaxPool2d: 2-4                    [1, 64, 64, 64]           --
|    └─Sequential: 2-5                   [1, 256, 64, 64]          --
|    |    └─Bottleneck: 3-1              [1, 256, 64, 64]          75,008
|    |    └─Bottleneck: 3-2              [1, 256, 64, 64]          70,400
|    |    └─Bottleneck: 3-3              [1, 256, 64, 64]          70,400
|    └─Sequential: 2-6                   [1, 512, 32, 32]          --
|    |    └─Bottleneck: 3-4              [1, 512, 32, 32]          379,392
|    |    └─Bottleneck: 3-5              [1, 512, 32, 32]          280,064
|    |    └─Bottleneck: 3-6              [1, 512, 32, 32]  

In [13]:
from errno import EPIPE

optimizer = optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.MultiLabelSoftMarginLoss()

num_epochs = 10
model.train()

def main():
    for epoch in range(num_epochs):
        try:
            for i, (images, targets) in enumerate(train_loader):
                optimizer.zero_grad()

                images = images.to(device)
                targets = targets.to(device)

                outputs = model(images)
                loss = criterion(outputs, targets)

                loss.backward()
                optimizer.step()

                if (i+1) % 10 == 0:
                    outputs = outputs > 0.5
                    acc = (outputs == targets).float().mean()
                    print(f'{epoch}: {loss.item():.5f}, {acc.item():.5f}')
                    
        except broken_pipe_exception as exc:
            if broken_pipe_exception == IOError:
                if exc.errno != EPIPE:
                    raise

In [15]:
submit = pd.read_csv('data/sample_submission.csv')

model.eval()
batch_size = test_loader.batch_size
batch_index = 0
try:
    for i, (images, targets) in enumerate(test_loader):
        images = images.to(device)
        targets = targets.to(device)
        outputs = model(images)
        outputs = outputs > 0.5
        batch_index = i * batch_size
        submit.iloc[batch_index:batch_index+batch_size, 1:] = \
            outputs.long().squeeze(0).detach().cpu().numpy()
        
except broken_pipe_exception as exc:
    if broken_pipe_exception == IOError:
        if exc.errno != EPIPE:
            raise
    
submit.to_csv('submit.csv', index=False)

NameError: name 'broken_pipe_exception' is not defined