In [1]:
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F

from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from torch.autograd import Variable
from torch import optim
from torchsummary import summary

import os
import cv2
import random
import time

from PIL import Image
from tqdm import tqdm_notebook as tqdm

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [2]:
# 데이터 전처리 클래스
class ImageTransform():
    def __init__(self, resize, mean, std):
        self.data_transform = {
            'train' : transforms.Compose([
                transforms.RandomResizedCrop(resize, scale= (0.5,1.0)),
                transforms.RandomHorizontalFlip(resize),
                transforms.ToTensor(),
                transforms.Normalize(mean, std)
            ])
        }

    def __call__(self, img, phase):
        return self.data_transform[phase](img)

In [3]:
# 데이터 로드, train, val, test set으로 분리
cat_dir = 'data/dogs-vs-cats/Cat/'
dog_dir = 'data/dogs-vs-cats/Dog/'

cat_images_filepaths = sorted([os.path.join(cat_dir, f) for f in os.listdir(cat_dir)])
dog_images_filepaths = sorted([os.path.join(dog_dir, f) for f in os.listdir(dog_dir)])
images_filepaths = [*cat_images_filepaths, *dog_images_filepaths]
correct_images_filepaths = [i for i in images_filepaths if cv2.imread(i) is not None]

random.seed(42)
random.shuffle(correct_images_filepaths)
train_images_filepaths = correct_images_filepaths[:400]
val_images_filepaths = correct_images_filepaths[400:-10]
test_images_filepaths = correct_images_filepaths[-10:]

print(len(train_images_filepaths), len(val_images_filepaths), len(test_images_filepaths))

400 92 10


In [4]:
# 커스텀 데이터셋 정의
class DogvsCatDataset(Dataset):
    def __init__(self, file_list, transform= None, phase= 'train'):
        self.file_list = file_list
        self.transform = transform
        self.phase = phase

    def __len__(self):
        return len(self.file_list)
    
    def __getitem__(self, idx):
        img_path = self.file_list[idx]
        img = Image.open(img_path)
        img_transformed = self.transform(img, self.phase)

        label = img_path.split('/')[-1].split('.')[0]
        if label == 'dog':
            label = 1
        elif label == 'cat':
            label = 0

        return img_transformed, label

In [5]:
# 변수 값 정의
size = 256
mean = (0.485, 0.456, 0.406)
std = (0.229, 0.224, 0.225)
batch_size = 32

# train, val, test set
train_dataset = DogvsCatDataset(train_images_filepaths, transform= ImageTransform(size, mean, std), phase= 'train')
val_dataset = DogvsCatDataset(val_images_filepaths, transform= ImageTransform(size, mean, std), phase= 'val')
test_dataset = DogvsCatDataset(test_images_filepaths, transform= ImageTransform(size, mean, std), phase= 'test')

# 첫번째 데이터의 size와 label 출력
index = 0
print(train_dataset.__getitem__(index)[0].size())
print(train_dataset.__getitem__(index)[1])

torch.Size([3, 256, 256])
0


In [6]:
# dataloader
train_dataloader = DataLoader(train_dataset, batch_size= batch_size, shuffle= True)
val_dataloader = DataLoader(val_dataset, batch_size= batch_size, shuffle= False)
test_dataloader = DataLoader(test_dataset, batch_size= batch_size, shuffle= False)
dataloader_dict = {'train' : train_dataloader, 'val' : val_dataloader}

batch_iterator = iter(train_dataloader)
inputs, label = next(batch_iterator)
print(inputs.size())
print(label)

torch.Size([32, 3, 256, 256])
tensor([0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0,
        0, 1, 0, 0, 1, 0, 1, 0])


In [7]:
class AlexNet(nn.Module):
    def __init__(self) -> None:
        super(AlexNet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size= 11, stride= 4, padding= 2),
            nn.ReLU(inplace= True),
            nn.MaxPool2d(kernel_size= 5, padding= 2),
            
            nn.Conv2d(64, 192, kernel_size= 5, padding= 2),
            nn.ReLU(inplace= True),
            nn.MaxPool2d(kernel_size= 3, stride= 2),

            nn.Conv2d(192, 384, kernel_size= 3, padding= 1),
            nn.ReLU(inplace= True),
            
            nn.Conv2d(384, 256, kernel_size= 3, padding= 1),
            nn.ReLU(inplace= True),
            
            nn.Conv2d(256, 256, kernel_size= 3, padding= 1),
            nn.ReLU(inplace = True),
            nn.MaxPool2d(kernel_size= 3, stride= 2),
        )
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace= True),
            nn.Dropout(),
            nn.Linear(4096, 512),
            nn.ReLU(inplace= True),
            nn.Linear(512, 2),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)

        return x

In [8]:
# 모델 객체 생성
model = AlexNet()
model.to(device)

# opt, loss
opt = optim.SGD(model.parameters(), lr= 0.001, momentum= 0.9)
criterion = nn.CrossEntropyLoss()

In [9]:
# 네트워크 구조 확인
summary(model, input_size= (3, 256, 256))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 63, 63]          23,296
              ReLU-2           [-1, 64, 63, 63]               0
         MaxPool2d-3           [-1, 64, 13, 13]               0
            Conv2d-4          [-1, 192, 13, 13]         307,392
              ReLU-5          [-1, 192, 13, 13]               0
         MaxPool2d-6            [-1, 192, 6, 6]               0
            Conv2d-7            [-1, 384, 6, 6]         663,936
              ReLU-8            [-1, 384, 6, 6]               0
            Conv2d-9            [-1, 256, 6, 6]         884,992
             ReLU-10            [-1, 256, 6, 6]               0
           Conv2d-11            [-1, 256, 6, 6]         590,080
             ReLU-12            [-1, 256, 6, 6]               0
        MaxPool2d-13            [-1, 256, 2, 2]               0
AdaptiveAvgPool2d-14            [-1, 25

In [12]:
# 모델 학습 함수
def train_model(model, dataloader_dict, criterion, optimizer, num_epoch):
    since = time.time()
    best_acc = 0.0

    for epoch in range(num_epoch):
        print(f'Epoch {epoch + 1}/{num_epoch}')
        print('-'*20)

        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            epoch_loss = 0.0
            epoch_corrects = 0

            for inputs, labels in tqdm(dataloader_dict[phase]):
                inputs = inputs.to(device)
                labels = labels.to(device)
                optimizer.zero_grad()

                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                    epoch_loss += loss.item() * inputs.size(0)
                    epoch_corrects += torch.sum(preds == labels.data)

            epoch_loss = epoch_loss / len(dataloader_dict[phase].dataset)
            epoch_acc = epoch_corrects.double() / len(dataloader_dict[phase].dataset)

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

    time_elapsed = time.time() - since
    print(f'Training complete in {time_elapsed // 60}m {time_elapsed % 60}s')

    return model

In [13]:
# 모델 학습
num_epoch = 10
model = train_model(model, dataloader_dict, criterion, opt, num_epoch)

Epoch 1/10
--------------------


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for inputs, labels in tqdm(dataloader_dict[phase]):


  0%|          | 0/13 [00:00<?, ?it/s]

train Loss: 0.6939 Acc: 0.4975


  0%|          | 0/3 [00:00<?, ?it/s]

KeyError: 'val'

In [None]:

# 예측
import pandas as pd
id_list = []
pred_list = []
_id = 0
with torch.no_grad():
    for test_path in tqdm(test_images_filepaths):
        img = Image.open(test_path)
        _id = test_path.split('/')[-1].split('.')[1]