In [1]:
!wget https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip
!unzip -qq cats_and_dogs_filtered.zip

--2020-06-03 17:20:56--  https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip
Resolving storage.googleapis.com (storage.googleapis.com)... 64.233.189.128, 2404:6800:4008:c04::80
Connecting to storage.googleapis.com (storage.googleapis.com)|64.233.189.128|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 68606236 (65M) [application/zip]
Saving to: ‘cats_and_dogs_filtered.zip’


2020-06-03 17:20:58 (85.4 MB/s) - ‘cats_and_dogs_filtered.zip’ saved [68606236/68606236]



In [0]:
import os
from torch.utils.data import Dataset
import torchvision.transforms as transforms
from PIL import Image

class PyTorchCustomDataset(Dataset):
    def __init__(self
                 , root_dir = "/content/cats_and_dogs_filtered/train"
                 , transform = None):
        self.image_abs_path = root_dir
        self.transform = transform
        self.label_list = os.listdir(self.image_abs_path)
        self.label_list.sort()
        self.x_list = []
        self.y_list = []
        for label_index, label_str in enumerate(self.label_list):
            img_path = os.path.join(self.image_abs_path, label_str)
            img_list = os.listdir(img_path)
            for img in img_list:
                self.x_list.append(os.path.join(img_path, img))
                self.y_list.append(label_index)
        pass

    def __len__(self):
        return len(self.x_list)

    def __getitem__(self, idx):
        image = Image.open(self.x_list[idx])
        if image.mode is not "RGB":
            image = image.convert('RGB')
        if self.transform is not None:
            image = self.transform(image)
        return image, self.y_list[idx]

    def __save_label_map__(self, dst_text_path = "label_map.txt"):
        label_list = self.label_list
        f = open(dst_text_path, 'w')
        for i in range(len(label_list)):
            f.write(label_list[i]+'\n')
        f.close()
        pass

    def __num_classes__(self):
        return len(self.label_list)

In [0]:
import torch
from torchvision import models
import torch.nn as nn
import torch.nn.functional as F

class MODEL(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.network = models.resnet18(pretrained=True)
        self.classifier = nn.Sequential(
            nn.Dropout()
            , nn.Linear(1000, num_classes)
            , nn.Sigmoid()
        )
    def forward(self, x):
        x = self.network(x)
        return self.classifier(x)

In [0]:
import torch
import torch.optim as optim

def main():
    USE_CUDA = torch.cuda.is_available()
    DEVICE = torch.device("cuda" if USE_CUDA else "cpu")

    img_width, img_height = 224, 224
    EPOCHS     = 20
    BATCH_SIZE = 16
    transform_train = transforms.Compose([
                transforms.Resize(size=(img_width, img_height))
                , transforms.RandomRotation(degrees=15)
                , transforms.ToTensor()
                ])
    transform_test = transforms.Compose([
                transforms.Resize(size=(img_width, img_height))
                , transforms.ToTensor()
                ])

    TrainDataset = PyTorchCustomDataset
    TestDataset = PyTorchCustomDataset

    train_data = TrainDataset(root_dir = "/content/cats_and_dogs_filtered/train"
                    , transform = transform_train)
    test_data = TestDataset(root_dir = "/content/cats_and_dogs_filtered/validation"
                    , transform = transform_test)
    
    train_loader = torch.utils.data.DataLoader(
        train_data
        , batch_size=BATCH_SIZE
        , shuffle=True
    )
    test_loader = torch.utils.data.DataLoader(
        test_data
        , batch_size=BATCH_SIZE
        , shuffle=True
    )
    
    train_data.__save_label_map__()
    num_classes = train_data.__num_classes__()

    model = MODEL(num_classes).to(DEVICE)
    model_str = "PyTorch_Classification_Model"
    model_str += ".pt" 
    optimizer = optim.SGD(model.parameters(), lr=0.0001)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.1)
    acc = 0.0
    for epoch in range(1, EPOCHS + 1):
        model.train()
        for data, target in (train_loader):
            data, target = data.to(DEVICE), target.to(DEVICE)
            optimizer.zero_grad()
            output = model(data)
            loss = F.cross_entropy(output, target)
            loss.backward()
            optimizer.step()
        scheduler.step()

        model.eval()
        test_loss = 0
        correct = 0
        with torch.no_grad():
            for data, target in (test_loader):
                data, target = data.to(DEVICE), target.to(DEVICE)
                output = model(data)

                # 배치 오차를 합산
                test_loss += F.cross_entropy(output, target,
                                            reduction='sum').item()

                # 가장 높은 값을 가진 인덱스가 바로 예측값
                pred = output.max(1, keepdim=True)[1]
                correct += pred.eq(target.view_as(pred)).sum().item()

        test_loss /= len(test_loader.dataset)
        test_accuracy = 100. * correct / len(test_loader.dataset)

        print('[{}] Test Loss: {:.4f}, Accuracy: {:.2f}%'.format(
                epoch, test_loss, test_accuracy))

        if acc < test_accuracy:
            acc = test_accuracy
            torch.save(model.state_dict(), model_str)
            print("model saved!")

In [10]:
main()


[1] Test Loss: 0.6822, Accuracy: 57.60%
model saved!
[2] Test Loss: 0.6007, Accuracy: 78.10%
model saved!
[3] Test Loss: 0.5388, Accuracy: 87.10%
model saved!
[4] Test Loss: 0.5030, Accuracy: 90.80%
model saved!
[5] Test Loss: 0.4741, Accuracy: 92.50%
model saved!
[6] Test Loss: 0.4514, Accuracy: 93.40%
model saved!
[7] Test Loss: 0.4380, Accuracy: 94.10%
model saved!
[8] Test Loss: 0.4258, Accuracy: 94.50%
model saved!
[9] Test Loss: 0.4174, Accuracy: 94.30%
[10] Test Loss: 0.4091, Accuracy: 95.30%
model saved!
[11] Test Loss: 0.4046, Accuracy: 94.50%
[12] Test Loss: 0.3995, Accuracy: 95.20%
[13] Test Loss: 0.3950, Accuracy: 95.70%
model saved!
[14] Test Loss: 0.3889, Accuracy: 95.70%
[15] Test Loss: 0.3899, Accuracy: 95.70%
[16] Test Loss: 0.3882, Accuracy: 95.00%
[17] Test Loss: 0.3817, Accuracy: 95.90%
model saved!
[18] Test Loss: 0.3815, Accuracy: 95.70%
[19] Test Loss: 0.3789, Accuracy: 96.20%
model saved!
[20] Test Loss: 0.3758, Accuracy: 96.30%
model saved!


In [12]:
!ls -al -h

total 111M
drwxr-xr-x 1 root root 4.0K Jun  3 17:22 .
drwxr-xr-x 1 root root 4.0K Jun  3 17:15 ..
drwxr-x--- 4 root root 4.0K Sep 23  2016 cats_and_dogs_filtered
-rw-r--r-- 1 root root  66M Mar 28  2018 cats_and_dogs_filtered.zip
drwxr-xr-x 1 root root 4.0K Jun  2 16:14 .config
-rw-r--r-- 1 root root   10 Jun  3 17:27 label_map.txt
-rw-r--r-- 1 root root  45M Jun  3 17:32 PyTorch_Classification_Model.pt
drwxr-xr-x 1 root root 4.0K May 29 18:19 sample_data
