# 캐글의 개와 고양이를 구분하는 대회 데이터셋

*   개 사진 12,500장과 고양이 사진 12,500장으로 구성된 컬러 이미지 데이터셋
*   https://www.kaggle.com/c/dogs-vs-cats

# CNN을 활용하여 개,고양이를 구분하는 모델 개발


1.   기본적인 CNN 모델
2.   residual learning을 활용한 모델 고도화
3.   VGG16을 이용한 transfer learning




In [1]:
# !pip install torchviz
import os

import torch
import torch.nn as nn
import torch.optim as optim

from torch.nn import functional as F
from torch.utils.data import Dataset,DataLoader
from torchvision import datasets,transforms
from torchvision import models
from torchviz import make_dot

In [2]:
# Channel-wise mean / std used to normalize the image tensors.
normalize = transforms.Normalize([0.485, 0.456, 0.406],
                                 [0.229, 0.224, 0.225])

# Training pipeline: random crop + horizontal flip as augmentation, then
# downscale to the 128x128 resolution the models in this notebook expect.
# NOTE: ColorJitter() is called without arguments; per the torchvision docs
# its defaults are all zero, so it currently applies no jitter.
train_transform = transforms.Compose([transforms.Resize(256),
                                      transforms.ColorJitter(),
                                      transforms.RandomCrop(224),
                                      transforms.RandomHorizontalFlip(),
                                      transforms.Resize(128),
                                      transforms.ToTensor(),
                                      normalize])

# Evaluation pipeline: same geometry but deterministic (center crop, no flip),
# so validation loss / accuracy are not perturbed by random augmentation.
eval_transform = transforms.Compose([transforms.Resize(256),
                                     transforms.CenterCrop(224),
                                     transforms.Resize(128),
                                     transforms.ToTensor(),
                                     normalize])

# Two views over the same image folder; they differ only in the transform.
dataset = datasets.ImageFolder(root='train/', transform=train_transform)
eval_dataset = datasets.ImageFolder(root='train/', transform=eval_transform)

# 80/20 split over one shared random permutation of the sample indices.
# The validation part is taken as the remainder, so the two parts always
# cover the whole dataset even when its size is not divisible by 5
# (two independently truncated int() sizes would not always add up).
num_train = int(len(dataset) * 0.8)
shuffled_indices = torch.randperm(len(dataset)).tolist()
train_set = torch.utils.data.Subset(dataset, shuffled_indices[:num_train])
val_set = torch.utils.data.Subset(eval_dataset, shuffled_indices[num_train:])

train_dataloader = DataLoader(train_set, batch_size=1024,
                              shuffle=True, num_workers=8,
                              drop_last=True, pin_memory=True)

# Default DataLoader settings (batch_size=1, no shuffling) are kept on purpose:
# the evaluation code in this notebook was written for one sample per step.
val_dataloader = DataLoader(val_set)

In [3]:
class CNN(nn.Module):
    """Baseline convolutional classifier for two classes.

    The fully connected head is sized for 16*32*32 features, i.e. the network
    expects 3-channel 128x128 inputs. ``forward`` returns log-probabilities
    (LogSoftmax over dim 1) with shape [batch, 2].
    """

    def __init__(self):
        super().__init__()

        # Convolutional feature extractor.
        # Shapes below are [batch, channels, height, width].
        conv_stack = [
            nn.Conv2d(3, 4, kernel_size=3, padding=1),   # [B,3,128,128] -> [B,4,128,128]
            nn.ReLU(),
            nn.MaxPool2d(2, 2),                          # [B,4,128,128] -> [B,4,64,64]
            nn.Conv2d(4, 8, kernel_size=3, padding=1),   # [B,4,64,64]   -> [B,8,64,64]
            nn.ReLU(),
            nn.Conv2d(8, 16, kernel_size=3, padding=1),  # [B,8,64,64]   -> [B,16,64,64]
            nn.ReLU(),
            nn.MaxPool2d(2, 2),                          # [B,16,64,64]  -> [B,16,32,32]
        ]
        self.layer = nn.Sequential(*conv_stack)

        # Classifier head on the flattened feature maps.
        head = [
            nn.Linear(16 * 32 * 32, 128),
            nn.ReLU(),
            nn.Linear(128, 2),
        ]
        self.fc_layer = nn.Sequential(*head)

    def forward(self, x):
        """Return per-class log-probabilities for a batch of images ``x``."""
        batch_size = x.size(0)
        feature_maps = self.layer(x)
        # Flatten everything except the batch dimension for the linear head.
        flat = feature_maps.view(batch_size, -1)
        logits = self.fc_layer(flat)
        to_log_probs = nn.LogSoftmax(dim=1)
        return to_log_probs(logits)

In [4]:
# cnn_viz = make_dot(CNN()(torch.randn(1,3,128,128)))
# cnn_viz

In [5]:
class My_ResNet(nn.Module):
    """CNN variant with a single skip connection back to the input image.

    ``layer1`` reduces the image to 16 feature maps of 32x32, ``layer2`` maps
    them back to the input shape so they can be added to the original input,
    and ``layer3`` + ``fc_layer`` classify the sum. The head is sized for
    16*32*32 features, so 3-channel 128x128 inputs are expected. ``forward``
    returns log-probabilities with shape [batch, 2].
    """

    def __init__(self):
        super().__init__()

        # Shapes below are [batch, channels, height, width].
        encoder = [
            nn.Conv2d(3, 4, kernel_size=3, padding=1),   # [B,3,128,128] -> [B,4,128,128]
            nn.ReLU(),
            nn.MaxPool2d(2, 2),                          # [B,4,128,128] -> [B,4,64,64]
            nn.Conv2d(4, 8, kernel_size=3, padding=1),   # [B,4,64,64]   -> [B,8,64,64]
            nn.ReLU(),
            nn.Conv2d(8, 16, kernel_size=3, padding=1),  # [B,8,64,64]   -> [B,16,64,64]
            nn.ReLU(),
            nn.MaxPool2d(2, 2),                          # [B,16,64,64]  -> [B,16,32,32]
        ]
        self.layer1 = nn.Sequential(*encoder)

        # 1x1 convolutions whose zero padding (16, then 32 per side) grows the
        # spatial size 32 -> 64 -> 128 while reducing channels 16 -> 8 -> 3,
        # so the result has the same shape as the network input.
        expander = [
            nn.Conv2d(16, 8, kernel_size=1, padding=16),  # [B,16,32,32] -> [B,8,64,64]
            nn.ReLU(),
            nn.Conv2d(8, 3, kernel_size=1, padding=32),   # [B,8,64,64]  -> [B,3,128,128]
            nn.ReLU(),
        ]
        self.layer2 = nn.Sequential(*expander)

        # Second feature extractor, same layout as layer1, applied to the sum.
        post_skip = [
            nn.Conv2d(3, 4, kernel_size=3, padding=1),   # [B,3,128,128] -> [B,4,128,128]
            nn.ReLU(),
            nn.MaxPool2d(2, 2),                          # [B,4,128,128] -> [B,4,64,64]
            nn.Conv2d(4, 8, kernel_size=3, padding=1),   # [B,4,64,64]   -> [B,8,64,64]
            nn.ReLU(),
            nn.Conv2d(8, 16, kernel_size=3, padding=1),  # [B,8,64,64]   -> [B,16,64,64]
            nn.ReLU(),
            nn.MaxPool2d(2, 2),                          # [B,16,64,64]  -> [B,16,32,32]
        ]
        self.layer3 = nn.Sequential(*post_skip)

        # Classifier head on the flattened feature maps.
        head = [
            nn.Linear(16 * 32 * 32, 128),
            nn.ReLU(),
            nn.Linear(128, 2),
        ]
        self.fc_layer = nn.Sequential(*head)

    def forward(self, x):
        """Return per-class log-probabilities for a batch of images ``x``."""
        batch_size = x.size(0)
        encoded = self.layer1(x)
        # Skip connection: add the original image to the re-expanded features.
        merged = self.layer2(encoded) + x
        feature_maps = self.layer3(merged)
        flat = feature_maps.view(batch_size, -1)
        logits = self.fc_layer(flat)
        to_log_probs = nn.LogSoftmax(dim=1)
        return to_log_probs(logits)

In [6]:
# my_resnet_viz = make_dot(My_ResNet()(torch.randn(1,3,128,128)))
# my_resnet_viz

In [None]:
# Transfer learning, step 1: load VGG16 with its pretrained weights and
# freeze every existing parameter so the optimizer will not update them.
vgg16 = models.vgg16(pretrained=True)
for frozen_param in vgg16.parameters():
    frozen_param.requires_grad = False

In [None]:
# Transfer learning, step 2: replace the final classifier layer (index 6)
# with a new 2-output linear layer. The new layer is created after the
# freeze above, so it is the only part of the network with trainable
# parameters.
num_features = vgg16.classifier[6].in_features
# Keep every classifier layer except the last one, then append the new head.
features = list(vgg16.classifier.children())[:-1]
features.append(nn.Linear(num_features, 2))
vgg16.classifier = nn.Sequential(*features)

In [7]:
# Train on the first CUDA GPU when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

# Model to train: the baseline CNN, moved to the selected device.
model = CNN()
model = model.to(device)

# CrossEntropyLoss applies log-softmax internally. CNN.forward already
# returns log-probabilities, and log-softmax leaves log-probabilities
# unchanged, so the resulting loss value is the same either way.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# Number of full passes over the training set.
epochs = 50

In [8]:
# Per-epoch history: mean training loss, mean validation loss and validation
# accuracy. The loss entries are 0-dim CPU tensors, accuracy is a Python float.
plot_list = {'train':[],'val':[],'accuracy':[]}

for epoch in range(epochs):

    # ---------------- training pass ----------------
    model.train()
    train_losses = []
    for inputs, targets in train_dataloader:
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()

        outputs = model(inputs)
        train_loss = criterion(outputs, targets)
        train_loss.backward()
        optimizer.step()

        # Store only the detached scalar: keeping the attached loss would keep
        # each step's autograd graph alive and steadily consume GPU memory.
        train_losses.append(train_loss.detach())

    # ---------------- validation pass ----------------
    model.eval()
    val_loss_total = torch.zeros((), device=device)
    correct = 0
    # No gradients are needed for evaluation; skipping graph construction
    # saves memory and time.
    with torch.no_grad():
        for inputs, targets in val_dataloader:
            inputs, targets = inputs.to(device), targets.to(device)

            outputs = model(inputs)
            # criterion returns the mean over the batch; weight it by the
            # batch size so the epoch value is a per-sample mean for any
            # validation batch size.
            val_loss_total += criterion(outputs, targets) * targets.size(0)

            # The predicted class is the arg-max over the class dimension
            # (identical for logits, log-probabilities and probabilities).
            predictions = outputs.argmax(dim=1)
            correct += (predictions == targets).sum().item()

    # Move the epoch means to the CPU so the history holds no device memory.
    epoch_train_loss = (sum(train_losses) / len(train_losses)).cpu()
    epoch_val_loss = (val_loss_total / len(val_set)).cpu()
    val_accuracy = correct/len(val_set)

    print(f"{epoch+1} epoch train loss = {epoch_train_loss}")
    print(f"{epoch+1} epoch val loss = {epoch_val_loss}")
    print(f"{epoch+1} epoch accuracy = {val_accuracy}")
    print('--------------------------------------------------')
    plot_list['train'].append(epoch_train_loss)
    plot_list['val'].append(epoch_val_loss)
    plot_list['accuracy'].append(val_accuracy)

    # Disabled early-stopping heuristic, kept for reference: stop once the
    # current validation loss exceeds the mean of epochs -11..-7.
#     if epoch < 11:continue
#     if sum(plot_list['val'][-11:-6])/5 < epoch_val_loss:
#         print(f'over_fitting is occured at {epoch} epoch')
#         break
        

1 epoch train loss = 0.7497134804725647
1 epoch val loss = 0.6913206577301025
1 epoch accuracy = 0.5484
--------------------------------------------------
2 epoch train loss = 0.6905684471130371
2 epoch val loss = 0.6863915920257568
2 epoch accuracy = 0.5552
--------------------------------------------------
3 epoch train loss = 0.6850193738937378
3 epoch val loss = 0.6769543290138245
3 epoch accuracy = 0.5952
--------------------------------------------------
4 epoch train loss = 0.6777166128158569
4 epoch val loss = 0.6655076742172241
4 epoch accuracy = 0.628
--------------------------------------------------
5 epoch train loss = 0.6689168810844421
5 epoch val loss = 0.6625101566314697
5 epoch accuracy = 0.6528
--------------------------------------------------
6 epoch train loss = 0.658759593963623
6 epoch val loss = 0.6459168195724487
6 epoch accuracy = 0.6502
--------------------------------------------------
7 epoch train loss = 0.6545872688293457
7 epoch val loss = 0.64241027832