In [59]:
import sys
import os

import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
from matplotlib import patches

import cv2
import torch
import torchvision
from torchvision import datasets, models, transforms
from torch import nn
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
from torch.utils import data as torch_data
from torchvision import transforms as T
import torch.nn.functional as F
from torch.autograd import Variable
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
from torchvision.models.detection import FasterRCNN

from torch.utils.data import Dataset
from PIL import Image 
from xml.etree import ElementTree as ET
import glob 
from torch.utils.data import DataLoader
from copy import deepcopy

In [3]:
%pwd

'/kaggle/working'

In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [4]:
images = '/kaggle/input/stanford-dogs-dataset/images/'
annotations = '/kaggle/input/stanford-dogs-dataset/annotations'

In [None]:
def get_image(annot):
    img_path = '/kaggle/input/stanford-dogs-dataset/images/Images/'
    file = annot.split('/')
    img_filename = img_path + file[-2]+'/'+file[-1]+'.jpg'
    return img_filename

In [None]:
annotations = glob.glob('/kaggle/input/stanford-dogs-dataset/annotations/Annotation/*/*')

plt.figure(figsize=(10,6))
for i in range(8):
    plt.subplot(2,4,i+1)
    plt.axis("off")
    dog = get_image(annotations[i])
    im = Image.open(dog)
    im = im.resize((256,256), Image.ANTIALIAS)
    plt.imshow(im)

In [4]:
root_dir = '/kaggle/input/stanford-dogs-dataset/'
img_dir = '/images/Images/'
annot_dir = '/annotations/Annotation/'

In [5]:
import os

labels_map = {}
for i, item in enumerate(os.listdir(root_dir + img_dir)):
    sub_folder = os.path.join(root_dir + img_dir, item)
    labels_map[sub_folder.split('-', maxsplit=3)[-1]] = i

In [6]:
labels_map

{'otterhound': 0,
 'cocker_spaniel': 1,
 'Brittany_spaniel': 2,
 'Afghan_hound': 3,
 'Maltese_dog': 4,
 'schipperke': 5,
 'Irish_setter': 6,
 'Pekinese': 7,
 'golden_retriever': 8,
 'vizsla': 9,
 'Welsh_springer_spaniel': 10,
 'Staffordshire_bullterrier': 11,
 'Border_collie': 12,
 'Irish_terrier': 13,
 'Eskimo_dog': 14,
 'pug': 15,
 'kelpie': 16,
 'Yorkshire_terrier': 17,
 'Tibetan_terrier': 18,
 'Walker_hound': 19,
 'affenpinscher': 20,
 'Cardigan': 21,
 'English_springer': 22,
 'English_foxhound': 23,
 'West_Highland_white_terrier': 24,
 'Lakeland_terrier': 25,
 'Rhodesian_ridgeback': 26,
 'Gordon_setter': 27,
 'Lhasa': 28,
 'curly-coated_retriever': 29,
 'beagle': 30,
 'Tibetan_mastiff': 31,
 'Sussex_spaniel': 32,
 'Saint_Bernard': 33,
 'toy_terrier': 34,
 'standard_poodle': 35,
 'Bernese_mountain_dog': 36,
 'Pomeranian': 37,
 'Ibizan_hound': 38,
 'redbone': 39,
 'toy_poodle': 40,
 'basset': 41,
 'Scottish_deerhound': 42,
 'miniature_pinscher': 43,
 'basenji': 44,
 'Border_terrier'

In [7]:
def img_crop(annot_path, img):
    tree = ET.parse(annot_path)
    obj = tree.find('./object')
    bndbox = obj.find('bndbox')

    # 강아지 종류
    species = obj.find('name').text

    # 이미지에서의 강아지 위치
    xmin = int(bndbox.find('xmin').text)
    ymin = int(bndbox.find('ymin').text)
    xmax = int(bndbox.find('xmax').text)
    ymax = int(bndbox.find('ymax').text)

    cropped_img = img[ymin:ymax, xmin:xmax]
    
    label = labels_map.get(species)

    return label, cropped_img

In [8]:
class DogsDataset(Dataset):
    def __init__(self, annot_dir, img_dir, transform=None):
        annot_dir = glob.glob(root_dir + annot_dir + '*/*')
        img_dir = glob.glob(root_dir + img_dir + '*/*.jpg')
        self.annot_dir = sorted(annot_dir)
        self.img_dir = sorted(img_dir)
        self.transform = transform
   
    def __len__(self):
        return len(self.img_dir)
        
    def __getitem__(self, idx):
        annot_path = self.annot_dir[idx]
        img_path = self.img_dir[idx]
        
        img = cv2.imread(img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        
        label, img = img_crop(annot_path, img)
        
        if self.transform is not None:
            img = self.transform(image=img)
            img['label'] = label
            return img
            
        sample = {'image': img, 'label': label}
        return sample

In [9]:
data_transform = A.Compose([A.Resize(224, 224), A.Normalize(),ToTensorV2()])

In [10]:
image_dataset = DogsDataset(annot_dir ='/annotations/Annotation/',
                            img_dir ='/images/Images/', transform=data_transform)

In [11]:
len(image_dataset)

20580

In [12]:
image_dataset[0]

{'image': tensor([[[-1.0048, -1.0048, -1.0390,  ...,  0.4508,  0.5022,  0.2453],
          [-1.0219, -1.0390, -1.0733,  ..., -0.1314, -0.3027, -0.4739],
          [-1.0390, -1.0562, -1.0904,  ..., -0.1314, -0.3883, -0.6965],
          ...,
          [ 2.2489,  2.2489,  2.2489,  ..., -2.0323, -2.0152, -1.9124],
          [ 2.2489,  2.2489,  2.2489,  ..., -2.0152, -2.0494, -1.9638],
          [ 2.2489,  2.2489,  2.2489,  ..., -2.0494, -2.0837, -2.0837]],
 
         [[-1.1604, -1.1604, -1.1954,  ...,  0.8529,  0.9405,  0.6954],
          [-1.1779, -1.1954, -1.2304,  ...,  0.2052,  0.0301, -0.1099],
          [-1.1954, -1.2129, -1.2479,  ...,  0.1702, -0.0749, -0.3550],
          ...,
          [ 2.4286,  2.4286,  2.4286,  ..., -1.9482, -1.9307, -1.8256],
          [ 2.4286,  2.4286,  2.4286,  ..., -1.9307, -1.9657, -1.8782],
          [ 2.4286,  2.4286,  2.4286,  ..., -1.8957, -1.9307, -1.9307]],
 
         [[-0.9853, -1.0027, -1.0201,  ...,  0.3045,  0.3742,  0.1128],
          [-1.0027,

In [13]:
image_dataset[0]['label']

71

In [14]:
image_dataset[0]['image'].shape

torch.Size([3, 224, 224])

In [15]:
d = image_dataset[0]
d['label']

71

In [16]:
len(image_dataset)

20580

In [18]:
#total_label = [data['label'] for data in image_dataset]

In [14]:
# total_label =[]
# for i in range(len(image_dataset)):
#     total_label.append(image_dataset[i]['label'])

KeyboardInterrupt: 

In [None]:
# total_label=[]
# for data in image_dataset:
#     total_label.append(data['label'])

****데이터셋 나누기****

In [19]:
# train과  test 나누기 (인덱싱)
from sklearn.model_selection import train_test_split

train_indices, test_indices = train_test_split(
                            range(len(image_dataset)), # X의 index
                            test_size= 0.2, random_state=42)

In [20]:
len(train_indices), len(test_indices)

(16464, 4116)

In [21]:
# train set과  test set 나누기 
from torch.utils.data import Subset
train_set = Subset(image_dataset, train_indices)
test_set = Subset(image_dataset, test_indices)

In [22]:
print(type(train_set), len(train_set))
print(type(test_set), len(test_set))

<class 'torch.utils.data.dataset.Subset'> 16464
<class 'torch.utils.data.dataset.Subset'> 4116


In [23]:
train_set[0]['image']

tensor([[[-1.2959, -0.1143, -0.0287,  ..., -1.6727, -1.6555, -1.7754],
         [-1.2274, -0.5767, -0.2513,  ..., -1.5699, -1.3987, -1.7754],
         [-1.3987, -0.6794, -0.4397,  ..., -1.6727, -1.7412, -1.7583],
         ...,
         [ 0.0741,  0.1426,  0.0227,  ...,  0.0398,  0.0912,  0.2796],
         [ 0.1083, -0.0116,  0.0227,  ...,  0.9132,  0.3994,  0.6049],
         [ 0.0398,  0.0227,  0.0741,  ...,  0.5878,  0.6221,  0.5878]],

        [[-1.9482, -0.7927, -0.6877,  ..., -1.4405, -1.4055, -1.4930],
         [-1.7381, -1.1078, -0.7752,  ..., -1.3529, -1.1604, -1.4755],
         [-1.7206, -1.0203, -0.8102,  ..., -1.4755, -1.5105, -1.4755],
         ...,
         [-0.4601, -0.4076, -0.5826,  ..., -0.3200, -0.2850, -0.0399],
         [-0.4951, -0.6527, -0.6527,  ...,  0.6429,  0.1176,  0.3803],
         [-0.5826, -0.6176, -0.6001,  ...,  0.3803,  0.4328,  0.4328]],

        [[-1.7696, -0.7413, -0.6890,  ..., -0.8807, -0.8458, -0.9504],
         [-1.6824, -1.0898, -0.8110,  ..., -0

In [24]:
# train set에서 train과 valid로 나누기(인덱싱)
from sklearn.model_selection import train_test_split

train_set_indices, valid_indices = train_test_split(
                            range(len(train_set)), # X의 index                        
                            test_size= 0.2, random_state=42)

In [25]:
len(train_set_indices), len(valid_indices)

(13171, 3293)

In [26]:
(13171+ 3293)

16464

In [27]:
# train set에서 train과 valid로 나누기
from torch.utils.data import Subset
trainset = Subset(train_set, train_set_indices)
validset = Subset(train_set, valid_indices)

In [17]:
from sklearn.model_selection import train_test_split
from torch.utils.data import Subset

total_label = [data['label'] for data in image_dataset]

trainset_idx, testset_idx = train_test_split(range(len(image_dataset)),
                test_size=0.2, random_state=42, shuffle=True, stratify=total_label)

# 전체 데이터 셋이서 train과 test로 나누기
train_set = Subset(image_dataset, trainset_idx)
test_set = Subset(image_dataset, testset_idx)

# train label
train_label = [data['label'] for data in train_set]

# train idx와 valid idx
trainset_idx, validset_idx = train_test_split(range(len(trainset_idx)),
                test_size=0.2, random_state=42, shuffle=True, stratify=train_label)

# train set에서 train과 valid로 나누기
from torch.utils.data import Subset
trainset = Subset(train_set, trainset_idx)
validset = Subset(train_set, validset_idx)

In [18]:
print(type(trainset), len(trainset))
print(type(validset), len(validset))
print(type(test_set), len(test_set))

<class 'torch.utils.data.dataset.Subset'> 13171
<class 'torch.utils.data.dataset.Subset'> 3293
<class 'torch.utils.data.dataset.Subset'> 4116


29635

In [19]:
trainset[0]['image'].shape

torch.Size([3, 224, 224])

In [20]:
validset[0]['image'].shape

torch.Size([3, 224, 224])

In [21]:
test_set[0]['image'].shape

torch.Size([3, 224, 224])

****ResNet50 모델**** 

In [22]:
batch_size = 16 # 100 -> 16
# dataloader = DataLoader(데이터셋, 배치사이즈, 셔플여부.....)
trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True) # 훈련용 13171개의 데이터를 100개씩 준비
validloader = DataLoader(validset, batch_size=batch_size, shuffle=False) # 검증용 10000개의 데이터를 100개씩 준비
testloader = DataLoader(test_set, batch_size=batch_size, shuffle=False) # 테스트용 10000개의 데이터를 100개씩 준비

In [23]:
print(type(trainloader), len(trainloader))
print(type(validloader), len(validloader))
print(type(testloader), len(testloader))

<class 'torch.utils.data.dataloader.DataLoader'> 824
<class 'torch.utils.data.dataloader.DataLoader'> 206
<class 'torch.utils.data.dataloader.DataLoader'> 258


In [24]:
13171/16, 3293/16, 4116/16

(823.1875, 205.8125, 257.25)

In [25]:
train_iter = iter(trainloader)
batch = next(train_iter)
batch['image'].size(), batch['label'].shape

(torch.Size([16, 3, 224, 224]), torch.Size([16]))

In [26]:
import torch.nn as nn # 파이토치에서 제공하는 다양한 계층 (Linear Layer, ....)
import torch.optim as optim # 옵티마이저 (경사하강법...)
import torch.nn.functional as F # 파이토치에서 제공하는 함수(활성화 함수...)

**** conv block 별 사이즈 확인****

In [37]:
# conv1
conv1 = nn.Sequential(
    # BatchNorm 계층은 편향값의 효과를 보완해주므로 관례상 생략
                            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=7, stride=2, padding=3, bias=False), # [16, 65, 112, 112]
                            nn.BatchNorm2d(num_features=64),
                            nn.ReLU(),                        
                            nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
                            ) # [16, 64, 56, 56]
conv1_out = conv1(batch['image'])                                     
conv1_out.shape           

torch.Size([16, 64, 56, 56])

In [41]:
# conv2_x      
shortcut2 = nn.Sequential(
                                nn.Conv2d(in_channels=64, out_channels=256, kernel_size=1, stride=1), 
                                nn.BatchNorm2d(num_features=256)                                  
                              )  
conv2_x = nn.Sequential(
                              ResBlock(in_channels=64, out_channels=64, shortcut=shortcut2, stride=1),                                 
                              ResBlock(in_channels=256, out_channels=64, shortcut=None, stride=1),
                              ResBlock(in_channels=256, out_channels=64, shortcut=None, stride=1)
                            ) # [16, 256, 56, 56]
conv2_x_out = conv2_x(conv1_out)                                     
conv2_x_out.shape          

torch.Size([16, 256, 56, 56])

In [42]:
# conv3_x
shortcut3 = nn.Sequential(
                                nn.Conv2d(in_channels=256, out_channels=512, kernel_size=1, stride=2), 
                                nn.BatchNorm2d(num_features=512)                                  
                              )      
conv3_x = nn.Sequential(
                              ResBlock(in_channels=256, out_channels=128, shortcut=shortcut3, stride=2),
                              ResBlock(in_channels=512, out_channels=128, shortcut=None, stride=1),
                              ResBlock(in_channels=512, out_channels=128, shortcut=None, stride=1),
                              ResBlock(in_channels=512, out_channels=128, shortcut=None, stride=1)

                            ) # [16, 512, 28, 28] 
conv3_x_out = conv3_x(conv2_x_out)                                     
conv3_x_out.shape       

torch.Size([16, 512, 28, 28])

In [43]:
# conv4_x
shortcut4 = nn.Sequential(
                                nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=1, stride=2), 
                                nn.BatchNorm2d(num_features=1024)                                  
                              )      
conv4_x = nn.Sequential(
                              ResBlock(in_channels=512, out_channels=256, shortcut=shortcut4, stride=2),
                              ResBlock(in_channels=1024, out_channels=256, shortcut=None, stride=1),
                              ResBlock(in_channels=1024, out_channels=256, shortcut=None, stride=1),
                              ResBlock(in_channels=1024, out_channels=256, shortcut=None, stride=1),
                              ResBlock(in_channels=1024, out_channels=256, shortcut=None, stride=1),
                              ResBlock(in_channels=1024, out_channels=256, shortcut=None, stride=1),                                                                                        
                            ) # [16, 1024, 14, 14]
conv4_x_out = conv4_x(conv3_x_out)                                     
conv4_x_out.shape 

torch.Size([16, 1024, 14, 14])

In [44]:
# conv5_x
shortcut5 = nn.Sequential(
                                nn.Conv2d(in_channels=1024, out_channels=2048, kernel_size=1, stride=2), 
                                nn.BatchNorm2d(num_features=2048)                                  
                              )    
conv5_x = nn.Sequential(
                              ResBlock(in_channels=1024, out_channels=512, shortcut=shortcut5, stride=2),
                              ResBlock(in_channels=2048, out_channels=512, shortcut=None, stride=1),
                              ResBlock(in_channels=2048, out_channels=512, shortcut=None, stride=1),                                                                
                            ) # [16, 2048, 7, 7]  
conv5_x_out = conv5_x(conv4_x_out)                                     
conv5_x_out.shape  

torch.Size([16, 2048, 7, 7])

In [45]:
avg_pool = nn.AdaptiveAvgPool2d((1, 1))  # [16, 2048, 1, 1]  
avg_pool_out = avg_pool(conv5_x_out)
avg_pool_out.shape

torch.Size([16, 2048, 1, 1])

In [60]:
class ResBlock(nn.Module):
    def __init__(self, in_channels, out_channels, shortcut=None, stride=1): # shortcut에 계층을 설정되어 있다면 그 계층을 통과한뒤 Add
        super().__init__()

        # 1x1 conv
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0)
        self.batch_norm1 = nn.BatchNorm2d(out_channels)

        # 3x3 conv 
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=stride, padding=1) # stride=2일 경우에는 downsampling
        self.batch_norm2 = nn.BatchNorm2d(out_channels)

        # 1x1 conv
        self.conv3 = nn.Conv2d(out_channels, out_channels*4, kernel_size=1, stride=1, padding=0)
        self.batch_norm3 = nn.BatchNorm2d(out_channels*4)

        self.shortcut = shortcut
        self.stride = stride
        self.relu = nn.ReLU()

    def forward(self, x):
        identity = x.clone()
        x = self.relu(self.batch_norm1(self.conv1(x)))
        x = self.relu(self.batch_norm2(self.conv2(x)))
        x = self.batch_norm3(self.conv3(x))

        # shortcut 계층을 바깥에서 설정한것을 적용할 때
        if self.shortcut is not None:
            identity = self.shortcut(identity)

        x += identity  # x = x+identity
        x = self.relu(x)

        return x


In [61]:
class ResNet50(nn.Module):
    def __init__(self):
        super().__init__()
        # conv1
        self.conv1 = nn.Sequential(
                                    # BatchNorm 계층은 편향값의 효과를 보완해주므로 관례상 생략략
                                    nn.Conv2d(in_channels=3, out_channels=64, kernel_size=7, stride=2, padding=3, bias=False),
                                    nn.BatchNorm2d(num_features=64),
                                    nn.ReLU(),                        
                                    nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
                                    ) # [16, 64, 56, 56]
        # conv2_x      
        self.shortcut2 = nn.Sequential(
                                        nn.Conv2d(in_channels=64, out_channels=256, kernel_size=1, stride=1), 
                                        nn.BatchNorm2d(num_features=256)                                  
                                      )              
        self.conv2_x = nn.Sequential(
                                      ResBlock(in_channels=64, out_channels=64, shortcut=self.shortcut2, stride=1),                                 
                                      ResBlock(in_channels=256, out_channels=64, shortcut=None, stride=1),
                                      ResBlock(in_channels=256, out_channels=64, shortcut=None, stride=1)
                                    ) # [16, 256, 56, 56]
        # conv3_x
        self.shortcut3 = nn.Sequential(
                                        nn.Conv2d(in_channels=256, out_channels=512, kernel_size=1, stride=2), 
                                        nn.BatchNorm2d(num_features=512)                                  
                                      )      
        self.conv3_x = nn.Sequential(
                                      ResBlock(in_channels=256, out_channels=128, shortcut=self.shortcut3, stride=2),
                                      ResBlock(in_channels=512, out_channels=128, shortcut=None, stride=1),
                                      ResBlock(in_channels=512, out_channels=128, shortcut=None, stride=1),
                                      ResBlock(in_channels=512, out_channels=128, shortcut=None, stride=1)

                                    ) # [16, 512, 28, 28]   
        # conv4_x
        self.shortcut4 = nn.Sequential(
                                        nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=1, stride=2), 
                                        nn.BatchNorm2d(num_features=1024)                                  
                                      )      
        self.conv4_x = nn.Sequential(
                                     ResBlock(in_channels=512, out_channels=256, shortcut=self.shortcut4, stride=2),
                                     ResBlock(in_channels=1024, out_channels=256, shortcut=None, stride=1),
                                     ResBlock(in_channels=1024, out_channels=256, shortcut=None, stride=1),
                                     ResBlock(in_channels=1024, out_channels=256, shortcut=None, stride=1),
                                     ResBlock(in_channels=1024, out_channels=256, shortcut=None, stride=1),
                                     ResBlock(in_channels=1024, out_channels=256, shortcut=None, stride=1),                                                                                        
                                    ) # [16, 1024, 14, 14] 
        # conv5_x
        self.shortcut5 = nn.Sequential(
                                        nn.Conv2d(in_channels=1024, out_channels=2048, kernel_size=1, stride=2), 
                                        nn.BatchNorm2d(num_features=2048)                                  
                                      )    
        self.conv5_x = nn.Sequential(
                                     ResBlock(in_channels=1024, out_channels=512, shortcut=self.shortcut5, stride=2),
                                     ResBlock(in_channels=2048, out_channels=512, shortcut=None, stride=1),
                                     ResBlock(in_channels=2048, out_channels=512, shortcut=None, stride=1),                                                                
                                    ) # [16, 2048, 7, 7]  

        self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))  # [16, 2048, 1, 1]                                                                                                                              

        self.classifier = nn.Sequential(
                                    nn.Linear(in_features=2048, out_features=120),
                                    # nn.BatchNorm1d(num_features=64),
                                    # nn.ReLU(),
                                    # nn.Linear(in_features=64, out_features=2)
                                    )

    def forward(self, x):
        x = self.conv1(x) # [16, 64, 56, 56]
        x = self.conv2_x(x) # [16, 256, 56, 56]
        x = self.conv3_x(x) # [16, 512, 28, 28] 
        x = self.conv4_x(x) # [16, 1024, 14, 14] 
        x = self.conv5_x(x) # [16, 2048, 7, 7] 
        x = self.avg_pool(x) # [16, 2048, 1, 1] 

        # reshape할 형상 : (batch_size x 2048)
        # x = x.view(-1, 2048) # option 1 : view
        x = torch.flatten(x, 1) # option 2 : flatten 
        # x = x.reshape(x.shape[0], -1) # option 3 : reshape

        x = self.classifier(x)    
        return x

In [62]:
model = ResNet50()
model.to(device)
model

ResNet50(
  (conv1): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  )
  (shortcut2): Sequential(
    (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1))
    (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (conv2_x): Sequential(
    (0): ResBlock(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
      (batch_norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (batch_norm2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1))
      (batch_norm3): BatchNorm2d(256, eps=1e-05, 

In [63]:
out = model(batch['image'].to(device))
out.shape

torch.Size([16, 120])

In [31]:
for name, parameter in model.named_parameters():
    print(name, parameter.size())

conv1.0.weight torch.Size([64, 3, 7, 7])
conv1.1.weight torch.Size([64])
conv1.1.bias torch.Size([64])
shortcut2.0.weight torch.Size([256, 64, 1, 1])
shortcut2.0.bias torch.Size([256])
shortcut2.1.weight torch.Size([256])
shortcut2.1.bias torch.Size([256])
conv2_x.0.conv1.weight torch.Size([64, 64, 1, 1])
conv2_x.0.conv1.bias torch.Size([64])
conv2_x.0.batch_norm1.weight torch.Size([64])
conv2_x.0.batch_norm1.bias torch.Size([64])
conv2_x.0.conv2.weight torch.Size([64, 64, 3, 3])
conv2_x.0.conv2.bias torch.Size([64])
conv2_x.0.batch_norm2.weight torch.Size([64])
conv2_x.0.batch_norm2.bias torch.Size([64])
conv2_x.0.conv3.weight torch.Size([256, 64, 1, 1])
conv2_x.0.conv3.bias torch.Size([256])
conv2_x.0.batch_norm3.weight torch.Size([256])
conv2_x.0.batch_norm3.bias torch.Size([256])
conv2_x.1.conv1.weight torch.Size([64, 256, 1, 1])
conv2_x.1.conv1.bias torch.Size([64])
conv2_x.1.batch_norm1.weight torch.Size([64])
conv2_x.1.batch_norm1.bias torch.Size([64])
conv2_x.1.conv2.weight tor

In [64]:
learning_rate = 0.0001
# 손실함수
loss_fn = nn.CrossEntropyLoss()

# 옵티마이저(최적화함수, 예:경사하강법)
# optimizer = optim.SGD(model.parameters(), lr=learning_rate)

# 규제의 강도 설정 weight_decay
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
# optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=0.001)

# Learning Rate Schedule
# https://pytorch.org/docs/stable/generated/torch.optim.lr_scheduler.ReduceLROnPlateau.html

# 모니터링하고 있는 값(예:valid_loss)의 최소값(min) 또는 최대값(max) patience 기간동안 줄어들지 않을 때(OnPlateau) lr에 factor(0.1)를 곱해주는 전략
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=4, verbose=True)

In [65]:
def validate(model, validloader, loss_fn):
    total = 0   
    correct = 0
    valid_loss = 0
    valid_accuracy = 0

  # 전방향 예측을 구할 때는 gradient가 필요가 없음음
    with torch.no_grad():
        for batch in validloader:# 이터레이터로부터 next()가 호출되며 미니배치를 반환(images, labels)      
          # images, labels : (torch.Size([16, 3, 224, 224]), torch.Size([16]))
          # 0. Data를 GPU로 보내기
            images = batch['image']
            labels = batch['label']
            
            images, labels = images.to(device), labels.to(device)
            
            # 1. 입력 데이터 준비
            # not Flatten !!
            # images.resize_(images.size()[0], 784)
  
            # 2. 전방향(Forward) 예측
            logit = model(images) # 예측 점수
            _, preds = torch.max(logit, 1) # 배치에 대한 최종 예측
            # preds = logit.max(dim=1)[1] 
            correct += int((preds == labels).sum()) # 배치 중 맞은 것의 개수가 correct에 누적
            total += labels.shape[0] # 배치 사이즈만큼씩 total에 누적

            loss = loss_fn(logit, labels)
            valid_loss += loss.item() # tensor에서 값을 꺼내와서, 배치의 loss 평균값을 valid_loss에 누적

        valid_accuracy = correct / total
  
    return valid_loss, valid_accuracy

In [66]:
images.shape

NameError: name 'images' is not defined

In [1]:
 for epoch in range(epochs):
    model.train() # 훈련 모드
    train_loss = 0
    for batch in trainloader: # 이터레이터로부터 next()가 호출되며 미니배치를 반환(images, labels)
      steps += 1
      # images, labels : (torch.Size([16, 3, 224, 224]), torch.Size([16]))
      images = batch['image']
      key_pts = batch['keypoints']

NameError: name 'trainloader' is not defined

In [67]:

def train_loop(model, trainloader, loss_fn, epochs, optimizer):  
    steps = 0
    steps_per_epoch = len(trainloader) 
    min_loss = 1000000
    max_accuracy = 0
    trigger = 0
    patience = 7 

    for epoch in range(epochs):
        model.train() # 훈련 모드
        train_loss = 0
        for batch in trainloader: # 이터레이터로부터 next()가 호출되며 미니배치를 반환(images, labels)
            steps += 1
            # images, labels : (torch.Size([16, 3, 224, 224]), torch.Size([16]))
            # 0. Data를 GPU로 보내기
            images = batch['image']
            labels = batch['label']
            images, labels = images.to(device), labels.to(device)

            # 1. 입력 데이터 준비
            # not Flatten !!
            # images.resize_(images.shape[0], 784) 

            # 2. 전방향(forward) 예측
            predict = model(images) # 예측 점수
            loss = loss_fn(predict, labels) # 예측 점수와 정답을 CrossEntropyLoss에 넣어 Loss값 반환

            # 3. 역방향(backward) 오차(Gradient) 전파
            optimizer.zero_grad() # Gradient가 누적되지 않게 하기 위해
            loss.backward() # 모델파리미터들의 Gradient 전파

            # 4. 경사 하강법으로 모델 파라미터 업데이트
            optimizer.step() # W <- W -lr*Gradient

            train_loss += loss.item()
            if (steps % steps_per_epoch) == 0 : 
                model.eval() # 평가 모드 : 평가에서 사용하지 않을 계층(배치 정규화, 드롭아웃)들을 수행하지 않게 하기 위해서
                valid_loss, valid_accuracy = validate(model, validloader, loss_fn)
            # -------------------------------------------

                print('Epoch : {}/{}.......'.format(epoch+1, epochs),            
                   'Train Loss : {:.3f}'.format(train_loss/len(trainloader)), 
                   'Valid Loss : {:.3f}'.format(valid_loss/len(validloader)), 
                   'Valid Accuracy : {:.3f}'.format(valid_accuracy)            
                      )

              # Best model 저장    
              # option 1 : valid_loss 모니터링
              # if valid_loss < min_loss: # 바로 이전 epoch의 loss보다 작으면 저장하기
              #   min_loss = valid_loss
              #   best_model_state = deepcopy(model.state_dict())          
              #   torch.save(best_model_state, 'best_checkpoint.pth')     

              # option 2 : valid_accuracy 모니터링      
                if valid_accuracy > max_accuracy : # 바로 이전 epoch의 accuracy보다 크면 저장하기
                    max_accuracy = valid_accuracy
                    best_model_state = deepcopy(model.state_dict())          
                    torch.save(best_model_state, 'best_checkpoint.pth')  
              # -------------------------------------------

              # Early Stopping (조기 종료)
                if valid_loss > min_loss: # valid_loss가 min_loss를 갱신하지 못하면
                    trigger += 1
                    print('trigger : ', trigger)
                if trigger > patience:
                    print('Early Stopping !!!')
                    print('Training loop is finished !!')
                    return
                else:
                    trigger = 0
                    min_loss = valid_loss
                # -------------------------------------------

                # Learning Rate Scheduler
                scheduler.step(valid_loss)
            # -------------------------------------------

    return  

In [68]:
epochs = 55
%time train_loop(model, trainloader, loss_fn, epochs, optimizer)

Epoch : 1/55....... Train Loss : 4.540 Valid Loss : 4.684 Valid Accuracy : 0.059
Epoch : 2/55....... Train Loss : 4.022 Valid Loss : 4.136 Valid Accuracy : 0.085
Epoch : 3/55....... Train Loss : 3.663 Valid Loss : 3.913 Valid Accuracy : 0.126
Epoch : 4/55....... Train Loss : 3.341 Valid Loss : 3.265 Valid Accuracy : 0.181
Epoch : 5/55....... Train Loss : 3.026 Valid Loss : 2.997 Valid Accuracy : 0.224
Epoch : 6/55....... Train Loss : 2.724 Valid Loss : 3.002 Valid Accuracy : 0.241
trigger :  1
Epoch : 7/55....... Train Loss : 2.377 Valid Loss : 2.710 Valid Accuracy : 0.303
Epoch : 8/55....... Train Loss : 2.048 Valid Loss : 2.691 Valid Accuracy : 0.312
Epoch : 9/55....... Train Loss : 1.715 Valid Loss : 2.953 Valid Accuracy : 0.305
trigger :  1
Epoch : 10/55....... Train Loss : 1.368 Valid Loss : 3.302 Valid Accuracy : 0.279
trigger :  1
Epoch : 11/55....... Train Loss : 1.055 Valid Loss : 2.716 Valid Accuracy : 0.346
Epoch : 12/55....... Train Loss : 0.784 Valid Loss : 3.155 Valid Acc

## 모델 예측 

In [70]:
test_iter = iter(testloader)
batch = next(test_iter)
images = batch['image']
labels = batch['label']

images, labels = images.to(device), labels.to(device)
print(images.size(), labels.size())

torch.Size([16, 3, 224, 224]) torch.Size([16])


In [76]:
def evaluation(model, testloader, loss_fn):
    total = 0   
    correct = 0
    test_loss = 0
    test_accuracy = 0

  # 전방향 예측을 구할 때는 gradient가 필요가 없음음
    with torch.no_grad():
        for images, labels in testloader: # 이터레이터로부터 next()가 호출되며 미니배치를 반환(images, labels)
          # 0. Data를 GPU로 보내기
            images = batch['image']
            labels = batch['label']
            images, labels = images.to(device), labels.to(device)
          # 1. 입력 데이터 준비
          # not Flatten
          # images.resize_(images.size()[0], 784)
      
          # 2. 전방향(Forward) 예측
            logit = model(images) # 예측 점수
            _, preds = torch.max(logit, 1) # 배치에 대한 최종 예측
            # preds = logit.max(dim=1)[1] 
            correct += int((preds == labels).sum()) # 배치치 중 맞은 것의 개수가 correct에 누적
            total += labels.shape[0] # 배치 사이즈만큼씩 total에 누적

            loss = loss_fn(logit, labels)
            test_loss += loss.item() # tensor에서 값을 꺼내와서, 배치의 loss 평균값을 valid_loss에 누적
        
        test_accuracy = correct / total
   
        print('Test Loss : {:.3f}'.format(test_loss/len(testloader)), 
        'Test Accuracy : {:.3f}'.format(test_accuracy))

model.eval()
evaluation(model, testloader, loss_fn)  

Test Loss : 1.320 Test Accuracy : 0.625


## 모델 저장

In [77]:
# 현재 모델에 저장되어 있는 모델 파라미터터
model.state_dict().keys()

odict_keys(['conv1.0.weight', 'conv1.1.weight', 'conv1.1.bias', 'conv1.1.running_mean', 'conv1.1.running_var', 'conv1.1.num_batches_tracked', 'shortcut2.0.weight', 'shortcut2.0.bias', 'shortcut2.1.weight', 'shortcut2.1.bias', 'shortcut2.1.running_mean', 'shortcut2.1.running_var', 'shortcut2.1.num_batches_tracked', 'conv2_x.0.conv1.weight', 'conv2_x.0.conv1.bias', 'conv2_x.0.batch_norm1.weight', 'conv2_x.0.batch_norm1.bias', 'conv2_x.0.batch_norm1.running_mean', 'conv2_x.0.batch_norm1.running_var', 'conv2_x.0.batch_norm1.num_batches_tracked', 'conv2_x.0.conv2.weight', 'conv2_x.0.conv2.bias', 'conv2_x.0.batch_norm2.weight', 'conv2_x.0.batch_norm2.bias', 'conv2_x.0.batch_norm2.running_mean', 'conv2_x.0.batch_norm2.running_var', 'conv2_x.0.batch_norm2.num_batches_tracked', 'conv2_x.0.conv3.weight', 'conv2_x.0.conv3.bias', 'conv2_x.0.batch_norm3.weight', 'conv2_x.0.batch_norm3.bias', 'conv2_x.0.batch_norm3.running_mean', 'conv2_x.0.batch_norm3.running_var', 'conv2_x.0.batch_norm3.num_batche

In [78]:
torch.save(model.state_dict(), 'last_checkpoint.pth')

In [79]:
# 시간이 흐른뒤 다시 모델 가져오기
last_state_dict = torch.load('last_checkpoint.pth')

In [80]:
last_state_dict.keys()

odict_keys(['conv1.0.weight', 'conv1.1.weight', 'conv1.1.bias', 'conv1.1.running_mean', 'conv1.1.running_var', 'conv1.1.num_batches_tracked', 'shortcut2.0.weight', 'shortcut2.0.bias', 'shortcut2.1.weight', 'shortcut2.1.bias', 'shortcut2.1.running_mean', 'shortcut2.1.running_var', 'shortcut2.1.num_batches_tracked', 'conv2_x.0.conv1.weight', 'conv2_x.0.conv1.bias', 'conv2_x.0.batch_norm1.weight', 'conv2_x.0.batch_norm1.bias', 'conv2_x.0.batch_norm1.running_mean', 'conv2_x.0.batch_norm1.running_var', 'conv2_x.0.batch_norm1.num_batches_tracked', 'conv2_x.0.conv2.weight', 'conv2_x.0.conv2.bias', 'conv2_x.0.batch_norm2.weight', 'conv2_x.0.batch_norm2.bias', 'conv2_x.0.batch_norm2.running_mean', 'conv2_x.0.batch_norm2.running_var', 'conv2_x.0.batch_norm2.num_batches_tracked', 'conv2_x.0.conv3.weight', 'conv2_x.0.conv3.bias', 'conv2_x.0.batch_norm3.weight', 'conv2_x.0.batch_norm3.bias', 'conv2_x.0.batch_norm3.running_mean', 'conv2_x.0.batch_norm3.running_var', 'conv2_x.0.batch_norm3.num_batche

In [81]:
# 읽어들인 모델 파라미터는 모델 아키텍처에 연결을 시켜줘야 함
# load_state_dict() 사용
last_model = ResNet50()
last_model.to(device)
last_model.load_state_dict(last_state_dict)

<All keys matched successfully>

In [82]:
last_model.eval()
evaluation(last_model, testloader, loss_fn) 

Test Loss : 1.320 Test Accuracy : 0.625


In [85]:
# valid loss or accuracy 기준 best model
best_state_dict = torch.load('best_checkpoint.pth')
best_model = ResNet50()
best_model.to(device)
best_model.load_state_dict(best_state_dict)

<All keys matched successfully>

In [86]:
best_model.eval()
evaluation(best_model, testloader, loss_fn)

Test Loss : 1.498 Test Accuracy : 0.688
