In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets
import torchvision.transforms as transforms

MNIST 데이터를 불러온다.

In [3]:
# The only preprocessing step: turn each image into a tensor.
transform = transforms.Compose([transforms.ToTensor()])

# Train and test splits share the same root directory and transform;
# the data is downloaded into ./MNIST when it is not already there.
train_dataset = datasets.MNIST(root='./MNIST', train=True, transform=transform, download=True)
test_dataset = datasets.MNIST(root='./MNIST', train=False, transform=transform, download=True)

len_train = len(train_dataset)
print(len_train)

60000


모델을 설계한다.

In [4]:
class ConvNet(nn.Module):
    """Small CNN classifier: one conv stage followed by a two-layer MLP head.

    Args:
        h1: Accepted for interface compatibility but not used by any layer.

    The forward pass returns one row of 10 raw scores per input sample.
    """

    def __init__(self, h1=96):
        super().__init__()

        # input: 1*28*28
        feature_layers = [
            nn.Conv2d(1, 16, kernel_size=3, padding=1),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(2, stride=2),
        ]
        self.conv1 = nn.Sequential(*feature_layers)
        # output: 16*14*14

        classifier_layers = [
            nn.Linear(16 * 14 * 14, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Linear(512, 10),
        ]
        self.fc = nn.Sequential(*classifier_layers)

    def forward(self, x):
        """Run the conv stage, flatten per sample, and apply the classifier head."""
        features = self.conv1(x)
        # Keep the batch dimension and collapse everything else into one axis.
        flat = features.view(features.size(0), -1)
        return self.fc(flat)

GPU 사용을 위해 device를 초기화하고 random_seed를 지정한다.

In [5]:
# Seed the default generator first, then pick the device; when CUDA is
# present, seed the current GPU and every other GPU with the same value.
torch.manual_seed(1)
device = 'cpu'
if torch.cuda.is_available():
  device = 'cuda'
  torch.cuda.manual_seed(1)
  torch.cuda.manual_seed_all(1)

K-Fold Cross Validation을 위해 KFold를 import하고 객체를 생성한다.

In [6]:
from sklearn.model_selection import KFold

# Shuffled 5-way splitter with a fixed random_state.
splits = KFold(
    n_splits=5,
    shuffle=True,
    random_state=1,
)
# Filled by the training loop: one history dict per fold, keyed 'fold<N>'.
fold_dict = dict()

train과정에서 사용할 함수를 선언한다.

In [7]:
def train_epoch(model, device, dataloader, loss_fn, optimizer):
  """Run one optimisation pass over ``dataloader``.

  Args:
    model: Module to train; switched to training mode here.
    device: Device that every batch is moved to before the forward pass.
    dataloader: Iterable yielding ``(images, labels)`` batches.
    loss_fn: Callable ``loss_fn(outputs, labels)`` returning a scalar tensor.
    optimizer: Optimizer stepped once per batch.

  Returns:
    ``(train_loss, train_correct)``: the sum over batches of
    ``loss * batch_size`` (not yet averaged) and the number of samples
    whose highest-scoring class equals the label.
  """
  model.train()
  running_loss = 0.0
  num_correct = 0

  for batch_images, batch_labels in dataloader:
    batch_images, batch_labels = batch_images.to(device), batch_labels.to(device)

    optimizer.zero_grad()                    # clear gradients left from the previous step
    outputs = model(batch_images)
    batch_loss = loss_fn(outputs, batch_labels)
    batch_loss.backward()
    optimizer.step()

    # Weight the batch loss by its size so the caller can divide by the
    # total sample count even when the last batch is smaller.
    running_loss += batch_loss.item() * batch_images.size(0)

    # Index of the largest score along dim 1 is the predicted class.
    predicted = torch.max(outputs.data, 1)[1]
    num_correct += (predicted == batch_labels).sum().item()

  return running_loss, num_correct

validation을 위한 함수다.

In [8]:
def valid_epoch(model, device, dataloader, loss_fn):
  """Evaluate ``model`` over ``dataloader`` without updating its weights.

  Args:
    model: Module to evaluate; switched to evaluation mode here.
    device: Device that every batch is moved to before the forward pass.
    dataloader: Iterable yielding ``(images, labels)`` batches.
    loss_fn: Callable ``loss_fn(outputs, labels)`` returning a scalar tensor.

  Returns:
    ``(valid_loss, val_correct)``: the sum over batches of
    ``loss * batch_size`` (not yet averaged) and the number of samples
    whose highest-scoring class equals the label.
  """
  valid_loss, val_correct = 0.0, 0
  model.eval()
  # Nothing is back-propagated here, so disable autograd: otherwise every
  # forward pass records a graph that is never used.
  with torch.no_grad():
    for images, labels in dataloader:
      images = images.to(device)
      labels = labels.to(device)
      outputs = model(images)
      loss = loss_fn(outputs, labels)
      # Weight by batch size so the caller can divide by the sample count.
      valid_loss += loss.item() * images.size(0)
      # Index of the largest score along dim 1 is the predicted class.
      _, predictions = torch.max(outputs, 1)
      val_correct += (predictions == labels).sum().item()

  return valid_loss, val_correct

각 Fold마다 5 epoch씩 학습시켜보자. <br>
1-Fold 5번, 2-Fold 5번, ..., 5-Fold 5번 식으로 진행된다.

In [17]:
import numpy as np
from torch.utils.data import SubsetRandomSampler

criterion = nn.CrossEntropyLoss()
batch_size = 128
num_epochs = 5

# The device does not change between folds, so resolve it once up front
# instead of once per fold.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

for fold, (train_idx, val_idx) in enumerate(splits.split(np.arange(len(train_dataset)))):
    print('Fold {}'.format(fold + 1))

    # Both loaders read from train_dataset; each sampler restricts its loader
    # to the indices assigned to that side of the current fold.
    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(val_idx)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=train_sampler)
    valid_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=valid_sampler)

    # Every fold starts from a freshly initialised model and optimizer.
    model = ConvNet()
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    history = {'train_loss': [], 'valid_loss': [], 'train_acc': [], 'valid_acc': []}

    for epoch in range(num_epochs):
        train_loss, train_correct = train_epoch(model, device, train_loader, criterion, optimizer)
        valid_loss, val_correct = valid_epoch(model, device, valid_loader, criterion)

        # The epoch helpers return sums; divide by the number of sampled
        # indices to get a per-sample loss and an accuracy in percent.
        n_train = len(train_loader.sampler)
        n_valid = len(valid_loader.sampler)
        train_loss = train_loss / n_train
        train_acc = train_correct / n_train * 100
        valid_loss = valid_loss / n_valid
        valid_acc = val_correct / n_valid * 100

        # NOTE(review): the "Test" figures printed here are computed on the
        # fold's validation indices, not on test_dataset.
        print(f"[Epoch:{epoch+1}/{num_epochs}] AVG Training Loss/Acc: {train_loss:.3f}/{train_acc:.2f}, AVG Test Loss/Acc: {valid_loss:.3f}/{valid_acc:.2f}")
        history['train_loss'].append(train_loss)
        history['valid_loss'].append(valid_loss)
        history['train_acc'].append(train_acc)
        history['valid_acc'].append(valid_acc)

    fold_dict['fold{}'.format(fold+1)] = history

# NOTE(review): `model` is rebound on every fold, so this persists only the
# model trained on the last fold.
torch.save(model,'k_cross_CNN.pt')

Fold 1
[Epoch:1/5] AVG Training Loss/Acc: 0.130/96.16, AVG Test Loss/Acc: 0.073/97.91
[Epoch:2/5] AVG Training Loss/Acc: 0.038/98.92, AVG Test Loss/Acc: 0.054/98.34
[Epoch:3/5] AVG Training Loss/Acc: 0.021/99.39, AVG Test Loss/Acc: 0.060/98.18
[Epoch:4/5] AVG Training Loss/Acc: 0.011/99.74, AVG Test Loss/Acc: 0.054/98.44
[Epoch:5/5] AVG Training Loss/Acc: 0.008/99.78, AVG Test Loss/Acc: 0.065/98.14
Fold 2
[Epoch:1/5] AVG Training Loss/Acc: 0.137/96.00, AVG Test Loss/Acc: 0.062/98.19
[Epoch:2/5] AVG Training Loss/Acc: 0.042/98.78, AVG Test Loss/Acc: 0.055/98.15
[Epoch:3/5] AVG Training Loss/Acc: 0.022/99.34, AVG Test Loss/Acc: 0.052/98.43
[Epoch:4/5] AVG Training Loss/Acc: 0.012/99.67, AVG Test Loss/Acc: 0.048/98.57
[Epoch:5/5] AVG Training Loss/Acc: 0.008/99.77, AVG Test Loss/Acc: 0.053/98.38
Fold 3
[Epoch:1/5] AVG Training Loss/Acc: 0.134/96.11, AVG Test Loss/Acc: 0.067/97.96
[Epoch:2/5] AVG Training Loss/Acc: 0.041/98.79, AVG Test Loss/Acc: 0.053/98.40
[Epoch:3/5] AVG Training Loss/A

fold_dict에는 다음과 같은 값들이 저장된다.

In [20]:
# Show the recorded history of the first stored fold only.
for fold_name, history in list(fold_dict.items())[:1]:
  print(f'<{fold_name}>')
  for metric_name, recorded_values in history.items():
    print(f"{metric_name}: {recorded_values}")

<fold1>
train_loss: [0.1299572738458713, 0.03798096673190594, 0.02108794889661173, 0.011165055048496773, 0.008147445250302553]
valid_loss: [0.07254208528995514, 0.05435066587726275, 0.06025160567710797, 0.053579377385477224, 0.06464485293378433]
train_acc: [96.16458333333333, 98.92291666666667, 99.38749999999999, 99.73541666666667, 99.77916666666667]
valid_acc: [97.90833333333333, 98.34166666666667, 98.18333333333334, 98.44166666666668, 98.14166666666667]


5-Fold cross validation의 성능을 확인해보자.

In [21]:
k = 5
fold_histories = [fold_dict[f'fold{fold_no}'] for fold_no in range(1, k + 1)]

# One number per fold: the mean of every value recorded for that metric.
# NOTE(review): this averages over all recorded entries of a fold, not
# just the last one.
train_loss = [np.mean(history['train_loss']) for history in fold_histories]
train_acc = [np.mean(history['train_acc']) for history in fold_histories]
valid_loss = [np.mean(history['valid_loss']) for history in fold_histories]
valid_acc = [np.mean(history['valid_acc']) for history in fold_histories]

# The "Test" figures below come from the 'valid_*' histories.
print(f'Performance of {k}-Fold Cross Validation (Train Set)')
print(f"Training Loss/Acc: {np.mean(train_loss):.3f}/{np.mean(train_acc):.2f}, Test Loss/Acc: {np.mean(valid_loss):.3f}/{np.mean(valid_acc):.2f}")

Performance of 5-Fold Cross Validation (Train Set)
Training Loss/Acc: 0.043/98.76, Test Loss/Acc: 0.054/98.40


이후 이 모델들을 앙상블시키면 더 좋은 모델을 얻을 수도 있다고 한다. <br>
앙상블은 이후에 적용해보자.

## <strong> 알게 된 점 </strong>

<h3> <strong> 1. tensor.data </strong> </h3>

tensor의 data 속성을 사용하면 같은 값을 담고 있지만 autograd 추적에서 분리된(requires_grad=False) tensor를 반환해준다.

In [16]:
# Four identical rows of 1..5.
a = torch.Tensor([[1, 2, 3, 4, 5]] * 4)
a.data

tensor([[1., 2., 3., 4., 5.],
        [1., 2., 3., 4., 5.],
        [1., 2., 3., 4., 5.],
        [1., 2., 3., 4., 5.]])

<h3> <strong> 2. tensor.item() </strong> </h3>

tensor.item()을 사용하면 tensor내의 값만 출력할 수 있다. <br>
단, 하나의 원소만 가진 tensor여야 한다.

In [22]:
# A single-element tensor; with torch.FloatTensor([0.1, 0.2]) the item()
# call below would raise an error, since it needs exactly one element.
a = torch.FloatTensor([0.1])
print(f'tensor: {a}', f'tensor.item: {a.item()}', sep='\n')

tensor: tensor([0.1000])
tensor.item: 0.10000000149011612


<h3> <strong> 3. DataLoader parameter-sampler </strong> </h3>

sampler는 index를 컨트롤하는 방법이다. 데이터의 index를 원하는 방식대로 조정한다. <br>
index를 컨트롤해야 하므로 **shuffle 파라미터는 False**여야 한다.





In [41]:
from torch.utils.data import TensorDataset, DataLoader, SubsetRandomSampler
from sklearn.model_selection import KFold
import numpy as np

# Ten rows; row v is [v, v, v], so each printed batch reveals which indices were drawn.
x = torch.FloatTensor([[v] * 3 for v in range(10)])

x_dataset = TensorDataset(x)  # wrap the tensor as a dataset
splits = KFold(n_splits=5, shuffle=True, random_state=1)

# Array holding every index of the dataset; KFold partitions these.
all_indices = np.arange(len(x_dataset))

for fold, (train_idx, valid_idx) in enumerate(splits.split(all_indices)):
  print(f'{fold+1} Fold: {train_idx} / {valid_idx}')
  train_sampler = SubsetRandomSampler(train_idx)
  valid_sampler = SubsetRandomSampler(valid_idx)
  train_loader = DataLoader(x_dataset, batch_size=2, sampler=train_sampler)
  valid_loader = DataLoader(x_dataset, batch_size=2, sampler=valid_sampler)

  # Print the sampler size and every batch, training loader first.
  for loader_name, loader in (('train_loader', train_loader), ('valid_loader', valid_loader)):
    print(f'<{loader_name}>')
    print(f'len({loader_name}.sampler): {len(loader.sampler)}')
    for batch in loader:
      print(batch)

  print()

  # Two folds are enough for the demonstration.
  if fold == 1:
    break

1 Fold: [0 1 3 4 5 6 7 8] / [2 9]
<train_loader>
len(train_loader.sampler): 8
[tensor([[3., 3., 3.],
        [4., 4., 4.]])]
[tensor([[8., 8., 8.],
        [0., 0., 0.]])]
[tensor([[7., 7., 7.],
        [1., 1., 1.]])]
[tensor([[6., 6., 6.],
        [5., 5., 5.]])]
<valid_loader>
len(valid_loader.sampler): 2
[tensor([[9., 9., 9.],
        [2., 2., 2.]])]

2 Fold: [0 1 2 3 5 7 8 9] / [4 6]
<train_loader>
len(train_loader.sampler): 8
[tensor([[0., 0., 0.],
        [7., 7., 7.]])]
[tensor([[1., 1., 1.],
        [9., 9., 9.]])]
[tensor([[2., 2., 2.],
        [3., 3., 3.]])]
[tensor([[5., 5., 5.],
        [8., 8., 8.]])]
<valid_loader>
len(valid_loader.sampler): 2
[tensor([[6., 6., 6.],
        [4., 4., 4.]])]

