## Using CNN and NCM for CIFAR-10 feature extraction and classification

### Summary
CIFAR-10 데이터셋을 분류하기 위해 CNN으로 이미지의 특징을 추출한 뒤, NCM(Nearest Class Mean)으로 각 class의 특징 평균을 구하고 가장 가까운 평균을 갖는 class로 분류한다.

<span style="color: #2D3748; background-color:#fff5b1;">Test size를 0.2로 10번 반복 실험한 결과 평균적으로 0.34의 정확도를 보여주고 있고, test 데이터 1개를 분류하는데 0.0011초의 시간이 걸린다.</span>

In [15]:
import time
import random
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.backends.cudnn as cudnn
from utils.util import Info
from sklearn.neighbors import NearestCentroid
import torchvision
from torchvision import transforms
from torch.utils.data import DataLoader
from utils.NCM_Classifier import train, validate
from models.resnet_feature import resnet18_feature

In [16]:
class Config(Info):
    """Experiment settings for the CIFAR-10 / NCM run.

    Attributes set here are read by the inherited ``Info`` helpers
    (``info()`` / ``print_rutin()`` are called on an instance right after
    construction in this notebook).

    Attributes:
        device: Label of the machine the experiment runs on.
        dataset: Name of the dataset.
        test_size: Fraction held out for testing (0.2 is reported as
            "Train size 80%" in the printed summary).
        feature_size: Number of features per sample; 3072 equals 32*32*3.
            NOTE(review): this looks like the raw-pixel size rather than the
            CNN feature size - confirm it is still the intended value.
        method: Classification method name.
        distance: Distance metric name.
        reduction_method: ``[method, n_components]`` of the dimensionality
            reduction step; ``[None, None]`` means no reduction.
        iter: Number of experiment repetitions.
    """

    def __init__(self):
        # Zero-argument super() resolves to Config's parent, so Info.__init__
        # actually runs. The previous super(Info, self) started the MRO lookup
        # *after* Info and therefore skipped Info's initializer entirely.
        super().__init__()
        self.device = 'PC'
        self.dataset = 'CIFAR_10'
        self.test_size = 0.2
        self.feature_size = 3072
        self.method = 'NCM'
        self.distance = 'Euclidean'
        self.reduction_method = [None, None]  # method, n_components
        self.iter = 10

In [17]:
# Build the experiment configuration and print it.
# NOTE(review): presumably info() prints the settings tree and print_rutin()
# the one-line summary seen in this cell's output - confirm in utils.util.Info.
cig = Config()
cig.info()
cig.print_rutin()

Device ── PC
│
├──Dataset
│    └────CIFAR_10
│    └────Train size 80%
│    └────Feature size: 3072
│
├──Method
│    └────NCM
│    └────Euclidean
│
├──Dimension reduction
│    └────Method: None
│    └────Component size: None
│    └────Feature Reduction Ratio: None%
│
└──Iteration
    └────10
PC - CIFAR_10(80%) - NCM - 10 iteration


In [None]:
# Seed the Python, NumPy and PyTorch random number generators with one value
# so that repeated runs of the notebook are comparable.
seed = 0
for _seed_rng in (random.seed, np.random.seed, torch.manual_seed):
    _seed_rng(seed)
# torch.cuda.manual_seed_all(seed)  # enable when using multiple GPUs

# Deterministic cuDNN kernels slow computation down; mainly useful in the late
# stage of a project, when the model and code have to be released.
cudnn.benchmark = False
cudnn.deterministic = True

## Load CIFAR-10 Dataset

In [None]:
# Preprocessing shared by both splits: convert to a tensor, then normalize each
# channel with the fixed mean / std values below.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.2023, 0.1994, 0.2010)),
])

batch_size = 512

# CIFAR-10 training split; shuffled every epoch.
trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform,
)
train_loader = DataLoader(
    trainset, batch_size=batch_size, shuffle=True, num_workers=2,
)

# CIFAR-10 test split (train=False), used here for validation; order is kept fixed.
validationset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform,
)
val_loader = DataLoader(
    validationset, batch_size=batch_size, shuffle=False, num_workers=2,
)

# Sanity check: raw array shapes of both splits, one per line.
print(trainset.data.shape, validationset.data.shape, sep='\n')

Files already downloaded and verified
Files already downloaded and verified
(50000, 32, 32, 3)
(10000, 32, 32, 3)


In [None]:
# Use the first CUDA device when available, otherwise fall back to the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

epoch = 100
num_class = 10

# Feature-extracting ResNet-18 from the project's models package.
model = resnet18_feature(num_classes=num_class)
model.to(device)

# scikit-learn nearest-centroid classifier, handed to train()/validate().
classifier = NearestCentroid()

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=1e-4)
# Tie the cosine schedule's horizon to the number of epochs instead of repeating
# the literal 100, so changing `epoch` keeps the annealing aligned with training.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epoch)

In [18]:
best_acc = 0.  # best validation accuracy seen so far

for idx in range(epoch):  # one iteration per training epoch
    # Train for one epoch; `idx` is passed to the helper as the epoch index.
    train(train_loader, model, classifier, criterion, optimizer, idx, device)

    # Evaluate on the validation loader; the helper returns this epoch's accuracy.
    acc = validate(val_loader, model, classifier, criterion, device)

    # Track the best accuracy.
    # NOTE(review): `is_best` is computed but nothing in this cell saves a
    # checkpoint - either add the save or drop the flag.
    is_best = acc > best_acc
    best_acc = max(acc, best_acc)

    # Advance the learning-rate schedule once per epoch.
    scheduler.step()

print('Finished Training')

Epoch: [0]
Train: [98/98]	Time 0.0249 - (0.0540)	Data 0.0010 - (0.0243)	Loss 1.4221 - (1.6207)	Acc 48.81 - (41.06)
Test: [20/20]	Time 0.0070 - (0.0959)	Data 0.0010 - (0.0855)	Loss 1.4944 - (1.3721)	Acc 45.59 - (50.41)
 *    Acc 50.4100
Epoch: [1]
Train: [98/98]	Time 0.0253 - (0.0512)	Data 0.0010 - (0.0229)	Loss 1.2005 - (1.1991)	Acc 56.85 - (56.88)
Test: [20/20]	Time 0.0060 - (0.1005)	Data 0.0000 - (0.0835)	Loss 1.4547 - (1.2205)	Acc 48.16 - (55.78)
 *    Acc 55.7800
Epoch: [2]
Train: [98/98]	Time 0.0259 - (0.0531)	Data 0.0010 - (0.0239)	Loss 1.0622 - (0.9891)	Acc 62.80 - (64.85)
Test: [20/20]	Time 0.0070 - (0.0963)	Data 0.0010 - (0.0812)	Loss 1.3484 - (1.1511)	Acc 54.04 - (59.17)
 *    Acc 59.1700
Epoch: [3]
Train: [98/98]	Time 0.0239 - (0.0523)	Data 0.0010 - (0.0237)	Loss 0.8891 - (0.8118)	Acc 68.75 - (71.11)
Test: [20/20]	Time 0.0070 - (0.0946)	Data 0.0010 - (0.0841)	Loss 1.3709 - (1.1860)	Acc 57.35 - (59.96)
 *    Acc 59.9600
Epoch: [4]
Train: [98/98]	Time 0.0239 - (0.0512)	Data 0.

In [10]:
# Training-time preprocessing: random 32x32 crop (after 4 px padding) and random
# horizontal flip as augmentation, then tensor conversion and normalization.
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.2023, 0.1994, 0.2010)),
])

# Evaluation-time preprocessing: no augmentation, same normalization.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.2023, 0.1994, 0.2010)),
])

batch_size = 512

# CIFAR-10 training split with augmentation; shuffled every epoch.
trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform_train,
)
train_loader = DataLoader(
    trainset, batch_size=batch_size, shuffle=True, num_workers=2,
)

# CIFAR-10 test split (train=False), used here for validation; order is kept fixed.
validationset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform_test,
)
val_loader = DataLoader(
    validationset, batch_size=batch_size, shuffle=False, num_workers=2,
)

# Sanity check: raw array shapes of both splits, one per line.
print(trainset.data.shape, validationset.data.shape, sep='\n')

Files already downloaded and verified
Files already downloaded and verified
(50000, 32, 32, 3)
(10000, 32, 32, 3)


In [11]:
# Use the first CUDA device when available, otherwise fall back to the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

epoch = 100
num_class = 10

# Fresh feature-extracting ResNet-18 for this run.
model = resnet18_feature(num_classes=num_class)
model.to(device)

# scikit-learn nearest-centroid classifier, handed to train()/validate().
classifier = NearestCentroid()

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=1e-4)
# Tie the cosine schedule's horizon to the number of epochs instead of repeating
# the literal 100, so changing `epoch` keeps the annealing aligned with training.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epoch)

In [9]:
# NOTE(review): the saved output of this cell is a TypeError about a missing
# `device` argument, which does not match the seven-argument train() call below,
# and the cell's execution count precedes the cells above it. The output looks
# stale - restart the kernel and run all cells to refresh it.
best_acc = 0.  # best validation accuracy seen so far

for idx in range(epoch):  # one iteration per training epoch
    # Train for one epoch; `idx` is passed to the helper as the epoch index.
    train(train_loader, model, classifier, criterion, optimizer, idx, device)

    # Evaluate on the validation loader; the helper returns this epoch's accuracy.
    acc = validate(val_loader, model, classifier, criterion, device)

    # Track the best accuracy.
    # NOTE(review): `is_best` is computed but nothing in this cell saves a
    # checkpoint - either add the save or drop the flag.
    is_best = acc > best_acc
    best_acc = max(acc, best_acc)

    # Advance the learning-rate schedule once per epoch.
    scheduler.step()

print('Finished Training')

TypeError: train() missing 1 required positional argument: 'device'