## Using CNN and NCM for CIFAR-10 feature extraction and classification

### Summary
CIFAR-10 데이터셋을 분류하기 위해 CNN으로 데이터셋의 특징을 추출한 뒤, NCM(Nearest Class Mean)으로 각 class의 평균 값을 이용하여 분류한다.

<span style="color: #2D3748; background-color:#fff5b1;">Test size를 0.2로 10번 반복 실험한 결과 평균적으로 0.34의 정확도를 보여주고 있고, test 데이터 1개를 분류하는데 0.0011초의 시간이 걸린다.</span>

In [1]:
import time
import random
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.backends.cudnn as cudnn
import torchvision
from torchvision import transforms
from torch.utils.data import DataLoader
from torchvision.models import resnet18
from utils.Data_Classifier import train, validate, save_checkpoint

c:\Users\LAB\Anaconda3\envs\lab\lib\site-packages\numpy\.libs\libopenblas.EL2C6PLE4ZYW3ECEVIV3OXXGRN2NRFM2.gfortran-win_amd64.dll
c:\Users\LAB\Anaconda3\envs\lab\lib\site-packages\numpy\.libs\libopenblas.WCDJNK7YVMPZQ2ME2ZZHJJRJ3JIKNDB7.gfortran-win_amd64.dll
c:\Users\LAB\Anaconda3\envs\lab\lib\site-packages\numpy\.libs\libopenblas.XWYDX2IKJW2NMTWSFYNGFUWKQU3LYTCZ.gfortran-win_amd64.dll


In [2]:
# Pin every random number generator used in this notebook to a single seed.
seed = 0
for _seed_rng in (random.seed, np.random.seed, torch.manual_seed):
    _seed_rng(seed)
# Multi-GPU runs would additionally call: torch.cuda.manual_seed_all(seed)

# Turn off cuDNN autotuning and request deterministic kernels. This slows
# computation, so it is meant for the late research stage when the model and
# code have to be released.
cudnn.benchmark = False
cudnn.deterministic = True

## Load CIFAR-10 Dataset

In [4]:
# Mini-batch size shared by the training and validation loaders.
batch_size = 512

# Convert images to tensors, then normalise each channel with fixed
# per-channel mean / std values.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.2023, 0.1994, 0.2010)),
])

# Training split: reshuffled every epoch.
trainset = torchvision.datasets.CIFAR10(
    root='../data', train=True, download=True, transform=transform,
)
train_loader = DataLoader(
    trainset, batch_size=batch_size, shuffle=True, num_workers=2,
)

# Held-out split (train=False): iterated in a fixed order.
validationset = torchvision.datasets.CIFAR10(
    root='../data', train=False, download=True, transform=transform,
)
val_loader = DataLoader(
    validationset, batch_size=batch_size, shuffle=False, num_workers=2,
)

# Sanity check: raw array shape of each split.
for split in (trainset, validationset):
    print(split.data.shape)

Files already downloaded and verified
Files already downloaded and verified
(50000, 32, 32, 3)
(10000, 32, 32, 3)


In [5]:
# Use the first CUDA device when one is available, otherwise the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

epoch = 100     # number of passes over the training set
num_class = 10  # size of the classifier output layer

# ResNet-18 built with a `num_class`-way output layer, moved to `device`.
model = resnet18(num_classes=num_class)
model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=1e-4)
# The cosine schedule spans exactly the training run: T_max is tied to `epoch`
# instead of a separately hard-coded 100 so the two cannot drift apart.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epoch)

In [19]:
# Baseline run: train for `epoch` epochs and track the best validation accuracy.
# No checkpoint is written in this cell.
best_acc = 0.

for idx in range(epoch):  # loop over the dataset multiple times
    # train for one epoch
    train(train_loader, model, criterion, optimizer, idx, device)

    # evaluate on validation set
    # NOTE(review): validate() is assumed to return the accuracy as a
    # comparable scalar -- confirm in utils.Data_Classifier.
    acc = validate(val_loader, model, criterion, device)

    # remember the best accuracy seen so far
    best_acc = max(acc, best_acc)

    # advance the learning-rate schedule once per epoch
    scheduler.step()

print('Finished Training')

Epoch: [0]
Train: [98/98]	Time 0.0484	Data 0.0225	Loss 2.1219	Acc 33.14
Test: [20/20]	Time 0.0944	Data 0.0852	Loss 16.4620	Acc 39.76
Epoch: [1]
Train: [98/98]	Time 0.0491	Data 0.0227	Loss 1.5159	Acc 47.11
Test: [20/20]	Time 0.0937	Data 0.0826	Loss 1.3771	Acc 51.05
Epoch: [2]
Train: [98/98]	Time 0.0501	Data 0.0232	Loss 1.3283	Acc 53.97
Test: [20/20]	Time 0.0961	Data 0.0838	Loss 1.2378	Acc 55.38
Epoch: [3]
Train: [98/98]	Time 0.0482	Data 0.0226	Loss 1.1447	Acc 60.39
Test: [20/20]	Time 0.0902	Data 0.0801	Loss 1.1518	Acc 59.36
Epoch: [4]
Train: [98/98]	Time 0.0488	Data 0.0227	Loss 1.0506	Acc 64.86
Test: [20/20]	Time 0.0920	Data 0.0817	Loss 11.4038	Acc 48.03
Epoch: [5]
Train: [98/98]	Time 0.0501	Data 0.0233	Loss 0.9206	Acc 69.23
Test: [20/20]	Time 0.0973	Data 0.0846	Loss 1.0033	Acc 66.22
Epoch: [6]
Train: [98/98]	Time 0.0502	Data 0.0233	Loss 0.7450	Acc 74.36
Test: [20/20]	Time 0.0933	Data 0.0801	Loss 0.9484	Acc 68.70
Epoch: [7]
Train: [98/98]	Time 0.0501	Data 0.0232	Loss 0.6725	Acc 76.86
Te

In [8]:
# Mini-batch size shared by the training and validation loaders.
batch_size = 512

# Training pipeline: random 32x32 crop after 4-pixel padding and a random
# horizontal flip, followed by tensor conversion and per-channel normalisation.
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.2023, 0.1994, 0.2010)),
])

# Evaluation pipeline: no augmentation, same normalisation.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.2023, 0.1994, 0.2010)),
])

# Training split: augmented and reshuffled every epoch.
trainset = torchvision.datasets.CIFAR10(
    root='../data', train=True, download=True, transform=transform_train,
)
train_loader = DataLoader(
    trainset, batch_size=batch_size, shuffle=True, num_workers=2,
)

# Held-out split (train=False): iterated in a fixed order.
validationset = torchvision.datasets.CIFAR10(
    root='../data', train=False, download=True, transform=transform_test,
)
val_loader = DataLoader(
    validationset, batch_size=batch_size, shuffle=False, num_workers=2,
)

# Sanity check: raw array shape of each split.
for split in (trainset, validationset):
    print(split.data.shape)

Files already downloaded and verified
Files already downloaded and verified
(50000, 32, 32, 3)
(10000, 32, 32, 3)


In [19]:
# Use the first CUDA device when one is available, otherwise the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

epoch = 100     # number of passes over the training set
num_class = 10  # size of the classifier output layer

# Fresh ResNet-18 with a `num_class`-way output layer, moved to `device`.
model = resnet18(num_classes=num_class)
model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
# The cosine schedule spans exactly the training run: T_max is tied to `epoch`
# instead of a separately hard-coded 100 so the two cannot drift apart.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epoch)

In [20]:
# Train for `epoch` epochs, timing each training and validation pass and
# checkpointing after every epoch.
best_acc = 0.
train_time = []  # seconds spent in train() per epoch
val_time = []    # seconds spent in validate() per epoch

for idx in range(epoch):  # loop over the dataset multiple times

    # train for one epoch
    # perf_counter() is monotonic, so the measured interval cannot be skewed
    # by system clock adjustments the way time.time() can.
    start = time.perf_counter()
    train(train_loader, model, criterion, optimizer, idx, device)
    train_time.append(time.perf_counter() - start)

    # evaluate on validation set
    start = time.perf_counter()
    acc = validate(val_loader, model, criterion, device)
    val_time.append(time.perf_counter() - start)

    # remember best acc and save checkpoint
    is_best = acc > best_acc
    best_acc = max(acc, best_acc)

    save_checkpoint({
            'epoch': idx + 1,
            'arch': "ResNet-18",
            'state_dict': model.state_dict(),
            'best_acc1': best_acc,
            'optimizer' : optimizer.state_dict(),
        }, is_best)

    # advance the learning-rate schedule once per epoch
    scheduler.step()

train_time = np.array(train_time)
val_time = np.array(val_time)

print('\nFinished Training\n')
print("Total Average Training Time : %.2f ± %.2f" % (train_time.mean(), train_time.std()), "sec")
print("Total Average Validation Time : %.2f ± %.2f" % (val_time.mean(), val_time.std()), "sec")
# float() accepts a Python float as well as a one-element tensor / NumPy
# scalar, so the print works whatever scalar type validate() returned.
print("\nTotal Best Validation Accuracy : %.2f" % float(best_acc), "%")

Epoch: [0]
Train: [98/98]	Time 0.0507	Data 0.0257	Loss 2.4011	Acc 26.14
Test: [20/20]	Time 0.0966	Data 0.0867	Loss 1.6888	Acc 36.56
Epoch: [1]
Train: [98/98]	Time 0.0498	Data 0.0268	Loss 1.6160	Acc 40.87
Test: [20/20]	Time 0.1014	Data 0.0895	Loss 1.4505	Acc 47.16
Epoch: [2]
Train: [98/98]	Time 0.0521	Data 0.0237	Loss 1.4164	Acc 48.46
Test: [20/20]	Time 0.1051	Data 0.0901	Loss 1.4541	Acc 51.48
Epoch: [3]
Train: [98/98]	Time 0.0494	Data 0.0271	Loss 1.3467	Acc 51.42
Test: [20/20]	Time 0.0908	Data 0.0812	Loss 1.2162	Acc 56.30
Epoch: [4]
Train: [98/98]	Time 0.0570	Data 0.0294	Loss 1.1840	Acc 57.40
Test: [20/20]	Time 0.1060	Data 0.0886	Loss 1.1111	Acc 60.65
Epoch: [5]
Train: [98/98]	Time 0.0664	Data 0.0384	Loss 1.0837	Acc 61.32
Test: [20/20]	Time 0.1050	Data 0.0890	Loss 0.9941	Acc 64.99
Epoch: [6]
Train: [98/98]	Time 0.0648	Data 0.0379	Loss 1.0036	Acc 64.38
Test: [20/20]	Time 0.1046	Data 0.0877	Loss 0.9555	Acc 66.16
Epoch: [7]
Train: [98/98]	Time 0.0648	Data 0.0375	Loss 0.9355	Acc 67.07
Test

In [9]:
# Transfer-learning setup: freeze a pretrained ResNet-18 backbone and train
# only a newly attached classification layer.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

epoch = 10
num_class = 10

# When True, only parameters with requires_grad=True are handed to the optimizer.
feature_extract = True

model = resnet18(pretrained=True)
# Freeze every pretrained weight.
for param in model.parameters():
    param.requires_grad = False
# Replace the final fully connected layer with a `num_class`-way layer. It is
# created after the freeze, so its parameters keep the default
# requires_grad=True.
num_feature = model.fc.in_features
model.fc = nn.Linear(num_feature, num_class)
model.to(device)

# Collect and list the parameters that will receive gradient updates.
print("Params to learn:")
trainable = [(name, param) for name, param in model.named_parameters() if param.requires_grad]
for name, _ in trainable:
    print("\t",name)
if feature_extract:
    params_to_update = [param for _, param in trainable]
else:
    params_to_update = model.parameters()

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(params_to_update, lr=0.1, momentum=0.9, weight_decay=5e-4)
# NOTE(review): T_max=100 while epoch=10, so only the first tenth of the cosine
# period is traversed during training -- confirm this is intended.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=100)

Params to learn:
	 fc.weight
	 fc.bias


In [10]:
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

In [None]:
# Train for `epoch` epochs, timing each training and validation pass and
# checkpointing after every epoch.
best_acc = 0.
train_time = []  # seconds spent in train() per epoch
val_time = []    # seconds spent in validate() per epoch

for idx in range(epoch):  # loop over the dataset multiple times

    # train for one epoch
    # perf_counter() is monotonic, so the measured interval cannot be skewed
    # by system clock adjustments the way time.time() can.
    start = time.perf_counter()
    train(train_loader, model, criterion, optimizer, idx, device)
    train_time.append(time.perf_counter() - start)

    # evaluate on validation set
    start = time.perf_counter()
    acc = validate(val_loader, model, criterion, device)
    val_time.append(time.perf_counter() - start)

    # remember best acc and save checkpoint
    is_best = acc > best_acc
    best_acc = max(acc, best_acc)

    save_checkpoint({
            'epoch': idx + 1,
            'arch': "ResNet-18",
            'state_dict': model.state_dict(),
            'best_acc1': best_acc,
            'optimizer' : optimizer.state_dict(),
        }, is_best)

    # advance the learning-rate schedule once per epoch
    scheduler.step()

train_time = np.array(train_time)
val_time = np.array(val_time)

print('\nFinished Training\n')
print("Total Average Training Time : %.2f ± %.2f" % (train_time.mean(), train_time.std()), "sec")
print("Total Average Validation Time : %.2f ± %.2f" % (val_time.mean(), val_time.std()), "sec")
# float() accepts a Python float as well as a one-element tensor / NumPy
# scalar, so the print works whatever scalar type validate() returned.
print("\nTotal Best Validation Accuracy : %.2f" % float(best_acc), "%")