## Using CNN and NCM for CIFAR-10 feature extraction and classification

### Summary
CIFAR-10 데이터셋을 분류하기 위해 CNN을 이용하여 데이터셋에 대한 특징을 추출 후 NCM을 이용하여 각 class에 대한 평균 값을 이용하여 분류

<span style="color: #2D3748; background-color:#fff5b1;">Test size를 0.2로 10번 반복 실험한 결과 평균적으로 0.34의 정확도를 보여주고 있고, test 데이터 1개를 분류하는데 0.0011초의 시간이 걸린다.</span>

In [2]:
import time
import random
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.backends.cudnn as cudnn
from utils.util import Info
import torchvision
from torchvision import transforms
from torch.utils.data import DataLoader
from torchvision.models import resnet18
from sklearn.neighbors import KNeighborsClassifier
from utils.Data_Classifier import train, validate, save_checkpoint

c:\Users\LAB\Anaconda3\envs\lab\lib\site-packages\numpy\.libs\libopenblas.EL2C6PLE4ZYW3ECEVIV3OXXGRN2NRFM2.gfortran-win_amd64.dll
c:\Users\LAB\Anaconda3\envs\lab\lib\site-packages\numpy\.libs\libopenblas.WCDJNK7YVMPZQ2ME2ZZHJJRJ3JIKNDB7.gfortran-win_amd64.dll
c:\Users\LAB\Anaconda3\envs\lab\lib\site-packages\numpy\.libs\libopenblas.XWYDX2IKJW2NMTWSFYNGFUWKQU3LYTCZ.gfortran-win_amd64.dll


In [3]:
class Config(Info):
    def __init__(self):
        super(Info, self).__init__()
        self.device = 'PC'
        self.dataset = 'CIFAR_10'
        self.test_size = 0.2
        self.feature_size = 3072
        self.method = 'kNN'
        self.k = 1
        self.distance = 'Euclidean'
        self.reduction_method = [None, None] # method, n_components
        self.iter = 10
    

In [4]:
cig = Config()
cig.info()
cig.print_rutin()

Device ── PC
│
├──Dataset
│    └────CIFAR_10
│    └────Train size 80%
│    └────Feature size: 3072
│
├──Method
│    └────kNN
│    └────k = 1
│    └────Euclidean
│
├──Dimension reduction
│    └────Method: None
│    └────Component size: None
│    └────Feature Reduction Ratio: None%
│
└──Iteration
    └────10
PC - CIFAR_10(80%) - kNN - 10 iteration


In [5]:
seed = 0
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
# torch.cuda.manual_seed_all(seed) # if use multi-GPU
cudnn.deterministic = True  # 연산 처리 속도 감소 -> 모델과 코드를 배포해야 하는 연구 후반 단계에 사용
cudnn.benchmark = False

## Load CIFAR-10 Dataset

In [6]:
transform_train = transforms.Compose([transforms.Resize(224),
                                      transforms.RandomHorizontalFlip(),
                                      transforms.ToTensor(),
                                      transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
                                      ])

transform_test = transforms.Compose([transforms.Resize(224),
                                     transforms.ToTensor(),                                     
                                     transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
                                     ])

batch_size = 512

trainset = torchvision.datasets.CIFAR10(root='../data', train=True, download=True, transform=transform_train)
train_loader = DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2)

validationset = torchvision.datasets.CIFAR10(root='../data', train=False, download=True, transform=transform_test)
val_loader = DataLoader(validationset, batch_size=batch_size, shuffle=False, num_workers=2)

print(trainset.data.shape)
print(validationset.data.shape)

Files already downloaded and verified
Files already downloaded and verified
(50000, 32, 32, 3)
(10000, 32, 32, 3)


In [7]:
from utils.NCM_Classifier import train, validate, save_checkpoint
from sklearn.decomposition import PCA

In [None]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

num_class = 10

model = resnet18(pretrained=True)
for param in model.parameters():
    param.requires_grad = False

model.to(device)

## Fine-tuning kNN(Euclidean) instead of Fully Connected Layer

In [8]:
best_acc = 0.

# train for one epoch
end = time.time()
x_train, y_train = train(train_loader, model, device)

knn = KNeighborsClassifier(cig.k, weights='distance', n_jobs=-1)
knn.fit(x_train, y_train)
train_time = time.time() - end

# evaluate on validation set
end = time.time()
x_test, y_test = validate(val_loader, model, device)

test_score = knn.score(x_test, y_test)
val_time = time.time() - end

print('\nFinished Training\n')
print("Total Training Time : %.2f" % train_time, "sec")
print("Total Validation Time : %.2f" % val_time, "sec")
print("\nTotal Best Validation Accuracy : %.2f" % (test_score*100), "%")


Finished Training

Total Training Time : 55.88 sec
Total Validation Time : 23.83 sec

Total Best Validation Accuracy : 80.29 %


In [9]:
best_acc = 0.

# train for one epoch
end = time.time()
x_train, y_train = train(train_loader, model, device)

pca = PCA(n_components=9, random_state=0)
x_train = pca.fit_transform(x_train)

knn = KNeighborsClassifier(cig.k, weights='distance', n_jobs=-1)
knn.fit(x_train, y_train)
train_time = time.time() - end

# evaluate on validation set
end = time.time()
x_test, y_test = validate(val_loader, model, device)

x_test = pca.transform(x_test)

test_score = knn.score(x_test, y_test)
val_time = time.time() - end

print('\nFinished Training\n')
print("Total Training Time : %.2f" % train_time, "sec")
print("Total Validation Time : %.2f" % val_time, "sec")
print("\nTotal Best Validation Accuracy : %.2f" % (test_score*100), "%")


Finished Training

Total Training Time : 52.90 sec
Total Validation Time : 12.55 sec

Total Best Validation Accuracy : 63.18 %


## Fine-tuning kNN(Mahalanobis) instead of Fully Connected Layer

In [10]:
best_acc = 0.

# train for one epoch
end = time.time()
x_train, y_train = train(train_loader, model, device)

knn = KNeighborsClassifier(cig.k, weights='distance', metric='mahalanobis', metric_params={'VI': np.cov(x_train, rowvar=False)}, n_jobs=-1)
knn.fit(x_train, y_train)
train_time = time.time() - end

# evaluate on validation set
end = time.time()
x_test, y_test = validate(val_loader, model, device)

test_score = knn.score(x_test, y_test)
val_time = time.time() - end

print('\nFinished Training\n')
print("Total Training Time : %.2f" % train_time, "sec")
print("Total Validation Time : %.2f" % val_time, "sec")
print("\nTotal Best Validation Accuracy : %.2f" % (test_score*100), "%")

In [9]:
best_acc = 0.

# train for one epoch
end = time.time()
x_train, y_train = train(train_loader, model, device)

pca = PCA(n_components=9, random_state=0)
x_train = pca.fit_transform(x_train)

knn = KNeighborsClassifier(cig.k, weights='distance', metric='mahalanobis', metric_params={'VI': np.cov(x_train, rowvar=False)}, n_jobs=-1)
knn.fit(x_train, y_train)
train_time = time.time() - end

# evaluate on validation set
end = time.time()
x_test, y_test = validate(val_loader, model, device)

x_test = pca.transform(x_test)

test_score = knn.score(x_test, y_test)
val_time = time.time() - end

print('\nFinished Training\n')
print("Total Training Time : %.2f" % train_time, "sec")
print("Total Validation Time : %.2f" % val_time, "sec")
print("\nTotal Best Validation Accuracy : %.2f" % (test_score*100), "%")


Finished Training

Total Training Time : 56.58 sec
Total Validation Time : 11.97 sec

Total Best Validation Accuracy : 10.26 %


  return tree.query(*args, **kwargs)


## Fine-tuning kNN(Cosine) instead of Fully Connected Layer

In [10]:
best_acc = 0.

# train for one epoch
end = time.time()
x_train, y_train = train(train_loader, model, device)

knn = KNeighborsClassifier(cig.k, weights='distance', metric='cosine', n_jobs=-1)
knn.fit(x_train, y_train)
train_time = time.time() - end

# evaluate on validation set
end = time.time()
x_test, y_test = validate(val_loader, model, device)

test_score = knn.score(x_test, y_test)
val_time = time.time() - end

print('\nFinished Training\n')
print("Total Training Time : %.2f" % train_time, "sec")
print("Total Validation Time : %.2f" % val_time, "sec")
print("\nTotal Best Validation Accuracy : %.2f" % (test_score*100), "%")


Finished Training

Total Training Time : 52.11 sec
Total Validation Time : 22.72 sec

Total Best Validation Accuracy : 81.89 %


In [11]:
best_acc = 0.

# train for one epoch
end = time.time()
x_train, y_train = train(train_loader, model, device)

pca = PCA(n_components=9, random_state=0)
x_train = pca.fit_transform(x_train)

knn = KNeighborsClassifier(cig.k, weights='distance', metric='cosine', n_jobs=-1)
knn.fit(x_train, y_train)
train_time = time.time() - end

# evaluate on validation set
end = time.time()
x_test, y_test = validate(val_loader, model, device)

x_test = pca.transform(x_test)

test_score = knn.score(x_test, y_test)
val_time = time.time() - end

print('\nFinished Training\n')
print("Total Training Time : %.2f" % train_time, "sec")
print("Total Validation Time : %.2f" % val_time, "sec")
print("\nTotal Best Validation Accuracy : %.2f" % (test_score*100), "%")


Finished Training

Total Training Time : 46.06 sec
Total Validation Time : 17.38 sec

Total Best Validation Accuracy : 63.89 %
