In [1]:
import torch
import torch.nn as nn
import numpy as np
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import datasets, transforms
from torchvision.datasets import CIFAR10
from sklearn.model_selection import KFold
from sklearn import metrics

import torch.nn.functional as TF
import torch.optim as optim
import os
import math
import matplotlib.pyplot as plt
import pickle

torch.set_num_threads(1)


device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=0)

In [2]:
# (0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)

transform = transforms.Compose([
    transforms.Resize(32),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2470, 0.2435, 0.2616]),
])
test_transform = transforms.Compose([
    transforms.Resize(32),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2470, 0.2435, 0.2616]),
])

train_batch_size = 512
val_batch_size = 250
test_batch_size = 100

train_dataset = CIFAR10(root='data/CIFAR10/train', train=True,
                              download=True,transform=transform)
test_dataset = CIFAR10(root='data/CIFAR10/test', train=False,
                             download=True, transform=test_transform)


len(train_dataset)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=test_batch_size)

Files already downloaded and verified
Files already downloaded and verified


In [3]:
class LeNet5(nn.Module):
    def __init__(self):
        super(LeNet5, self).__init__()
        self.feature_extractor = nn.Sequential(            
            nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5, stride=1),
            nn.Tanh(),
            nn.AvgPool2d(kernel_size=2),
            nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5, stride=1),
            nn.Tanh(),
            nn.AvgPool2d(kernel_size=2),
            nn.Conv2d(in_channels=16, out_channels=120, kernel_size=5, stride=1),
            nn.Tanh()
        )

        self.classifier = nn.Sequential(
            nn.Linear(in_features=120, out_features=84),
            nn.Tanh(),
            nn.Linear(in_features=84, out_features=10),
        )


    def forward(self, x):
        x = self.feature_extractor(x)
        x = torch.flatten(x, 1)
        logits = self.classifier(x)
        return logits

In [9]:
class CNN5(nn.Module):
    def __init__(self):
        super(CNN5, self).__init__()
        self.conv1 = nn.Sequential(
                        nn.Conv2d(3,64,3,1,1),
                        nn.ReLU(),
                        nn.BatchNorm2d(64))
        self.pool =nn.AvgPool2d(kernel_size=2,stride=2)
        self.conv2 = nn.Sequential(
                        nn.Conv2d(64,128,3,1,1),
                        nn.ReLU(),
                        nn.BatchNorm2d(128))
        self.conv3 = nn.Sequential(
                        nn.Conv2d(128,128,3,1,1),
                        nn.ReLU(),
                        nn.BatchNorm2d(128))
        self.classifier = nn.Sequential(
                            nn.Linear(128*4*4, 512),
                            nn.ReLU(),
                            nn.BatchNorm1d(512),
                            nn.Dropout(0.2),
                            nn.Linear(512, 512),
                            nn.ReLU(),
                            nn.BatchNorm1d(512),
                            nn.Linear(512,10)
                            )
    def forward(self,x):
        x = self.pool(self.conv1(x))
        x = self.pool(self.conv2(x))
        x = self.pool(self.conv3(x))
        x = x.view((-1,128*4*4))
        return self.classifier(x)
#         return TF.softmax(self.fc2(x),dim=-1)

In [10]:
LOG_DIR = 'Logs/'
SAVE_DIR = 'Models/'
for SEED in range(5):
    torch.manual_seed(SEED)
    train, val = random_split(train_dataset,[int(0.9*len(train_dataset)),int(0.1*len(train_dataset))])
    
#     all_index = np.arange(len(train_dataset))
#     np.random.shuffle(all_index)
#     train_index = all_index[0:int(0.9*len(train_dataset))]
#     val_index = all_index[int(0.9*len(train_dataset)):]
    
    train_loader = DataLoader(train, shuffle=True, batch_size=train_batch_size)
    val_loader = DataLoader(val, shuffle=True, batch_size=train_batch_size)


#     model = LeNet5().double().to(device)
    model = CNN5().double().to(device)
#     optimizer = optim.SGD(model.parameters(), lr=0.05, momentum=0.9, weight_decay=5e-4)
    optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
    LR_STEP = [5]
    train_scheduler = optim.lr_scheduler.MultiStepLR(optimizer,LR_STEP,gamma=0.1)

    cost = nn.CrossEntropyLoss()
    epoch = 10
    train_loss = []
    val_loss = []
    LEARN_SEED = 42
    torch.manual_seed(LEARN_SEED)
    best_val_acc = 0.0
    for _epoch in range(epoch):
        for idx, (train_x, train_label) in enumerate(train_loader):
            train_x, train_label = train_x.double().to(device), train_label.to(device)
            optimizer.zero_grad()
            outputs = model(train_x)
            loss = cost(outputs, train_label)
            loss.backward()
            optimizer.step()
            if idx % 50 == 0:
                print('Epoch:%d, idx:%d, loss:%.6f'%(_epoch, idx, loss.sum().item()))
            train_loss.append(loss.sum().item())
        train_scheduler.step()
        correct = 0
        _sum = 0

        for idx, (val_x, val_label) in enumerate(val_loader):
            val_x, val_label = val_x.double().to(device), val_label.to(device)
            outputs = model(val_x).detach()
            t_loss = cost(outputs, val_label)
            predict_ys = torch.argmax(outputs, axis=-1)
            _ = predict_ys.detach().data == val_label
            correct += torch.sum(_, axis=-1)
            _sum += _.shape[0]
            val_loss.append(t_loss.sum().item())
        val_acc = 100*correct / _sum
        print('Validation accuracy: %.4f'%val_acc)
    
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_ckpt = {'net':model.state_dict(),
                        'optim':optimizer.state_dict(),
                        'epoch':_epoch,
                        'val_acc':best_val_acc}
            best_save_path = SAVE_DIR + "CIFAR10_CNN_Val_SEED_%d_model"%SEED
            torch.save(best_ckpt, best_save_path)
            
    log_save_path = LOG_DIR + "CIFAR10_CNN_Val_SEED_%d_log"%SEED

    pickle.dump([train_loss, val_loss], open(log_save_path,"wb"))

Epoch:0, idx:0, loss:2.475192
Epoch:0, idx:50, loss:1.401187
Validation accuracy: 54.8200
Epoch:1, idx:0, loss:1.196840
Epoch:1, idx:50, loss:1.103856
Validation accuracy: 62.4800
Epoch:2, idx:0, loss:0.928010
Epoch:2, idx:50, loss:0.827095
Validation accuracy: 68.9400
Epoch:3, idx:0, loss:0.739573
Epoch:3, idx:50, loss:0.677014
Validation accuracy: 71.9600
Epoch:4, idx:0, loss:0.573635
Epoch:4, idx:50, loss:0.697014
Validation accuracy: 75.1400
Epoch:5, idx:0, loss:0.481189
Epoch:5, idx:50, loss:0.322636
Validation accuracy: 78.2800
Epoch:6, idx:0, loss:0.318092
Epoch:6, idx:50, loss:0.293280
Validation accuracy: 78.7000
Epoch:7, idx:0, loss:0.215598
Epoch:7, idx:50, loss:0.231884
Validation accuracy: 78.0200
Epoch:8, idx:0, loss:0.258276
Epoch:8, idx:50, loss:0.210745
Validation accuracy: 78.7400
Epoch:9, idx:0, loss:0.194529
Epoch:9, idx:50, loss:0.167714
Validation accuracy: 78.5000
Epoch:0, idx:0, loss:2.422695
Epoch:0, idx:50, loss:1.365348
Validation accuracy: 56.7200
Epoch:1, i

In [11]:
for SEED in range(5):
    best_save_path = SAVE_DIR + "CIFAR10_CNN_Val_SEED_%d_model"%SEED
    print(torch.load(best_save_path)['val_acc'])

tensor(78.7400, device='cuda:0')
tensor(80.0600, device='cuda:0')
tensor(79.1000, device='cuda:0')
tensor(80.5400, device='cuda:0')
tensor(78.9800, device='cuda:0')


In [13]:
SAVE_DIR = 'Models/'
for SEED in range(5):
    torch.manual_seed(SEED)
    train, val = random_split(train_dataset,[int(0.9*len(train_dataset)),int(0.1*len(train_dataset))])
    
    train_loader = DataLoader(train, shuffle=True, batch_size=train_batch_size)
    val_loader = DataLoader(val, shuffle=True, batch_size=val_batch_size)
    
    best_save_path = SAVE_DIR + "CIFAR10_CNN_Val_SEED_%d_model"%SEED
    model = CNN5().double().to(device)
    model.load_state_dict(torch.load(best_save_path)['net'])
    
    y_vals = []
    y_vals_onehot = []
    y_outputs = []
    y_preds = []
    for idx, (val_x, val_label) in enumerate(val_loader):
        val_x, val_label = val_x.double().to(device), val_label.to(device)
        y_vals.append(val_label.cpu().data)
        y_vals_onehot.append(TF.one_hot(val_label.cpu().data,10).numpy())
        outputs = model(val_x).detach()
        y_output = TF.softmax(outputs,-1)
        y_outputs.append(y_output.detach().cpu().data.numpy())
        y_pred = torch.argmax(outputs, axis=-1)
        y_preds.append(y_pred.detach().cpu().data.numpy())
        
    y_vals = torch.stack(y_vals,0).numpy()
    y_vals = np.array(y_vals).reshape([-1,1])
    y_vals_onehot = np.eye(10)[y_vals].reshape([-1,10])
    y_preds = np.array(y_preds).reshape([-1,1])
    y_outputs = np.array(y_outputs).reshape([-1,10])
    print(metrics.accuracy_score(y_vals,y_preds))
    print(metrics.f1_score(y_vals,y_preds,average='weighted'))
    print(metrics.roc_auc_score(y_vals_onehot,y_outputs,average=None))

0.79
0.7898787064510552
[0.98192247 0.9935524  0.95589057 0.93673521 0.97084615 0.95590862
 0.98503009 0.98338936 0.99261239 0.9901994 ]
0.791
0.7906085315219561
[0.98582368 0.99390561 0.96179778 0.94075977 0.96608717 0.9631363
 0.98476434 0.98613404 0.99359354 0.98529813]
0.789
0.7892196434579926
[0.98204814 0.99379228 0.96010843 0.93952928 0.96956999 0.95580408
 0.98395131 0.98492237 0.99323413 0.98994507]
0.799
0.7995330113189877
[0.9811738  0.99455143 0.96426019 0.95355115 0.97593204 0.9613186
 0.98549902 0.98241566 0.99077053 0.99405649]
0.7856
0.7858755251175678
[0.98311079 0.99143665 0.95706432 0.93999018 0.9701727  0.95863125
 0.98531294 0.98415577 0.99206875 0.98487129]


In [14]:
y_te = test_dataset.targets
LOG_DIR = 'Logs/'
SAVE_DIR = 'Models/'

SEED = 3
best_save_path = SAVE_DIR + "CIFAR10_CNN_Val_SEED_%d_model"%SEED
model = CNN5().double().to(device)
model.load_state_dict(torch.load(best_save_path)['net'])
y_vals = []
y_vals_onehot = []
y_outputs = []
y_preds = []
for idx, (val_x, val_label) in enumerate(test_loader):
    val_x, val_label = val_x.double().to(device), val_label.to(device)
    y_vals.append(val_label.cpu().data)
    y_vals_onehot.append(TF.one_hot(val_label.cpu().data,10).numpy())
    outputs = model(val_x).detach()
    y_output = TF.softmax(outputs,-1)
    y_outputs.append(y_output.detach().cpu().data.numpy())
    y_pred = torch.argmax(outputs, axis=-1)
    y_preds.append(y_pred.detach().cpu().data.numpy())

y_vals = torch.stack(y_vals,0).numpy()
y_vals = np.array(y_vals).reshape([-1,1])
y_vals_onehot = np.eye(10)[y_vals].reshape([-1,10])
y_preds = np.array(y_preds).reshape([-1,1])
y_outputs = np.array(y_outputs).reshape([-1,10])
print(metrics.accuracy_score(y_vals,y_preds))
print(metrics.f1_score(y_vals,y_preds,average='weighted'))
print(metrics.roc_auc_score(y_vals_onehot,y_outputs,average=None))

0.782
0.7816714609262266
[0.97953433 0.99224767 0.95125511 0.93657278 0.97004967 0.95776578
 0.98333356 0.98494644 0.98996589 0.990095  ]
