In [2]:
## Daily use
import argparse
import os, sys
import time
import datetime

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Pytorch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
from torch.utils.data import Dataset as Dataset
from torch.utils.data import DataLoader as DataLoader
from tqdm import tqdm_notebook as tqdm
from PIL import Image

## test GPU
torch.cuda.is_available()

True

In [3]:
# load data
# change path to your own path.
path = "/home/jovyan/work/cifar10" 

train_x = np.load(os.path.join(path, "cifar10-batches-images-train.npy"))
train_y = np.load(os.path.join(path, "cifar10-batches-labels-train.npy"))
val_x = np.load(os.path.join(path, "cifar10-batches-images-val.npy"))
val_y = np.load(os.path.join(path, "cifar10-batches-labels-val.npy"))
test_x = np.load(os.path.join(path, "cifar10-batches-images-test.npy"))

# check mean and std
print(np.mean(train_x/255, axis=(0,1,2)))
print(np.std(train_x/255, axis=(0,1,2)))


[0.49205986 0.48287897 0.44727413]
[0.24727619 0.2436752  0.26168613]


In [4]:
## transform
train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.49205986, 0.48287897, 0.44727413], [0.24727619, 0.2436752, 0.26168613])
])

val_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.49205986, 0.48287897, 0.44727413], [0.24727619, 0.2436752, 0.26168613])
])

test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.49205986, 0.48287897, 0.44727413], [0.24727619, 0.2436752, 0.26168613])
])

In [5]:
test_x.shape

(10000, 32, 32, 3)

In [6]:
## Hpyerparameters
TRAIN_BATCH_SIZE = 128
VAL_BATCH_SIZE = 100
INITIAL_LR = 0.03
MOMENTUM = 0.9
REG = 5e-4
EPOCHS = 100
CHECKPOINT_PATH = "./saved_model"

In [7]:
## Data Loader
class data(Dataset):
    def __init__(self, imgs, labels, transform):
        self.X = imgs
        self.y = labels
        self.transform = transform
        
    def __len__(self):
        return len(self.y)
    
    def __getitem__(self, index):
        img = self.X[index]
        img = Image.fromarray(img)
        if self.transform is not None:
            img = self.transform(img)
        return img, self.y[index]

train_data = data(train_x, train_y, transform = train_transform)
val_data = data(val_x, val_y, transform = train_transform)

# create empty label for test_data
empty = [-1] * test_x.shape[0]
test_data = data(test_x, empty, transform = test_transform)


train_loader = DataLoader(train_data, shuffle = True, batch_size = TRAIN_BATCH_SIZE, num_workers = 1)
val_loader = DataLoader(val_data, shuffle = True, batch_size = TRAIN_BATCH_SIZE, num_workers = 1)
test_loader = DataLoader(test_data, shuffle = False, batch_size = TRAIN_BATCH_SIZE, num_workers = 1)

In [8]:
for X,y in train_loader:
    print(X.shape)
    print(X.min())
    break

torch.Size([128, 3, 32, 32])
tensor(-1.9899)


In [9]:
## create Model

## flatten layer
class flatten(nn.Module):
    def forward(self, x):
        return x.view(x.shape[0],-1)

# Create the neural network module: LeNet-5
class MyDNN(nn.Module):
    def __init__(self):
        super(MyDNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.conv1_bn = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1)
        self.conv2_bn = nn.BatchNorm2d(32)
        self.pool12 = nn.MaxPool2d(2)
        self.dropout1 = nn.Dropout(0.2)
        
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.conv3_bn = nn.BatchNorm2d(64)
        self.conv4 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1)
        self.conv4_bn = nn.BatchNorm2d(64)
        self.pool34 = nn.MaxPool2d(2)
        self.dropout2 = nn.Dropout(0.2)
        
        self.conv5 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.conv5_bn = nn.BatchNorm2d(128)
        self.conv6 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1)
        self.conv6_bn = nn.BatchNorm2d(128)
        self.pool56 = nn.MaxPool2d(2)
        self.dropout3 = nn.Dropout(0.2)
        
        
        self.flat  = flatten()
        
        self.fc1 = nn.Linear(4*4*128, 256)
        self.fc1_bn = nn.BatchNorm1d(256)
        self.dropout4 = nn.Dropout(0.5)
        self.fc2 = nn.Linear(256, 64)
        self.fc2_bn = nn.BatchNorm1d(64)
        self.dropout5 = nn.Dropout(0.5)
        self.fc3 = nn.Linear(64, 10)
        
        nn.init.xavier_normal_(self.conv1.weight)
        nn.init.xavier_normal_(self.conv2.weight)
        nn.init.xavier_normal_(self.conv3.weight)
        nn.init.xavier_normal_(self.conv4.weight)
        nn.init.xavier_normal_(self.conv5.weight)
        nn.init.xavier_normal_(self.conv6.weight)
        nn.init.xavier_normal_(self.fc1.weight)
        nn.init.xavier_normal_(self.fc2.weight)
        nn.init.xavier_normal_(self.fc3.weight)
        
        
        

    def forward(self, x):
        conv_pool12 = self.pool12(self.conv2_bn(F.relu(self.conv2(self.conv1_bn(F.relu(self.conv1(x)))))))
        conv_pool12 = self.dropout1(conv_pool12)
        conv_pool34 = self.pool34(self.conv4_bn(F.relu(self.conv4(self.conv3_bn(F.relu(self.conv3(conv_pool12)))))))
        conv_pool34 = self.dropout2(conv_pool34)
        conv_pool56 = self.pool56(self.conv6_bn(F.relu(self.conv6(self.conv5_bn(F.relu(self.conv5(conv_pool34)))))))
        conv_pool56 = self.dropout3(conv_pool56)
        
        return self.fc3(self.dropout5(self.fc2_bn(F.relu(self.fc2(self.dropout4(self.fc1_bn(F.relu(self.fc1(self.flat(conv_pool56))))))))))

In [10]:
net = MyDNN()
x = torch.zeros([128,3,32,32])
print(net(x).shape)

torch.Size([128, 10])


In [35]:
class sequential_mydnn(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv_1 = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(32),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(32),
            nn.MaxPool2d(2),
            nn.Dropout(0.2)
        )
        
        self.conv_2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(64),
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(2),
            nn.Dropout(0.2)
        )
        
        self.conv_3 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(128),
            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(128),
            nn.MaxPool2d(2),
            nn.Dropout(0.2)
        )
        
        self.fc = nn.Sequential(
            flatten(),
            nn.Linear(4*4*128, 256),
            nn.BatchNorm1d(256),
            nn.Dropout(0.5),
            nn.Linear(256, 64),
            nn.BatchNorm1d(64),
            nn.Dropout(0.5),
            nn.Linear(64,10)
        )
        
    def forward(self, x):
        return self.fc(self.conv_3(self.conv_2(self.conv_1(x))))

def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        m.weight.data.normal_(0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)
    
net2 = sequential_mydnn()
x = torch.zeros([128,3,32,32])

net2.apply(weights_init) ## apply to print(net2.children)

<bound method Module.children of sequential_mydnn(
  (conv_1): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Dropout(p=0.2, inplace=False)
  )
  (conv_2): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, 

sequential_mydnn(
  (conv_1): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Dropout(p=0.2, inplace=False)
  )
  (conv_2): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    

In [19]:
print(net2)

sequential_mydnn(
  (conv_1): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Dropout(p=0.2, inplace=False)
  )
  (conv_2): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    

In [15]:
# Specify the device for computation
device = 'cuda' if torch.cuda.is_available() else 'cpu'
net = MyDNN()
net = net.to(device)
if device =='cuda':
    print("Train on GPU...")
else:
    print("Train on CPU...")

Train on GPU...


In [16]:
criterion = nn.CrossEntropyLoss()
# Add optimizer
optimizer = optim.SGD(net.parameters(), lr=INITIAL_LR, momentum=MOMENTUM)

In [17]:
TRAIN_FROM_SCRATCH = False
# Code for loading checkpoint and recover epoch id.
CKPT_PATH = "./saved_model/model.h5"
def get_checkpoint(ckpt_path):
    try:
        ckpt = torch.load(ckpt_path)
    except Exception as e:
        print(e)
        return None
    return ckpt

ckpt = get_checkpoint(CKPT_PATH)
if ckpt is None or TRAIN_FROM_SCRATCH:
    if not TRAIN_FROM_SCRATCH:
        print("Checkpoint not found.")
    print("Training from scratch ...")
    start_epoch = 0
    current_learning_rate = INITIAL_LR
else:
    print("Successfully loaded checkpoint: %s" %CKPT_PATH)
    net.load_state_dict(ckpt['net'])
    start_epoch = ckpt['epoch'] + 1
    current_learning_rate = ckpt['lr']
    print("Starting from epoch %d " %start_epoch)

print("Starting from learning rate %f:" %current_learning_rate)

Successfully loaded checkpoint: ./saved_model/model.h5
Starting from epoch 144 
Starting from learning rate 0.000786:


In [12]:
global_step = 0
best_val_acc = 0

train_acc_recorder = []
train_loss_recorder = []
val_acc_recorder = []
val_loss_recorder = []



EPOCHS = 150

for i in range(start_epoch, EPOCHS):
    print(datetime.datetime.now())

    net.train()
    print("Epoch %d:" %i)

    total_examples = 0
    correct_examples = 0

    train_loss = 0
    train_acc = 0

    print(len(train_loader))
    for batch_idx, (inputs, targets) in enumerate(train_loader):
        inputs = inputs.to(device)
        targets = targets.to(device)
        
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        for param in net.parameters():
            loss += REG * (param**2).sum()
        loss.backward()
        optimizer.step()
        _, predicted = torch.max(outputs, 1)
        total_examples += len(predicted)
        correct_examples += (predicted==targets).sum().item()
        train_loss += loss

        global_step += 1
        if global_step % 100 == 0:
            avg_loss = train_loss / (batch_idx + 1)
        pass
    avg_acc = correct_examples / total_examples
    print("Training loss: %.4f, Training accuracy: %.4f" %(avg_loss, avg_acc))
    
    train_acc_recorder.append(avg_acc)
    train_loss_recorder.append(avg_loss)
    
    print(datetime.datetime.now())
    print("Validation...")
    total_examples = 0
    correct_examples = 0
    
    net.eval()

    val_loss = 0
    val_acc = 0
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(val_loader):
            inputs = inputs.to(device)
            targets = targets.to(device)
            optimizer.zero_grad()
            outputs = net(inputs)
            loss = criterion(outputs, targets)
            _, predicted = outputs.max(1)
            total_examples += len(predicted)
            correct_examples += (predicted==targets).sum().item()
            val_loss += loss

    avg_loss = val_loss / len(val_loader)
    avg_acc = correct_examples / total_examples
    print("Validation loss: %.4f, Validation accuracy: %.4f" % (avg_loss, avg_acc))
    
    val_acc_recorder.append(avg_acc)
    val_loss_recorder.append(avg_loss)
        

    DECAY_EPOCHS = 2
    DECAY = 0.95
    if i % DECAY_EPOCHS == 0 and i != 0:
        current_learning_rate = current_learning_rate*DECAY
        for param_group in optimizer.param_groups:
            param_group['lr'] = current_learning_rate
            # Assign the learning rate parameter
            
        print("Current learning rate has decayed to %f" %current_learning_rate)
    
    # Save for checkpoint
    if avg_acc > best_val_acc:
        best_val_acc = avg_acc
        if not os.path.exists(CHECKPOINT_PATH):
            os.makedirs(CHECKPOINT_PATH)
        print("Saving ...")
        state = {'net': net.state_dict(),
                 'epoch': i,
                 'lr': current_learning_rate}
        torch.save(state, os.path.join(CHECKPOINT_PATH, 'model.h5'))

print("Optimization finished.")

2019-09-19 01:05:11.571606
Epoch 0:
352
Training loss: 2.9043, Training accuracy: 0.3138
2019-09-19 01:05:35.345976
Validation...
Validation loss: 1.5760, Validation accuracy: 0.4202
Saving ...
2019-09-19 01:05:37.020147
Epoch 1:
352
Training loss: 2.3686, Training accuracy: 0.4316
2019-09-19 01:05:54.374569
Validation...
Validation loss: 1.3560, Validation accuracy: 0.5004
Saving ...
2019-09-19 01:05:56.071644
Epoch 2:
352
Training loss: 2.0786, Training accuracy: 0.5102
2019-09-19 01:06:13.760325
Validation...
Validation loss: 1.1811, Validation accuracy: 0.5718
Current learning rate has decayed to 0.028500
Saving ...
2019-09-19 01:06:15.457223
Epoch 3:
352
Training loss: 1.8096, Training accuracy: 0.5740
2019-09-19 01:06:32.724504
Validation...
Validation loss: 1.0323, Validation accuracy: 0.6292
Saving ...
2019-09-19 01:06:34.432360
Epoch 4:
352
Training loss: 1.6069, Training accuracy: 0.6289
2019-09-19 01:06:52.173363
Validation...
Validation loss: 0.8785, Validation accuracy: 0.

In [41]:
## load model
CKPT_PATH = "./saved_model/model.h5"
ckpt = get_checkpoint(CKPT_PATH)
net.load_state_dict(ckpt['net'])
start_epoch = ckpt['epoch'] + 1
current_learning_rate = ckpt['lr']

## predicted values
pred = np.zeros([10000,2])
pred[:,0] = np.arange(10000)

net.eval()
with torch.no_grad():
    for idx, (inputs, _) in enumerate(test_loader):
        inputs = inputs.to(device)
        optimizer.zero_grad()
        outputs = net(inputs)
        _, predicted = outputs.max(1)
        pred[idx*TRAIN_BATCH_SIZE:(idx*TRAIN_BATCH_SIZE+len(predicted)), 1] =predicted.cpu().data.numpy()
        
np.savetxt('pred_labels.csv', pred.astype(int), fmt = "%d", delimiter=',', header='Id,Category', comments='')
