In [1]:
import argparse
import os, sys
import time
import datetime

# Import pytorch dependencies
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
from tqdm import tqdm_notebook as tqdm

# You cannot change this line.
from tools.dataloader import CIFAR10,TESTSET

In [2]:
# Create the neural network module: ResNet
class Block(nn.Module):
    """Residual block: two 3x3 conv + batch-norm layers plus a shortcut branch.

    Main path: conv1 -> bn1 -> ReLU -> conv2 -> bn2. The shortcut output is
    added to the main path and the sum goes through a final ReLU. Both 3x3
    convolutions use padding 1 and the default stride, so the spatial size of
    the input is preserved.

    Args:
        cin: number of input channels.
        cout: number of output channels. When it differs from ``cin`` the
            shortcut is a 1x1 convolution followed by batch norm; otherwise
            the shortcut passes the input through unchanged.
    """

    def __init__(self, cin, cout):
        super().__init__()
        self.conv1 = nn.Conv2d(cin, cout, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(cout)
        self.conv2 = nn.Conv2d(cout, cout, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(cout)

        if cin == cout:
            # nn.Identity (rather than a bare lambda) is a registered
            # submodule: it shows up in repr()/named_children() and keeps the
            # model picklable. It owns no parameters or buffers, so the
            # state_dict keys are unchanged and old checkpoints still load.
            self.res = nn.Identity()
        else:
            # Project the input to `cout` channels so the two branches can be added.
            self.res = nn.Sequential(
                nn.Conv2d(cin, cout, kernel_size=1),
                nn.BatchNorm2d(cout))

    def forward(self, x):
        """Return ReLU(main_path(x) + shortcut(x))."""
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        shortcut = self.res(x)
        # In-place add on the freshly produced bn2 output avoids an extra
        # activation-sized allocation; `x` itself is never modified.
        return F.relu(out.add_(shortcut))
    
class ResNet(nn.Module):
    """Small residual network producing 10 output logits per input image.

    Layout: a 3->16 channel 3x3 stem (conv + batch norm + ReLU), eight
    residual blocks registered as ``block1`` .. ``block8`` with channel
    widths 16, 16, 32, 32, 64, 64, 128, 128, global average pooling, and a
    128->10 linear layer.
    """

    # (cin, cout) for block1 .. block8, in order.
    _BLOCK_CHANNELS = (
        (16, 16), (16, 16),
        (16, 32), (32, 32),
        (32, 64), (64, 64),
        (64, 128), (128, 128),
    )

    def __init__(self):
        super().__init__()
        self.conv_in = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.bn_in = nn.BatchNorm2d(16)
        # setattr registers each block under the attribute name "blockN",
        # which is what the state_dict keys are derived from.
        for number, (cin, cout) in enumerate(self._BLOCK_CHANNELS, start=1):
            setattr(self, "block%d" % number, Block(cin, cout))
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(128, 10)

    def forward(self, x):
        """Map a batch of 3-channel images to a (batch, 10) tensor of logits."""
        out = F.relu(self.bn_in(self.conv_in(x)))
        for number in range(1, len(self._BLOCK_CHANNELS) + 1):
            out = getattr(self, "block%d" % number)(out)
        out = self.avgpool(out)
        # Collapse the pooled (batch, 128, 1, 1) map to (batch, 128).
        out = torch.flatten(out, start_dim=1)
        return self.fc(out)

In [3]:
# Hyperparameters and paths shared by the cells below.

# Mini-batch sizes for the train / validation / test data loaders.
TRAIN_BATCH_SIZE, VAL_BATCH_SIZE, TEST_BATCH_SIZE = 64, 100, 1

# SGD settings: starting learning rate, momentum, and the weight-decay
# coefficient (REG is passed to the optimizer as `weight_decay`).
INITIAL_LR, MOMENTUM, REG = 0.01, 0.9, 5e-4

# Upper bound (exclusive) of the epoch range used by the training loop.
EPOCHS = 200

# Dataset root directory and the directory checkpoints are written to.
DATAROOT = "./data"
CHECKPOINT_PATH = "./saved_model"

In [4]:
# Preprocessing pipelines for the train / validation / test splits.
# Per-channel mean and std handed to Normalize
# (presumably CIFAR-10 channel statistics -- TODO confirm the source).
NORM_MEAN = (0.4914, 0.4822, 0.4465)
NORM_STD = (0.2023, 0.1994, 0.2010)


def _eval_transform():
    """Build the augmentation-free pipeline: tensor conversion + normalization."""
    return transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(NORM_MEAN, NORM_STD),
    ])


# Training: colour jitter, random horizontal flip and random rotation are
# applied to the image before it is converted to a tensor and normalized.
transform_train = transforms.Compose([
    transforms.ColorJitter(saturation=.05, hue=.05),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(20),
    transforms.ToTensor(),
    transforms.Normalize(NORM_MEAN, NORM_STD),
])

# Validation and test use the same deterministic preprocessing.
transform_val = _eval_transform()
transform_test = _eval_transform()


In [5]:
# Build the datasets and wrap each one in a DataLoader.
def _make_loader(dataset, batch_size, shuffle):
    """Wrap `dataset` in a single-worker DataLoader."""
    return torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=shuffle,
        num_workers=1,
    )


# Training split: augmented, reshuffled by the loader every epoch.
trainset = CIFAR10(root=DATAROOT, train=True, download=True, transform=transform_train)
trainloader = _make_loader(trainset, TRAIN_BATCH_SIZE, shuffle=True)

# Validation split (train=False): fixed order, no augmentation.
valset = CIFAR10(root=DATAROOT, train=False, download=True, transform=transform_val)
valloader = _make_loader(valset, VAL_BATCH_SIZE, shuffle=False)

# Test split: fixed order so row ids in the submission follow dataset order.
testset = TESTSET(root=DATAROOT, transform=transform_test)
testloader = _make_loader(testset, TEST_BATCH_SIZE, shuffle=False)


Using downloaded and verified file: ./data/cifar10_trainval.tar.gz
Extracting ./data/cifar10_trainval.tar.gz to ./data
Files already downloaded and verified
Using downloaded and verified file: ./data/cifar10_trainval.tar.gz
Extracting ./data/cifar10_trainval.tar.gz to ./data
Files already downloaded and verified


In [6]:
# Pick the compute device (GPU when available) and move a fresh network onto it.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

net = ResNet().to(device)
print("Train on GPU..." if device == 'cuda' else "Train on CPU...")

Train on GPU...


In [7]:
# FLAG for loading the pretrained model
TRAIN_FROM_SCRATCH = False
# Checkpoint file to resume from: the same directory/file name the training
# loop writes its checkpoint to.
CKPT_PATH = os.path.join(CHECKPOINT_PATH, "model.h5")


def get_checkpoint(ckpt_path, map_location=None):
    """Load a checkpoint, returning None (after printing the error) on failure.

    Args:
        ckpt_path: path of the file written by ``torch.save``.
        map_location: forwarded to ``torch.load``; lets a checkpoint saved on
            one device be restored on another (e.g. GPU-saved, CPU-loaded).
            ``None`` keeps ``torch.load``'s default behaviour.

    Returns:
        The loaded object, or None if loading raised any exception.
    """
    try:
        return torch.load(ckpt_path, map_location=map_location)
    except Exception as e:
        # Best effort: report why loading failed and let the caller fall back
        # to training from scratch.
        print(e)
        return None


# Do not read the file at all when training from scratch was requested.
# Mapping onto `device` prevents a GPU-saved checkpoint from failing to load
# on a CPU-only machine (which would silently restart training from scratch).
ckpt = None if TRAIN_FROM_SCRATCH else get_checkpoint(CKPT_PATH, map_location=device)
if ckpt is None or TRAIN_FROM_SCRATCH:
    if not TRAIN_FROM_SCRATCH:
        print("Checkpoint not found.")
    print("Training from scratch ...")
    start_epoch = 0
    current_learning_rate = INITIAL_LR
else:
    print("Successfully loaded checkpoint: %s" %CKPT_PATH)
    net.load_state_dict(ckpt['net'])
    # The checkpoint stores the last completed epoch; resume with the next one.
    start_epoch = ckpt['epoch'] + 1
    current_learning_rate = ckpt['lr']
    print("Starting from epoch %d " %start_epoch)

print("Starting from learning rate %f:" %current_learning_rate)

Successfully loaded checkpoint: ./saved_model/model.h5
Starting from epoch 150 
Starting from learning rate 0.002000:


In [8]:
# Loss function: cross-entropy over the network's output logits.
criterion = nn.CrossEntropyLoss()
# SGD with momentum; L2 regularization is applied through `weight_decay`.
# Start from `current_learning_rate` (equal to INITIAL_LR on a fresh run, or
# the value restored from the checkpoint when resuming) so that a resumed run
# really continues with the learning rate reported when the checkpoint loaded.
optimizer = optim.SGD(net.parameters(), lr=current_learning_rate, momentum=MOMENTUM, weight_decay=REG)

In [9]:
# Train and validate the network, saving a checkpoint whenever the
# validation accuracy improves on the best value seen so far.

# Step-wise learning-rate schedule: multiply the learning rate by DECAY at
# every epoch index that is a non-zero multiple of DECAY_EPOCHS.
DECAY_EPOCHS = 60
DECAY = 0.2

global_step = 0
best_val_acc = 0
# When resuming, start from the best accuracy stored with the checkpoint so a
# worse epoch cannot overwrite the saved model. Checkpoints written before
# this key existed simply fall back to 0.
if ckpt is not None and not TRAIN_FROM_SCRATCH:
    best_val_acc = ckpt.get('best_val_acc', 0)

for i in range(start_epoch, EPOCHS):
    print(datetime.datetime.now())
    # Switch to train mode
    net.train()
    print("Epoch %d:" %i)

    total_examples = 0
    correct_examples = 0
    train_loss = 0.0

    # Train on the training dataset for 1 epoch.
    print(len(trainloader))
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        # Copy inputs to device
        inputs = inputs.to(device)
        targets = targets.to(device)
        # Zero the gradient
        optimizer.zero_grad()
        # Forward pass and loss
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        # Backward pass and parameter update
        loss.backward()
        optimizer.step()
        # Predicted label = index of the largest logit
        _, predicted = outputs.max(1)
        total_examples += len(inputs)
        correct_examples += predicted.eq(targets.view_as(predicted)).sum().item()

        # .item() yields a plain Python float, so the running sum does not
        # keep a chain of graph-attached tensors alive across the epoch.
        train_loss += loss.item()
        global_step += 1

    # Average over every batch of this epoch (previously the value was only
    # refreshed every 100 global steps, so the printed loss could be stale).
    avg_loss = train_loss / len(trainloader)
    avg_acc = correct_examples / total_examples
    print("Training loss: %.4f, Training accuracy: %.4f" %(avg_loss, avg_acc))
    print(datetime.datetime.now())

    # Validate on the validation dataset
    print("Validation...")
    total_examples = 0
    correct_examples = 0
    val_loss = 0.0

    net.eval()
    # Disable gradient tracking during validation
    with torch.no_grad():
        for inputs, targets in valloader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            outputs = net(inputs)
            val_loss += criterion(outputs, targets).item()
            _, predicted = outputs.max(1)
            total_examples += len(inputs)
            correct_examples += predicted.eq(targets.view_as(predicted)).sum().item()

    avg_loss = val_loss / len(valloader)
    avg_acc = correct_examples / total_examples
    print("Validation loss: %.4f, Validation accuracy: %.4f" % (avg_loss, avg_acc))

    if i % DECAY_EPOCHS == 0 and i != 0:
        current_learning_rate = optimizer.param_groups[0]['lr'] * DECAY
        for param_group in optimizer.param_groups:
            # Assign the learning rate parameter
            param_group['lr'] = current_learning_rate
        print("Current learning rate has decayed to %f" %current_learning_rate)

    # Save a checkpoint when validation accuracy improves
    if avg_acc > best_val_acc:
        best_val_acc = avg_acc
        os.makedirs(CHECKPOINT_PATH, exist_ok=True)
        print("Saving ...")
        state = {'net': net.state_dict(),
                 'epoch': i,
                 'lr': current_learning_rate,
                 'best_val_acc': best_val_acc}
        torch.save(state, os.path.join(CHECKPOINT_PATH, 'model.h5'))

print("Optimization finished.")

2019-10-02 05:39:41.574602
Epoch 150:
704
Training loss: 0.2763, Training accuracy: 0.9046
2019-10-02 05:40:39.495835
Validation...
Validation loss: 0.3513, Validation accuracy: 0.8800
Saving ...
2019-10-02 05:40:42.097624
Epoch 151:
704
Training loss: 0.2820, Training accuracy: 0.9030
2019-10-02 05:41:36.172818
Validation...
Validation loss: 0.3448, Validation accuracy: 0.8830
Saving ...
2019-10-02 05:41:39.021573
Epoch 152:
704
Training loss: 0.2783, Training accuracy: 0.9030
2019-10-02 05:42:33.548106
Validation...
Validation loss: 0.3598, Validation accuracy: 0.8884
Saving ...
2019-10-02 05:42:36.269045
Epoch 153:
704
Training loss: 0.2774, Training accuracy: 0.9042
2019-10-02 05:43:30.105297
Validation...
Validation loss: 0.3573, Validation accuracy: 0.8806
2019-10-02 05:43:32.529764
Epoch 154:
704
Training loss: 0.2823, Training accuracy: 0.9014
2019-10-02 05:44:25.763270
Validation...
Validation loss: 0.3497, Validation accuracy: 0.8782
2019-10-02 05:44:28.378830
Epoch 155:
704


In [10]:
# Run the network over the test set and write one "Id,Category" row per
# sample to a CSV file.
net.eval()

save_dir = "submission.csv" #where you want the file to be downloaded to
columnTitleRow = "Id,Category\n"

print("Start validating test data")
# Running sample index used as the row Id. Counting samples (not batches)
# keeps the Ids correct for any test batch size; with a batch size of 1 it
# equals the batch index, so the output is the same as before.
sample_id = 0
# The `with` block closes the file even if inference raises part-way through;
# gradients are disabled for the whole pass.
with open(save_dir, "w") as csv_file, torch.no_grad():
    csv_file.write(columnTitleRow)
    for batch_idx, inputs in enumerate(testloader):
        if batch_idx % 100 == 0:
            print(batch_idx)
        # Copy inputs to device
        inputs = inputs.to(device)
        # Generate output from the DNN.
        outputs = net(inputs)
        # Predicted label = index of the largest logit
        _, predicted = outputs.max(1)
        # One CSV row per sample in the batch
        for label in predicted.tolist():
            csv_file.write(str(sample_id) + "," + str(label) + "\n")
            sample_id += 1

print("Complete! Predictions are saved.")

Start validating test data
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
2100
2200
2300
2400
2500
2600
2700
2800
2900
3000
3100
3200
3300
3400
3500
3600
3700
3800
3900
4000
4100
4200
4300
4400
4500
4600
4700
4800
4900
5000
5100
5200
5300
5400
5500
5600
5700
5800
5900
6000
6100
6200
6300
6400
6500
6600
6700
6800
6900
7000
7100
7200
7300
7400
7500
7600
7700
7800
7900
8000
8100
8200
8300
8400
8500
8600
8700
8800
8900
9000
9100
9200
9300
9400
9500
9600
9700
9800
9900
Complete! Predictions are saved.
