In [1]:
import os
import sys
import pickle
import argparse

import torch
import torch.nn as nn
import torch.optim as optim
import torch.backends.cudnn as cudnn

import torch.multiprocessing
torch.multiprocessing.set_sharing_strategy('file_system')

from utils import avoidWarnings
from beautifultable import BeautifulTable as BT

avoidWarnings()

# OPTIMIZER PARAMETERS - the values this analysis is run over

# NOTE: `test` is rebound later in the notebook by `def test(epoch)`, so it
# cannot act as a flag afterwards. The flag read by train()/test() is `testing`.
test = False
# Debug switch read by train()/test(): when True each pass stops after 6 batches.
# Defined here so those functions work even before the training cell sets it.
testing = False
best_acc = 0
start_epoch = 0
num_epochs = 200  ## TODO: set to args.epochs
batch_size = 128  ## TODO: set to args.batch
milestones = [100, 150]  # epochs at which lr_schedule() divides the LR by 10

# L and M are passed to the model as `layers` and `filters`.
# E is not used by any cell visible in this notebook.
L = 16
M = 32
E = 7

comments = True  # when True, the model cell prints the network structure
n_workers = torch.multiprocessing.cpu_count()
device = 'cuda' if torch.cuda.is_available() else 'cpu'
gpus = torch.cuda.device_count() > 1
device_name = torch.cuda.get_device_name(0) if device == 'cuda' else 'CPUs'

# Summary table of the runtime environment and the run configuration.
table = BT()
# `sys.version` starts with the dotted version followed by build details;
# taking the first whitespace-separated token also works for e.g. "3.10.12",
# which a fixed 5-character slice would truncate to "3.10.".
table.append_row(['Python Version', sys.version.split()[0]])
table.append_row(['PyTorch Version', torch.__version__])
table.append_row(['Device', str(device_name)])
table.append_row(['Cores', str(n_workers)])
table.append_row(['GPUs', str(torch.cuda.device_count())])
table.append_row(['CUDNN Enabled', str(torch.backends.cudnn.enabled)])
# NOTE(review): the model built below is Conv_Recusive_Net, so this label
# looks stale - confirm before relying on the table.
table.append_row(['Architecture', 'DenseNet x7'])
table.append_row(['Dataset', 'CIFAR10'])
table.append_row(['Epochs', str(num_epochs)])
table.append_row(['Batch Size', str(batch_size)])

print(table)

+-----------------+----------------------+
| Python Version  |        3.6.5         |
+-----------------+----------------------+
| PyTorch Version |        1.0.0         |
+-----------------+----------------------+
|     Device      | Tesla V100-SXM2-16GB |
+-----------------+----------------------+
|      Cores      |          8           |
+-----------------+----------------------+
|      GPUs       |          1           |
+-----------------+----------------------+
|  CUDNN Enabled  |         True         |
+-----------------+----------------------+
|  Architecture   |     DenseNet x7      |
+-----------------+----------------------+
|     Dataset     |       CIFAR10        |
+-----------------+----------------------+
|     Epochs      |         200          |
+-----------------+----------------------+
|   Batch Size    |         128          |
+-----------------+----------------------+


In [2]:
# Data
# ----

avoidWarnings()
# Dataset selector handed to `dataloaders`. The previous revision assigned
# 'MNIST' and immediately overwrote it with 'CIFAR'; only the effective value
# is kept. Presumably 'MNIST' is also accepted by `dataloaders` - verify.
dataset = 'CIFAR'
from data import dataloaders
trainloader, testloader, classes = dataloaders(dataset, batch_size)

==> Preparing data..
Files already downloaded and verified
Files already downloaded and verified


In [6]:
# Models
# ------

avoidWarnings()
comments = True
from utils import count_parameters
from models import Conv_Recusive_Net

# Build the network with the layer / filter counts chosen in the config cell.
net = Conv_Recusive_Net('recursive_net', layers=L, filters=M)

# Report the architecture (optional) and the parameter count scaled by 1e6.
print('Recursive ConvNet')
if comments:
    print(net)
print('\n\n\t\tParameters: {}M'.format(count_parameters(net) / 1e6))

# Loss and optimizer used by train() / test() / lr_schedule().
optimizers = []
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    net.parameters(),
    lr=0.01,
    momentum=0.9,
    weight_decay=1e-5,
)


Recursive ConvNet
Conv_Recusive_Net(
  (act): ReLU(inplace)
  (V): Conv2d(3, 32, kernel_size=(8, 8), stride=(1, 1), padding=(3, 3))
  (P): MaxPool2d(kernel_size=4, stride=4, padding=2, dilation=1, ceil_mode=False)
  (W): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (fc): Linear(in_features=2048, out_features=10, bias=True)
)


		Parameters: 0.035914M


In [11]:
# Training
# --------
    
# Helpers
from utils import timeit
from results import TrainResults as Results


def train(epoch):
    """Run one optimisation pass over `trainloader` and record its metrics.

    Uses the notebook globals `net`, `trainloader`, `optimizer`, `criterion`,
    `device`, `results` and `testing`.

    The loss appended to `results` is the sample-weighted mean over every
    batch seen in the epoch (previously only the last batch's loss was
    recorded). The weighting assumes `criterion` returns a per-batch mean,
    which is the case for the `nn.CrossEntropyLoss()` built in this notebook.

    Args:
        epoch: Epoch index; only used for the progress banner.

    Raises:
        RuntimeError: if `trainloader` yields no samples.
    """
    net.train()
    print('\nEpoch: %d' % epoch)

    total = 0
    correct = 0
    loss_sum = 0.0

    for batch_idx, (inputs, targets) in enumerate(trainloader):

        optimizer.zero_grad()

        inputs, targets = inputs.to(device), targets.to(device)
        outputs = net(inputs)
        loss = criterion(outputs, targets)

        loss.backward()
        optimizer.step()

        n_samples = targets.size(0)
        _, predicted = outputs.max(1)
        total += n_samples
        correct += predicted.eq(targets).sum().item()
        # Undo the per-batch mean so a smaller final batch is weighted correctly.
        loss_sum += loss.item() * n_samples

        # Debug shortcut for local runs: stop after 6 batches when `testing` is set.
        if testing and batch_idx == 5:
            break

    if total == 0:
        # Without this the accuracy computation fails with an opaque ZeroDivisionError.
        raise RuntimeError('trainloader produced no samples')

    accuracy = 100. * correct / total
    mean_loss = loss_sum / total
    results.append_loss(round(mean_loss, 2), 'train')
    results.append_accy(round(accuracy, 2), 'train')
    print('Train :: Loss: {} | Accy: {}'.format(round(mean_loss, 2), round(accuracy, 2)))

        
def test(epoch):
    """Evaluate `net` on `testloader`, record metrics and checkpoint the best model.

    Uses the notebook globals `net`, `testloader`, `criterion`, `device`,
    `results`, `testing` and `best_acc`.

    The loss appended to `results` is the sample-weighted mean over every
    batch evaluated (previously only the last batch's loss was recorded).
    The weighting assumes `criterion` returns a per-batch mean, which is the
    case for the `nn.CrossEntropyLoss()` built in this notebook.

    When the accuracy exceeds `best_acc`, the model state, accuracy and epoch
    are saved to './checkpoint/rec_ckpt.t7' and `best_acc` is updated.

    Args:
        epoch: Epoch index stored inside the checkpoint.

    Raises:
        RuntimeError: if `testloader` yields no samples.
    """
    global best_acc

    net.eval()

    total = 0
    correct = 0
    loss_sum = 0.0

    with torch.no_grad():

        for batch_idx, (inputs, targets) in enumerate(testloader):

            inputs, targets = inputs.to(device), targets.to(device)
            outputs = net(inputs)
            loss = criterion(outputs, targets)

            n_samples = targets.size(0)
            _, predicted = outputs.max(1)
            total += n_samples
            correct += predicted.eq(targets).sum().item()
            # Undo the per-batch mean so a smaller final batch is weighted correctly.
            loss_sum += loss.item() * n_samples

            # Debug shortcut for local runs: stop after 6 batches when `testing` is set.
            if testing and batch_idx == 5:
                break

    if total == 0:
        # Without this the accuracy computation fails with an opaque ZeroDivisionError.
        raise RuntimeError('testloader produced no samples')

    acc = 100. * correct / total
    mean_loss = loss_sum / total
    results.append_loss(round(mean_loss, 2), 'valid')
    results.append_accy(round(acc, 2), 'valid')
    print('Valid :: Loss: {} | Accy: {}'.format(round(mean_loss, 2), round(acc, 2)))

    # Save checkpoint.
    if acc > best_acc:
        print('Saving..')
        state = {
            'net': net.state_dict(),
            'acc': acc,
            'epoch': epoch,
        }
        # exist_ok avoids the check-then-create race of isdir() + mkdir().
        os.makedirs('checkpoint', exist_ok=True)
        torch.save(state, './checkpoint/rec_ckpt.t7')
        best_acc = acc


def lr_schedule(epoch):
    """Divide every param group's learning rate by 10 at milestone epochs.

    Reads the notebook globals `milestones` and `optimizer`. On a milestone
    epoch the new rate of the last param group is printed; on any other
    epoch nothing happens.

    Args:
        epoch: Epoch index checked against `milestones`.
    """
    if epoch not in milestones:
        return

    for group in optimizer.param_groups:
        group['lr'] = group['lr'] / 10
    # Reports the rate of the last group visited by the loop above.
    print('\n** Changing LR to {} \n'.format(group['lr']))
    

In [12]:
path = '../results/single_recursive_model/Results_Single_Recursive.pkl'
def results_backup():
    """Pickle the global `results` object to `path`.

    The parent directory of `path` is created when missing, so the backup at
    the end of an epoch does not fail with FileNotFoundError on a fresh
    checkout.
    """
    parent = os.path.dirname(path)
    if parent:  # os.makedirs('') raises, so skip it for a bare file name
        os.makedirs(parent, exist_ok=True)
    with open(path, 'wb') as object_result:
        pickle.dump(results, object_result, pickle.HIGHEST_PROTOCOL)

In [13]:
@timeit
def run_epoch(epoch):
    """Run one complete epoch: LR schedule, training pass, validation pass, backup.

    Decorated with `timeit` (imported from utils); the recorded cell output
    shows a "'run_epoch'  <n> ms" line after each epoch, presumably printed
    by that decorator.

    Args:
        epoch: Epoch index forwarded to lr_schedule(), train() and test().
    """
    
    # Applied first, so a milestone epoch already trains at the reduced rate.
    lr_schedule(epoch)
    train(epoch)
    # Validation; also writes a checkpoint when accuracy beats the best so far.
    test(epoch)
    # Persist the accumulated metrics after every epoch.
    results_backup()

In [14]:
# Send model to GPU(s)

# Metrics container consumed by train(), test() and results_backup().
results = Results([net])
net.to(device)
if device == 'cuda':
    # Guard against re-running this cell: wrapping an already wrapped model
    # would nest DataParallel inside DataParallel.
    if not isinstance(net, torch.nn.DataParallel):
        net = torch.nn.DataParallel(net)
    cudnn.benchmark = True


In [16]:
# Start training
   
# Debug flag read by train()/test(): when True each pass stops after 6 batches.
testing = False
print('[OK]: Starting Training of Single Model')
# One run_epoch() call per epoch: LR schedule, train, validate, backup results.
for epoch in range(start_epoch, num_epochs):
    run_epoch(epoch)
    
# Only reached when the loop finishes without being interrupted.
results.show()

[OK]: Starting Training of Single Model

Epoch: 0
Train :: Loss: 2.3 | Accy: 9.9
Valid :: Loss: 2.3 | Accy: 10.0
Saving..
'run_epoch'  9355.07 ms

Epoch: 1
Train :: Loss: 2.3 | Accy: 9.82
Valid :: Loss: 2.3 | Accy: 10.0
'run_epoch'  9562.82 ms

Epoch: 2
Train :: Loss: 2.3 | Accy: 9.81
Valid :: Loss: 2.3 | Accy: 10.0
'run_epoch'  9610.84 ms

Epoch: 3
Train :: Loss: 2.3 | Accy: 9.95
Valid :: Loss: 2.3 | Accy: 10.0
'run_epoch'  9311.90 ms

Epoch: 4
Train :: Loss: 2.3 | Accy: 9.78
Valid :: Loss: 2.3 | Accy: 10.0
'run_epoch'  9248.70 ms

Epoch: 5
Train :: Loss: 2.3 | Accy: 9.83
Valid :: Loss: 2.3 | Accy: 10.0
'run_epoch'  9317.80 ms

Epoch: 6
Train :: Loss: 2.3 | Accy: 9.82
Valid :: Loss: 2.3 | Accy: 10.0
'run_epoch'  9195.69 ms

Epoch: 7
Train :: Loss: 2.3 | Accy: 9.8
Valid :: Loss: 2.3 | Accy: 10.0
'run_epoch'  9908.25 ms

Epoch: 8
Train :: Loss: 2.3 | Accy: 9.99
Valid :: Loss: 2.3 | Accy: 10.0
'run_epoch'  9271.81 ms

Epoch: 9
Train :: Loss: 2.3 | Accy: 9.88
Valid :: Loss: 2.3 | Accy: 10

KeyboardInterrupt: 

In [None]:
import matplotlib.pyplot as plt


def _plot_curves(title, train_values, valid_values, ylabel):
    """Draw the train and validation series of one metric on a new figure.

    Args:
        title: Figure title.
        train_values: Sequence of per-epoch training values.
        valid_values: Sequence of per-epoch validation values.
        ylabel: Label for the y axis.
    """
    plt.figure()
    plt.plot(range(len(train_values)), train_values, label='Train')
    plt.plot(range(len(valid_values)), valid_values, label='Valid')
    plt.title(title)
    plt.xlabel('Epoch')  # one value is appended per epoch by train()/test()
    plt.ylabel(ylabel)
    # Without a legend the `label=` arguments above are never displayed.
    plt.legend()
    plt.show()


_plot_curves('Loss', results.train_loss, results.valid_loss, 'Loss')
_plot_curves('Accuracy', results.train_accy, results.valid_accy, 'Accuracy (%)')