In [1]:
!git pull

Already up-to-date.


In [2]:
import os
import sys
import pickle
import argparse

import torch
import torch.nn as nn
import torch.optim as optim
import torch.backends.cudnn as cudnn

import torch.multiprocessing
torch.multiprocessing.set_sharing_strategy('file_system')

from utils import avoidWarnings
from beautifultable import BeautifulTable as BT

avoidWarnings()

''' OPTIMIZER PARAMETERS - Analysis on those '''

test = False
best_acc = 0  
start_epoch = 0  
num_epochs = 200  ## TODO: set to args.epochs
batch_size = 128  ## TODO: set to args.barch
milestones = [100, 150]

L = 16
M = 32
E = 7
 
comments = True
n_workers = torch.multiprocessing.cpu_count()
device = 'cuda' if torch.cuda.is_available() else 'cpu'
gpus = True if torch.cuda.device_count() > 1 else False
device_name = torch.cuda.get_device_name(0) if device == 'cuda' else 'CPUs'
    
table = BT()
table.append_row(['Python Version', sys.version[:5]])
table.append_row(['PyTorch Version', torch.__version__])
table.append_row(['Device', str(device_name)])
table.append_row(['Cores', str(n_workers)])
table.append_row(['GPUs', str(torch.cuda.device_count())])
table.append_row(['CUDNN Enabled', str(torch.backends.cudnn.enabled)])
table.append_row(['Architecture', 'DenseNet x7'])
table.append_row(['Dataset', 'CIFAR10'])
table.append_row(['Epochs', str(num_epochs)])
table.append_row(['Batch Size', str(batch_size)])

print(table)

+-----------------+----------------------+
| Python Version  |        3.6.5         |
+-----------------+----------------------+
| PyTorch Version |        1.0.0         |
+-----------------+----------------------+
|     Device      | Tesla V100-SXM2-16GB |
+-----------------+----------------------+
|      Cores      |          8           |
+-----------------+----------------------+
|      GPUs       |          1           |
+-----------------+----------------------+
|  CUDNN Enabled  |         True         |
+-----------------+----------------------+
|  Architecture   |     DenseNet x7      |
+-----------------+----------------------+
|     Dataset     |       CIFAR10        |
+-----------------+----------------------+
|     Epochs      |         200          |
+-----------------+----------------------+
|   Batch Size    |         128          |
+-----------------+----------------------+


In [3]:
# Data
# ----

avoidWarnings()
dataset = 'MNIST'
dataset = 'CIFAR'
from data import dataloaders
trainloader, testloader, classes = dataloaders(dataset, batch_size)

==> Preparing data..
Files already downloaded and verified
Files already downloaded and verified


In [4]:
# Models 
# ------
    
avoidWarnings()
comments = True
from models import Conv_Recusive_Net
from utils import count_parameters
net = Conv_Recusive_Net('recursive_net', layers=L, filters=M)


print('Recursive ConvNet')
if comments: print(net)
print('\n\n\t\tParameters: {}M'.format(count_parameters(net)/1e6))

Recursive ConvNet
Conv_Recusive_Net(
  (act): ReLU(inplace)
  (V): Conv2d(3, 32, kernel_size=(8, 8), stride=(1, 1), padding=(3, 3))
  (P): MaxPool2d(kernel_size=4, stride=4, padding=2, dilation=1, ceil_mode=False)
  (W): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (fc): Linear(in_features=2048, out_features=10, bias=True)
)


		Parameters: 0.035914M


In [5]:
from collections import OrderedDict

ensemble = OrderedDict()
for n in range(1,1+E):
    ensemble['net_{}'.format(n)] = Conv_Recusive_Net('net_{}'.format(n), layers=L, filters=M)

optimizers = []
criterion = nn.CrossEntropyLoss()

for n in range(1,1+E):
    optimizers.append(
        optim.SGD(ensemble['net_{}'.format(n)].parameters(), lr=0.01, momentum=0.9, weight_decay=1e-5)
    )

In [6]:
# Training
# --------

from results import TrainResults as Results


def train(epoch):
    
    global device
    global results
    global optimizers    
    print('\nEpoch: %d' % epoch)

    total = 0
    correct = 0
    len_ = len(trainloader)
    
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        
        individual_outputs = list()
        
        for n, net in enumerate(ensemble.values()):
            
            if device == 'cuda':
                net.to(device)
                net = torch.nn.DataParallel(net)
        
            net.train()
            optimizers[n].zero_grad()
            
            ## Individuals forward pass
            
            n_total = 0
            n_correct = 0
    
            inputs, targets = inputs.to(device), targets.to(device)
            output = net(inputs)
            loss = criterion(output, targets)
            
            loss.backward()
            optimizers[n].step()
        
            # Individual network performance            
            _, predicted = output.max(1)
            n_total += targets.size(0)
            n_correct += predicted.eq(targets).sum().item()
            n_accuracy = 100. * n_correct / n_total
            
            # Store iteration results for this individual
            results.append_iter_loss(round(loss.item(), 3), 'train', n+1)
            results.append_iter_accy(round(n_accuracy, 2), 'train', n+1)
            
            if batch_idx == len_-1:
                # Store epoch results for this individual (as last iter)
                results.append_loss(round(loss.item(), 3), 'train', n+1)
                results.append_accy(round(n_accuracy, 2), 'train', n+1)
            
            individual_outputs.append(output)
        
        ## TODO: Just set testing = True when debuggin on local
        if testing and batch_idx == 5:
            break
    
     ## Ensemble forward pass
        
    output = torch.mean(torch.stack(individual_outputs), dim=0)
    loss = criterion(output, targets) 
    
    _, predicted = output.max(1)
    total += targets.size(0)
    correct += predicted.eq(targets).sum().item()
    accuracy = 100 * correct / total
    
    # Store iteration results for Ensemble
    results.append_iter_loss(round(loss.item(), 3), 'train', None)
    results.append_iter_accy(round(accuracy, 2), 'train', None)

    print('Train :: Loss: {} | Accy: {}'.format(round(loss.item(),2), round(accuracy,2)))

        
def test(epoch):

    total = 0
    correct = 0
    global results
    global best_acc

    with torch.no_grad():
        
        for batch_idx, (inputs, targets) in enumerate(testloader):
            
            inputs, targets = inputs.to(device), targets.to(device)
            
            outs = []
            for n, net in enumerate(ensemble.values()):
                
                net.eval()  
                net.to(device)

                ## Individual forward pass
                
                n_total = 0
                n_correct = 0
                
                output = net(inputs)
                outs.append(output)
                loss = criterion(output, targets)
    
                # Store epoch (as first iteration of the epoch) results for each net
                if batch_idx == 0:
    
                    _, predicted = output.max(1)
                    n_total += targets.size(0)
                    n_correct += predicted.eq(targets).sum().item()
                    n_accuracy = n_correct / n_total
                    
                    results.append_loss(round(loss.item(), 3), 'valid', n+1)
                    results.append_accy(round(n_accuracy * 100, 2), 'valid', n+1)
    
            
            # TODO: UNCOMMENT WHEN RUNNING ON SERVER -> wraped in test parameter
            if testing and batch_idx == 5:
                break
            
            _, predicted = output.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
        
        # Store epoch results for ensemble
        acc = 100.*correct/total
        results.append_loss(round(loss.item(), 3), 'valid', None)
        results.append_accy(round(acc,2), 'valid', None)
        print('Valid :: Loss: {} | Accy: {}'.format(round(loss.item(),2), round(acc,2)))
    
            
    # Save checkpoint.
    
    if acc > best_acc:
        print('Saving..')
        state = {
            'net': net.state_dict(),
            'acc': acc,
            'epoch': epoch,
        }
        if not os.path.isdir('checkpoint/'):
            os.mkdir('checkpoint')
        torch.save(state, './checkpoint/ens_rec_ckpt.t7')
        best_acc = acc
    return


def lr_schedule(epoch):

    global E
    global milestones
    if epoch in milestones:
        for n in range(E):
            for p in optimizers[n].param_groups:  p['lr'] = p['lr'] / 10
            print('\n** Changing LR to {} \n'.format(p['lr']))    
    return

In [7]:
path = '../results/ensemble_recursive_model/Results_Ensemble_Recursive.pkl'
def results_backup():
    global results
    with open(path, 'wb') as object_result:
        pickle.dump(results, object_result, pickle.HIGHEST_PROTOCOL)   
    return

In [8]:
from utils import timeit

@timeit
def run_epoch(epoch):
    
    lr_schedule(epoch)
    train(epoch)
    test(epoch)
    results_backup()

In [9]:
results = Results([net])
results.append_time(0)
names = [n.name for n in ensemble.values()]
results.name = names[0][:-2] + '(x' + str(len(names)) + ')'

In [None]:
# Start training
    
testing = False
print('[OK]: Starting Training of Single Model')
for epoch in range(start_epoch, num_epochs):
    run_epoch(epoch)

    
results.show()

[OK]: Starting Training of Single Model

Epoch: 0
Train :: Loss: 1.92 | Accy: 36.25
Valid :: Loss: 2.3 | Accy: 10.0
Saving..
'run_epoch'  25739.09 ms

Epoch: 1
Train :: Loss: 1.73 | Accy: 33.75
Valid :: Loss: 2.31 | Accy: 10.0
'run_epoch'  25968.26 ms

Epoch: 2
Train :: Loss: 1.66 | Accy: 48.75
Valid :: Loss: 2.3 | Accy: 10.0
'run_epoch'  31233.53 ms

Epoch: 3
Train :: Loss: 1.61 | Accy: 48.75
Valid :: Loss: 2.3 | Accy: 10.0
'run_epoch'  31632.80 ms

Epoch: 4
Train :: Loss: 1.59 | Accy: 52.5
Valid :: Loss: 2.3 | Accy: 10.0
'run_epoch'  31592.81 ms

Epoch: 5
Train :: Loss: 1.64 | Accy: 47.5
Valid :: Loss: 2.3 | Accy: 10.0
'run_epoch'  31804.83 ms

Epoch: 6
Train :: Loss: 1.67 | Accy: 37.5
Valid :: Loss: 2.3 | Accy: 10.0
'run_epoch'  31362.64 ms

Epoch: 7
Train :: Loss: 1.52 | Accy: 47.5
Valid :: Loss: 2.3 | Accy: 10.0
'run_epoch'  31498.71 ms

Epoch: 8
Train :: Loss: 1.35 | Accy: 67.5
Valid :: Loss: 2.3 | Accy: 10.0
'run_epoch'  31697.77 ms

Epoch: 9
Train :: Loss: 1.39 | Accy: 53.75
Va

In [None]:
import matplotlib.pyplot as plt

plt.figure()
plt.plot(range(len(results.train_loss)), results.train_loss, label='Train')
plt.plot(range(len(results.valid_loss)), results.valid_loss, label='Valid')
plt.title('Loss')
plt.show()

plt.figure()
plt.plot(range(len(results.train_accy)), results.train_accy, label='Train')
plt.plot(range(len(results.valid_accy)), results.valid_accy, label='Valid')

plt.title('Accuracy')
plt.show()