In [1]:
import torch
import numpy as np

%matplotlib notebook
import matplotlib.pyplot as plt

%load_ext autoreload
%autoreload 2

# Pytorch:

En Pytorch yo puedo crear objetos tipo `torch.Tensor` y usarlos como numpy arrays. En este caso, los tensores que se crean para este tipo de propósito son de tipo *leaf* ("hoja") y se refieren a que son "hojas del grafo computacional del backpropagation". Si sólo hago operaciones de tipo crear y calcular operaciones con esos tensores (es decir, no calculo gradientes), todos los tensores van a ser de tipo *leaf*, y va a ser lo mismo que usar numpy. 

Ahora, cada objeto de tipo `torch.Tensor` tiene un flag `requires_grad` y un objeto de tipo `torch.autograd.Function` llamado `grad_fn` que se encarga de guardar la información necesaria para hacer backpropagation. También contiene un objeto `grad` (también de tipo `torch.tensor`) que contiene el valor del gradiente para ese tensor. Si yo quiero armar un grafo $f(x)$ tengo que definir un tensor *leaf* con `requires_grad=True` y crear otros tensores a partir de éste. Todos los tensores creados a partir de un tensor *leaf* con `requires_grad=True`, dejan de ser *leaf*.

Sólo es posible cambiar el flag `requires_grad` en los tensores *leaf*. Si quiero que un tensor no *leaf* pase a ser *leaf*, tengo que usar `.detach()`

In [65]:
class MyModel(nn.Module):
    
    def __init__(self):
        super(MyModel,self).__init__()
        self.emb = nn.Embedding(4,2)
        self.linear = nn.Linear(2,4,bias=False)
        
    def forward(self,x):
        return self.linear(self.emb(x).mean(dim=1))
    

model = MyModel()
x = torch.tensor([[0,2,2,3,1],[1,2,2,3,1],[2,2,2,3,1]])
print(x)
print(model.forward(x))
print(list(model.parameters()))
model = model.to(device=torch.device('cuda:1'))
print(x)
x = x.to(device=torch.device('cuda:1'))
print(x)
print(model.forward(x))
print(list(model.parameters()))

tensor([[0, 2, 2, 3, 1],
        [1, 2, 2, 3, 1],
        [2, 2, 2, 3, 1]])
tensor([[ 0.2518, -0.0550,  0.1407, -0.2840],
        [ 0.5023, -0.0691,  0.2553, -0.5804],
        [ 0.1473,  0.0573,  0.0261, -0.1966]], grad_fn=<MmBackward>)
[Parameter containing:
tensor([[ 0.4023, -0.2521],
        [-0.3493,  2.1044],
        [-0.0991, -1.1768],
        [-0.2976,  2.8471]], requires_grad=True), Parameter containing:
tensor([[ 0.0387,  0.5440],
        [-0.6702, -0.2437],
        [ 0.4380,  0.3827],
        [ 0.1817, -0.5709]], requires_grad=True)]
tensor([[0, 2, 2, 3, 1],
        [1, 2, 2, 3, 1],
        [2, 2, 2, 3, 1]])
tensor([[0, 2, 2, 3, 1],
        [1, 2, 2, 3, 1],
        [2, 2, 2, 3, 1]], device='cuda:1')
tensor([[ 0.2518, -0.0550,  0.1407, -0.2840],
        [ 0.5023, -0.0691,  0.2553, -0.5804],
        [ 0.1473,  0.0573,  0.0261, -0.1966]], device='cuda:1',
       grad_fn=<MmBackward>)
[Parameter containing:
tensor([[ 0.4023, -0.2521],
        [-0.3493,  2.1044],
        [-0.0991,

## Paso 1: Definir el dataset

In [3]:
from torchvision import datasets as dset
import torchvision.transforms as T

transform = T.Compose([T.ToTensor(),T.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])

cifar10_train = dset.CIFAR10('CIFAR10/train/', train=True, download=True, transform=transform)
cifar10_test = dset.CIFAR10('CIFAR10/test/', train=False, download=True, transform=transform)

Files already downloaded and verified
Files already downloaded and verified


## Paso 2: pasarle el dataset al entrenador

In [118]:
import torch
from torch.utils.data import DataLoader, sampler
import torch.optim as optim
import torch.nn.functional as F

class ModelTrainer(object):
    
    def __init__(self,
                 model,
                 train_dataset,
                 test_dataset,
                 batch_size=64,
                 val_size=.02):
        
        # Model:
        self.model = model
        
        # Data:
        tr, val, te = self.generate_data_batches(train_dataset, test_dataset,batch_size,val_size)
        self.train_dataloader, self.val_dataloader, self.test_dataloader = tr, val, te
        
        # Data-types:
        self.input_dtype = next(iter(self.train_dataloader))[0].dtype
        self.target_dtype = next(iter(self.train_dataloader))[1].dtype
        
        self.first_time = True
        self.batch_len = len(self.train_dataloader)
        
        print('Model trainer created:')
        train_samples = int((1 - val_size) * len(train_dataset)) 
        val_samples = len(train_dataset) - train_samples
        test_samples = len(test_dataset)
        total_samples = train_samples + val_samples + test_samples
        percent_val, percent_test = int((val_samples / total_samples) * 100), int((test_samples / total_samples) * 100)
        print('Number of training samples: {} ({}%)'.format(train_samples, 100 - percent_val - percent_test))
        print('Number of validation samples: {} ({}%)'.format(val_samples, percent_val))
        print('Number of test samples: {} ({}%)'.format(test_samples, percent_test))
        print('Number of train batches: {}'.format(self.batch_len))
        print('Number of samples per batch: {}'.format(batch_size))
        print()
        
        
    def generate_data_batches(self,train_dataset, test_dataset, # Train y test datasets
                              batch_size = 64, # Tamaño del batch
                              val_size = .02): # Proporción de muestras utilizadas para validación 
    
        """
            Función para iterar sobre los batches de muestras. 
            Devuelve los dataloaders de train / validation / test.
            
        """

        # Separo las muestras aleatoriamente en Train y Validation:
        NUM_TRAIN = int((1 - val_size) * len(train_dataset)) 
        samples_idx = torch.randperm(len(train_dataset))
        train_samples_idx = samples_idx[:NUM_TRAIN]
        val_samples_idx = samples_idx[NUM_TRAIN:]
        my_sampler = lambda indices: sampler.SubsetRandomSampler(indices) # sampler

        # Dataloader para las muestras de entrenamiento:
        train_dataloader = DataLoader(train_dataset, 
                                      batch_size=batch_size, 
                                      sampler=my_sampler(train_samples_idx))

        # Dataloader para las muestras de validación:
        val_dataloader = DataLoader(train_dataset, 
                                    batch_size=batch_size, 
                                    sampler=my_sampler(val_samples_idx))

        # Dataloader para las muestras de testeo:
        test_dataloader = DataLoader(test_dataset, 
                                     batch_size=batch_size)

        return train_dataloader, val_dataloader, test_dataloader
    
    
    def InitParameters(self,from_pretrained=None,use_gpu=None, **kwargs):
       
        if from_pretrained is not None:
            pass

        # Defino el dispositivo sobre el cual trabajar:
        if use_gpu == 0:
            self.device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
        elif use_gpu == 1:
            self.device = torch.device('cuda:1') if torch.cuda.is_available() else torch.device('cpu')
        elif use_gpu is None:
            self.device = torch.device('cpu')
        
        self.model.init_parameters(kwargs)
        self.model = self.model.to(device=self.device)

        
    def SGDTrain(self, epochs=1, learning_rate=1e-1, sample_loss_every=100, check_on_train=False):
        
        if self.first_time:
            print('Starting training...')
            n_iter = 0
            self.performance_history = {'iter': [], 'loss': [], 'accuracy': []}
            self.first_time = False
        else:
            n_iter = self.performance_history['iter'][-1]
            print('Resuming training...')
        
        optimizer = optim.SGD(self.model.parameters(), lr=learning_rate)
        
        print('Optimization method: Stochastic Gradient Descent')
        print('Learning Rate: {:.2g}'.format(learning_rate))
        print('Number of epochs: {}'.format(epochs))
        print('Running on device "{}"'.format(self.device))
        print()
        
        try:
    
            for e in range(epochs):
                for t, (x,y) in enumerate(self.train_dataloader):

                    x = x.to(device=self.device, dtype=self.input_dtype)
                    y = y.to(device=self.device, dtype=self.target_dtype)

                    optimizer.zero_grad() # Llevo a cero los gradientes de la red
                    scores = self.model(x) # Calculo la salida de la red
                    loss = self.model.loss(scores,y) # Calculo el valor de la loss
                    loss.backward() # Calculo los gradientes
                    optimizer.step() # Actualizo los parámetros

                    if (e * self.batch_len + t) % sample_loss_every == 0:
                        num_correct_val, num_samples_val = check_accuracy('validation')
                        performance_history['iter'].append(e * self.batch_len + t + n_iter)
                        performance_history['loss'].append(loss.item())
                        performance_history['accuracy'].append(float(num_correct_val / num_samples_val))
                        print('Epoch: {}, Batch number: {}'.format(e+1, t))
                        print('Accuracy on validation dataset: {}/{} ({:.2f}%)'.format(num_correct_val, num_samples_val, 100 * float(num_correct_val) / num_samples_val))
                        print()

                        if check_on_train:
                            num_correct_train, num_samples_train = check_accuracy('train')
                            print('Accuracy on train dataset: {}/{} ({:.2f}%)'.format(num_correct_train, num_samples_train, 100 * float(num_correct_train) / num_samples_train))
                            print()

            print('Training finished')
            print()

        except KeyboardInterrupt:

            print('Exiting training...')
            print()    

    def check_accuracy(self, dataset='validation'):
        
        num_correct = 0
        num_samples = 0
        
        if dataset == 'train':
            loader = self.train_dataloader
        elif dataset == 'validation':
            loader = self.val_dataloader
        elif dataset == 'test':
            loader = self.test_dataloader
        else:
            raise AttributeError('Please specify on which dataset to perform de accuracy calculation')
        
        self.model.eval()
        with torch.no_grad():
            for x, y in loader:
                x = x.to(device=self.device, dtype=self.input_dtype)  
                y = y.to(device=self.device, dtype=self.target_dtype)

                scores = self.model(x)
                _, preds = scores.max(1)
                num_correct += (preds == y).sum()
                num_samples += preds.size(0)

        self.model.train()
        return num_correct, num_samples

    def CheckResultsOnTest(self):
        
        total_corrects = 0
        total_samples = 0
        total_performance = 0.
        
        for (x,y) in enumerate(self.test_dataloader):
            x = x.to(device=self.device, dtype=self.input_dtype)
            y = y.to(device=self.device, dtype=self.target_dtype)
            num_correct, num_samples = check_accuracy('test')
            total_corrects += num_corrects
            total_samples += num_samples
            total_performance += float(num_correct / num_samples)
        
        print('Final accuracy on test set: {}/{} ({}%)'.format(total_corrects,total_samples,total_performance))

In [114]:
class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        # 1 input image channel, 6 output channels, 3x3 square convolution
        # kernel
        self.conv1 = nn.Conv2d(1, 6, 3)
        self.conv2 = nn.Conv2d(6, 16, 3)
        # an affine operation: y = Wx + b
        self.fc1 = nn.Linear(16 * 6 * 6, 120)  # 6*6 from image dimension
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        # Max pooling over a (2, 2) window
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        # If the size is a square you can only specify a single number
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features
    
    def init_parameters(self,a):
        pass
    
    def loss(self,output,target):
        criterion = nn.MSELoss()
        loss = criterion(output, target)
        return loss


net = Net()
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=576, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [119]:
trainer = ModelTrainer(net, cifar10_train, cifar10_test, batch_size=64, val_size=.02)

Model trainer created:
Number of training samples: 49000 (83%)
Number of validation samples: 1000 (1%)
Number of test samples: 10000 (16%)
Number of train batches: 766
Number of samples per batch: 64



In [120]:
trainer.InitParameters(from_pretrained=None, use_gpu=1, a=1)
for param in trainer.model.parameters():
    print(param)

Parameter containing:
tensor([[[[-0.1310, -0.1424, -0.1720],
          [ 0.1197,  0.1184, -0.2506],
          [ 0.2374, -0.0751, -0.0696]]],


        [[[ 0.0798,  0.0304,  0.1062],
          [-0.1129, -0.2112,  0.0842],
          [-0.1649,  0.0471,  0.2805]]],


        [[[ 0.1573, -0.2870,  0.0680],
          [ 0.0907, -0.1963,  0.3085],
          [-0.0539, -0.1999, -0.0290]]],


        [[[-0.1169, -0.2866, -0.1622],
          [ 0.0678, -0.1545, -0.2050],
          [ 0.2296,  0.0229,  0.3149]]],


        [[[-0.1902, -0.2176, -0.2299],
          [ 0.3053,  0.1966, -0.1620],
          [ 0.0115,  0.1770, -0.1851]]],


        [[[ 0.2123,  0.1677, -0.0322],
          [ 0.0689,  0.0589, -0.1155],
          [ 0.1154,  0.1046, -0.2012]]]], device='cuda:1', requires_grad=True)
Parameter containing:
tensor([-0.3007,  0.2125, -0.2648,  0.2078, -0.0942, -0.1546], device='cuda:1',
       requires_grad=True)
Parameter containing:
tensor([[[[ 0.0338,  0.0019, -0.1326],
          [ 0.0267,  0.131

In [121]:
trainer.SGDTrain()

Parameter containing:
tensor([[[[-0.1310, -0.1424, -0.1720],
          [ 0.1197,  0.1184, -0.2506],
          [ 0.2374, -0.0751, -0.0696]]],


        [[[ 0.0798,  0.0304,  0.1062],
          [-0.1129, -0.2112,  0.0842],
          [-0.1649,  0.0471,  0.2805]]],


        [[[ 0.1573, -0.2870,  0.0680],
          [ 0.0907, -0.1963,  0.3085],
          [-0.0539, -0.1999, -0.0290]]],


        [[[-0.1169, -0.2866, -0.1622],
          [ 0.0678, -0.1545, -0.2050],
          [ 0.2296,  0.0229,  0.3149]]],


        [[[-0.1902, -0.2176, -0.2299],
          [ 0.3053,  0.1966, -0.1620],
          [ 0.0115,  0.1770, -0.1851]]],


        [[[ 0.2123,  0.1677, -0.0322],
          [ 0.0689,  0.0589, -0.1155],
          [ 0.1154,  0.1046, -0.2012]]]], device='cuda:1', requires_grad=True)
tensor([[[[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.]]],


        [[[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.]]],


        [[[0., 0., 0.],
          [0., 0., 0.],
          [0., 0