# Project

### Initialization

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from tqdm import tqdm
import time

## 1. Classification, weight sharing, auxiliary losses

In [3]:
from dlc_practical_prologue import generate_pair_sets
N = 1000
epochs = 25

In [9]:
a = generate_pair_sets(N)

In [28]:
a[2]

tensor([[7, 9],
        [1, 8],
        [5, 3],
        ...,
        [2, 7],
        [2, 0],
        [8, 1]])

## 2. Mini deep-learning framework

In [4]:
import math
from torch import empty
torch.set_grad_enabled(False)

<torch.autograd.grad_mode.set_grad_enabled at 0x24210e722c8>

In [5]:
def generate_disk_set(N):
    
    # Generate train sets of 2 uniform distributions on [0,1]x[0,1]
    train_input = torch.empty(N, 2).uniform_(0, 1)
    test_input = torch.empty(N, 2).uniform_(0, 1)
    
    recenter = torch.tensor([0.5, 0.5]) # to act as if the train data was centered around 0, to ease the following computation
    
    # Generate the target tensors filled with 1 if datapoint is inside of specific circle
    train_target = (-(train_input - recenter).pow(2).sum(1).sqrt().sub(1 / math.sqrt(2 * math.pi))).sign().add(1).div(2).int()
    test_target = (-(train_input - recenter).pow(2).sum(1).sqrt().sub(1 / math.sqrt(2 * math.pi))).sign().add(1).div(2).int()
    
    return train_input, test_input, train_target, test_target

In [24]:
train_input, test_input, train_target, test_target = generate_disk_set(N)
train_input, test_input, train_target, test_target

(tensor([[0.3001, 0.3820],
         [0.4924, 0.3364],
         [0.7606, 0.4009],
         ...,
         [0.1031, 0.2530],
         [0.1492, 0.7535],
         [0.9453, 0.8437]]),
 tensor([[0.2621, 0.4073],
         [0.8742, 0.5634],
         [0.0227, 0.9914],
         ...,
         [0.9823, 0.0819],
         [0.4692, 0.0556],
         [0.8813, 0.7483]]),
 tensor([1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1,
         1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1,
         1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1,
         0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1,
         1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
         0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
         1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1,
         1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0,

In [20]:
class Module(object):
    def __init__(self):
        pass

    '''
    Compute forward pass from an input tensor and return a tensor
    or a tuple of tensors as output
    '''
    def forward(self, x):
        raise NotImplementedError
        
    def backward(self, grad):
        '''
        should get as input a tensor or a tuple of tensors containing the 
        gradient of the loss with respect to the module’s output, accumulate 
        the gradient wrt the parameters, and return a tensor or a tuple of
        tensors containing the gradient of the loss wrt the module’s input.
        '''
        raise NotImplementedError
        
    def params(self):
        '''
        param should return a list of pairs, each composed of a parameter tensor, and a gradient tensor
        of same size. This list should be empty for parameterless modules (e.g. ReLU).

        '''
        return []
        
    def reset_params(self):
        return

### 1. Linear module

In [14]:
import torch
from torch.nn.init import xavier_normal_, xavier_normal

class Linear(Module):
    def __init__(self, dim_in, dim_out):
        super(Linear, self).__init__()
        self.dim_in = dim_in
        self.dim_out = dim_out
        self.epsilon = 1e-3
        self.x = 0

        # Initialize weights
        self.w = xavier_normal_(torch.empty(self.dim_out, self.dim_in))
        self.b = torch.empty(self.dim_out).normal_(0, self.epsilon)

        # Initialize gradient
        self.dl_dw = torch.empty(self.w.size())
        self.dl_db = torch.empty(self.b.size())
    
    def forward(self, x):
        self.x = x
        return self.x.mm(self.w.t()) + self.b


    def backward(self, grad):
        ds_dx = self.w.t()

        # do the same for every batch (batch dim becomes 1)
        dl_dx = ds_dx @ grad.t()

        # put batch dim back to 0
        dl_dx = dl_dx.t()

        # sum over all the outer product between (grad_1 * x_1^T) (_1 denotes not using mini-batches)
        self.dl_dw.add_(grad.t() @ self.x)

        # sum over the batch
        self.dl_db.add_(grad.sum(0))

        return dl_dx
        
    def params(self):
        return [(self.w, self.b), (self.dl_dw, self.dl_db)]
    
    def update_params(self, eta):
        self.w = self.w - eta * self.dl_dw
        self.b = self.b - eta * self.dl_db
        
    def reset_gradient(self):
        self.dl_dw.zero_()
        self.dl_db.zero_()

    def reset_params(self):
        # Initialize weights
        xavier_normal_(self.w)
        self.b.normal_(0, self.epsilon)

### 2. ReLu

In [16]:
import torch

def dReLU(x):
    s = x.clone()
    s[x>0] = 1
    s[x<=0] = 0
    return s


class ReLU(Module):
    def __init__(self):
        super(ReLU, self).__init__()
    
    def forward(self, x):
        self.x = x
        return x.clamp(min=0)
        
    def backward(self, grad):
        #ds_dx = dReLU(self.x)
        ds_dx = (torch.sign(self.x) + 1)/2
        dl_dx = ds_dx*grad
        return dl_dx
    
    def update_params(self, eta):
        return
    
    def reset_gradient(self):
        return

### 3. Tanh

In [17]:
class Tanh(Module):
    def __init__(self):
        super(Tanh, self).__init__()
    
    def forward(self, x):
        self.x = x
        return x.tanh()
        
    def backward(self, grad):
        ds_dx = 4 * (self.x.exp() + self.x.mul(-1).exp()).pow(-2)
        dl_dx = ds_dx*grad
        return dl_dx
        
    def params(self):
        return []
    
    def update_params(self, eta):
        return
    
    def reset_gradient(self):
        return

### 4. Sequential

In [18]:
class Sequential(Module):
    def __init__(self, *modules):
        super(Sequential, self).__init__()
        self.module_lst = []
        for module in modules:
            self.module_lst.append(module)
    
    def forward(self, x):
        for module in self.module_lst:
            x = module.forward(x)
        return x
        
    def backward(self, grad):
        for module in reversed(self.module_lst):
            grad = module.backward(grad)
        return grad
    
    def update_params(self, eta):
        for module in self.module_lst:
            module.update_params(eta)
            
    def params(self):
        lst = []
        for module in self.module_lst:
            lst.append(module.params())
        return lst
    
    def reset_gradient(self):
        for module in self.module_lst:
            module.reset_gradient()
        return
    
    def reset_params(self):
        for module in self.module_lst:
            module.reset_params()

### 5. Loss (MSE)

In [19]:
class MSELoss(Module):
    def __init__(self):
        super(MSELoss, self).__init__()
        
    def forward(self, v, t):
        return (v - t).pow(2).sum()
    
    def backward(self, v, t):
        return 2 * (v - t)

### 6. Internet SGD

In [46]:
class Optimizer(object):
    def __init__(self,parameters):
        self.parameters = parameters
    
    def step(self): 
        raise NotImplementedError

    def zeroGrad(self):
        for p in self.parameters:
            p.grad = 0.

class SGD(Optimizer):
    def __init__(self,parameters,lr=.001,weight_decay=0.0,momentum = .9):
        super().__init__(parameters)
        self.lr           = lr
        self.weight_decay = weight_decay
        self.momentum     = momentum
        self.velocity     = []
        for p in parameters:
            self.velocity.append(np.zeros_like(p.grad))

    def step(self):
        for p,v in zip(self.parameters,self.velocity):
            v = self.momentum*v+p.grad+self.weight_decay*p.data
            p.data=p.data-self.lr*v

---

### Model Building

In [37]:
class Net(Module):
    def __init__(self, nb_hidden):
        super().__init__()
        self.fc1 = Linear(256, nb_hidden)
        self.fc2 = Linear(nb_hidden, 10)

    def forward(self, x):
        x = F.relu(self.fc1(x.view(-1, 256)))
        x = self.fc2(x)
        return x

In [35]:
class Net(nn.Module):
    def __init__(self, nb_hidden):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=5)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=5)
        self.fc1 = nn.Linear(256, nb_hidden)
        self.fc2 = nn.Linear(nb_hidden, 10)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), kernel_size=3, stride=3))
        x = F.relu(F.max_pool2d(self.conv2(x), kernel_size=2, stride=2))
        x = F.relu(self.fc1(x.view(-1, 256)))
        x = self.fc2(x)
        return x

In [22]:
def train_model(model, train_input, train_target, mini_batch_size = 10, nb_epochs = 100):
    criterion = MSELoss()
    eta = 1e-1

    for e in range(nb_epochs):
        acc_loss = 0

        for b in range(0, train_input.size(0), mini_batch_size):
            output = model(train_input.narrow(0, b, mini_batch_size))
            loss = criterion(output, train_target.narrow(0, b, mini_batch_size))
            acc_loss = acc_loss + loss.item()

            model.zero_grad()
            loss.backward()

            with torch.no_grad():
                for p in model.parameters():
                    p -= eta * p.grad

        print(e, acc_loss)

In [38]:
model = Net(200)
mini_batch_size = 100
train_model(model, train_input, train_target, mini_batch_size)

TypeError: 'Net' object is not callable

In [47]:
batch_size        = 20
num_epochs        = 200
samples_per_class = 100
num_classes       = 3
hidden_units      = 100

model             = Net(100)
optim   = SGD(model.parameters,lr=1.0,weight_decay=0.001,momentum=.9)
loss_fn = MSELoss()
model.fit(train_input,train_target,batch_size,num_epochs,optim,loss_fn)
predicted_labels = np.argmax(model.predict(data),axis=1)
accuracy         = np.sum(predicted_labels==target)/len(target)
print("Model Accuracy = {}".format(accuracy))
utilities.plot2DDataWithDecisionBoundary(data,target,model)

AttributeError: 'Net' object has no attribute 'parameters'

---

# OneNote and so on

In [94]:
# MLP with 10 dimension input, 2 dimension output, ReLu activation and 2 hidden layers of dimension 100 and 50 :

nb_hidden = 25

model = nn.Sequential(
    nn.Linear(train_input.size(0),nb_hidden), nn.ReLU(),
    nn.Linear(nb_hidden, nb_hidden), nn.ReLU(),
    nn.Linear(nb_hidden, nb_hidden), nn.ReLU(),
    nn.Linear(nb_hidden,2)
)
output = model(train_input[:,0])
print(output, output.size())

tensor([0.0126, 0.0244]) torch.Size([2])


In [None]:
class Net(nn.Module):
    def __init__(self, nb_hidden):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=5)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=5)
        self.fc1 = nn.Linear(256, nb_hidden)
        self.fc2 = nn.Linear(nb_hidden, 10)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), kernel_size=3, stride=3))
        x = F.relu(F.max_pool2d(self.conv2(x), kernel_size=2, stride=2))
        x = F.relu(self.fc1(x.view(-1, 256)))
        x = self.fc2(x)
        return x

For any model of reasonable complexity, the best is to write a sub-class of torch.nn.Module

In [91]:
N = 100
nb_hidden = 25
train_input, test_input, train_target, test_target = generate_disk_set(N)

In [66]:
train_input.view(-1)

tensor([0.4474, 0.1293, 0.3799, 0.9915, 0.3407, 0.2672, 0.2204, 0.4320, 0.9817,
        0.9715, 0.3266, 0.3897, 0.2064, 0.9942, 0.0016, 0.3661, 0.9314, 0.5677,
        0.8934, 0.4375, 0.5778, 0.0275, 0.4904, 0.4365, 0.6211, 0.7106, 0.6118,
        0.0729, 0.5501, 0.3222, 0.4279, 0.0826, 0.0834, 0.8889, 0.5114, 0.9144,
        0.9046, 0.6023, 0.4327, 0.1574, 0.4418, 0.1041, 0.4096, 0.1024, 0.5083,
        0.1955, 0.5527, 0.3064, 0.3274, 0.4981, 0.0486, 0.9130, 0.7728, 0.5100,
        0.1335, 0.0131, 0.3708, 0.2610, 0.2100, 0.5865, 0.8330, 0.2257, 0.9406,
        0.6698, 0.2257, 0.7610, 0.0579, 0.6792, 0.6793, 0.7769, 0.4474, 0.8501,
        0.0397, 0.1228, 0.8479, 0.7594, 0.1201, 0.3619, 0.6985, 0.3740, 0.1334,
        0.0299, 0.8139, 0.6408, 0.1183, 0.7721, 0.8264, 0.5864, 0.7392, 0.3238,
        0.4670, 0.0569, 0.2674, 0.9871, 0.1755, 0.1720, 0.0281, 0.8237, 0.9125,
        0.5453, 0.1243, 0.5424, 0.4292, 0.2261, 0.3241, 0.3811, 0.3366, 0.7102,
        0.3545, 0.1935, 0.1812, 0.3359, 