# Ejercicios

## Nathalia Morales

1. Experimenten con el numero de neuronas en el modelo al igual que el learning rate.
    
    a. Que cambios resultan en un output mas lineal del modelo?
    
    b. Pueden hacer que el modelo haga un overfit obvio de la data?
    
    
    
    
2. Cargen la [data de vinos blancos](https://archive.ics.uci.edu/ml/datasets/wine+quality) y creen un modelo con el numero apropiado de inputs
    
    a. Cuanto tarda en entrenar comparado al dataset que hemos estado usando?
    
    b. Pueden explicar que factores contribuyen a los tiempos de entrenamiento?
    
    c. Pueden hacer que el _loss_ disminuya?
    
    d. Intenten graficar la data

## PREGUNTA 1


In [1]:
import torch
from torch import nn
import torch.optim as optim

In [2]:
t_c = [0.5, 14.0, 15.0, 28.0, 11.0, 8.0, 3.0, -4.0, 6.0, 13.0, 21.0] # Temperatura en grados celsios
t_u = [35.7, 55.9, 58.2, 81.9, 56.3, 48.9, 33.9, 21.8, 48.4, 60.4, 68.4] # Unidades desconocidas
t_c = torch.tensor(t_c).unsqueeze(1) # Agregamos una dimension para tener B x N_inputs
t_u = torch.tensor(t_u).unsqueeze(1) # Agregamos una dimension para tener B x N_inputs

n_samples = t_u.shape[0]
n_val = int(0.2 * n_samples)

shuffled_indices = torch.randperm(n_samples)

train_indices = shuffled_indices[:-n_val]
val_indices = shuffled_indices[-n_val:]

train_t_u = t_u[train_indices]
train_t_c = t_c[train_indices]

val_t_u = t_u[val_indices]
val_t_c = t_c[val_indices]

train_t_un = 0.1 * train_t_u
val_t_un = 0.1 * val_t_u

In [3]:
def training_loop(model, n_epochs, optimizer, loss_fn, train_x, val_x, train_y, val_y):
    for epoch in range(1, n_epochs + 1):
        train_t_p = model(train_x)
        train_loss = loss_fn(train_t_p, train_y)
        
        with torch.no_grad():
            val_t_p = model(val_x)
            val_loss = loss_fn(val_t_p, val_y)
        
        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()
        
        if epoch == 1 or epoch % 1000 == 0:
            print(f"Epoch {epoch}, Training loss {train_loss}, Validation loss {val_loss}")

In [4]:
class SubclassModel(nn.Module):
    def __init__(self, num):
        super().__init__()
        self.hidden_linear = nn.Linear(1, num)
        self.hidden_activation = nn.Tanh()
        self.output_linear = nn.Linear(num, 1)

    def forward(self, input):
        hidden_t = self.hidden_linear(input)
        activated_t = self.hidden_activation(hidden_t)
        output_t = self.output_linear(activated_t)

        return output_t

## 13 layers

In [5]:
subclass_model = SubclassModel(13)

optimizer = optim.SGD(subclass_model.parameters(), lr=1e-3)

training_loop(
    n_epochs=5000,
    optimizer=optimizer,
    model=subclass_model,
    loss_fn=nn.MSELoss(), # Ya no estamos usando nuestra loss function hecha a mano
    train_x = train_t_un,
    val_x = val_t_un,
    train_y = train_t_c,
    val_y = val_t_c)

print('output', subclass_model(val_t_un))
print('answer', val_t_c)
print('hidden', subclass_model.hidden_linear.weight.grad)

Epoch 1, Training loss 207.57418823242188, Validation loss 56.70370864868164
Epoch 1000, Training loss 4.345487594604492, Validation loss 3.6638436317443848
Epoch 2000, Training loss 4.191321849822998, Validation loss 8.117053031921387
Epoch 3000, Training loss 2.768686532974243, Validation loss 5.3557562828063965
Epoch 4000, Training loss 2.3527543544769287, Validation loss 4.536910533905029
Epoch 5000, Training loss 2.0870046615600586, Validation loss 3.90848970413208
output tensor([[ 1.9058],
        [12.0217]], grad_fn=<AddmmBackward>)
answer tensor([[ 0.5000],
        [11.0000]])
hidden tensor([[  0.8088],
        [ 12.7127],
        [  0.0811],
        [-11.4233],
        [ -0.1161],
        [-12.2943],
        [ -0.0471],
        [-13.5199],
        [ -8.8895],
        [  8.7191],
        [ -2.4318],
        [  0.1213],
        [-11.8224]])


## 5 layers

In [6]:
subclass_model = SubclassModel(5)

optimizer = optim.SGD(subclass_model.parameters(), lr=1e-3)

training_loop(
    n_epochs=5000,
    optimizer=optimizer,
    model=subclass_model,
    loss_fn=nn.MSELoss(), # Ya no estamos usando nuestra loss function hecha a mano
    train_x = train_t_un,
    val_x = val_t_un,
    train_y = train_t_c,
    val_y = val_t_c)

print('output', subclass_model(val_t_un))
print('answer', val_t_c)
print('hidden', subclass_model.hidden_linear.weight.grad)

Epoch 1, Training loss 199.02017211914062, Validation loss 52.30779266357422
Epoch 1000, Training loss 7.619917392730713, Validation loss 3.8969128131866455
Epoch 2000, Training loss 6.849825382232666, Validation loss 11.925092697143555
Epoch 3000, Training loss 3.305816411972046, Validation loss 6.030238628387451
Epoch 4000, Training loss 2.22808575630188, Validation loss 4.1533331871032715
Epoch 5000, Training loss 1.9908980131149292, Validation loss 4.028945446014404
output tensor([[ 2.1724],
        [11.8699]], grad_fn=<AddmmBackward>)
answer tensor([[ 0.5000],
        [11.0000]])
hidden tensor([[ -0.3298],
        [ 19.4053],
        [ 14.1897],
        [ -0.5535],
        [-16.9487]])


## 25 layers
* Tiende a overfit (1b)

In [7]:
subclass_model = SubclassModel(25)

optimizer = optim.SGD(subclass_model.parameters(), lr=1e-3)

training_loop(
    n_epochs=5000,
    optimizer=optimizer,
    model=subclass_model,
    loss_fn=nn.MSELoss(), # Ya no estamos usando nuestra loss function hecha a mano
    train_x = train_t_un,
    val_x = val_t_un,
    train_y = train_t_c,
    val_y = val_t_c)

print('output', subclass_model(val_t_un))
print('answer', val_t_c)
print('hidden', subclass_model.hidden_linear.weight.grad)

Epoch 1, Training loss 234.3259735107422, Validation loss 70.40892028808594
Epoch 1000, Training loss 3.1621735095977783, Validation loss 3.604989767074585
Epoch 2000, Training loss 3.053976535797119, Validation loss 0.8564319014549255
Epoch 3000, Training loss 2.3069705963134766, Validation loss 1.2977840900421143
Epoch 4000, Training loss 2.063998222351074, Validation loss 1.5867054462432861
Epoch 5000, Training loss 1.9696531295776367, Validation loss 1.7700603008270264
output tensor([[ 2.2175],
        [12.9766]], grad_fn=<AddmmBackward>)
answer tensor([[ 0.5000],
        [11.0000]])
hidden tensor([[ 2.9764e-03],
        [ 5.0816e-01],
        [ 2.1858e-02],
        [ 8.3383e-01],
        [ 5.9357e+00],
        [-7.8046e+00],
        [-4.4292e-01],
        [-8.1456e+00],
        [ 2.3645e-03],
        [ 8.6857e-04],
        [-6.7006e+00],
        [ 1.9460e-02],
        [-3.2586e-01],
        [ 4.4285e+00],
        [ 8.4881e+00],
        [-1.3229e+00],
        [-7.1200e-03],
       

* Mientras mas incrementamos el numero de layers menor se convierte el error pero tambien tiende a un overfit de la data. Para poder tener un modelo "mas linear" tendriamos que inclinarnos a encontrar un numero de layers que adecue para no hace uner o over-fit.

## PREGUNTA 2


Cargando la data.

a. Cuanto tarda en entrenar comparado al dataset que hemos estado usando?
    * No tarda mucho, dado que se carga desde numpy.
    
b. Pueden explicar que factores contribuyen a los tiempos de entrenamiento?
    * El tamano de la data. El tipo de estructura de data.
    
c. Pueden hacer que el loss disminuya?
    * 
    
d. Intenten graficar la data

In [8]:
import numpy as np

data = np.loadtxt('../data/tabular-wine/winequality-white.csv', delimiter=";", skiprows=1).astype(float)
#print(type(data))
print (data[0:2,:]) # first 2 rows

[[7.000e+00 2.700e-01 3.600e-01 2.070e+01 4.500e-02 4.500e+01 1.700e+02
  1.001e+00 3.000e+00 4.500e-01 8.800e+00 6.000e+00]
 [6.300e+00 3.000e-01 3.400e-01 1.600e+00 4.900e-02 1.400e+01 1.320e+02
  9.940e-01 3.300e+00 4.900e-01 9.500e+00 6.000e+00]]


In [9]:
x = data[:,0:11]
y = data[:,-1]
x = torch.tensor(x)
y = torch.tensor(y)

x = x.float()
y = y.float()

x = torch.tensor(x).unsqueeze(1) 
y = torch.tensor(y).unsqueeze(1)

n_samples = data.shape[0]
n_val = int(0.2 * n_samples)

train_x = x[:n_val]
train_y = y[:n_val]

val_x = x[n_val:]
val_y = y[n_val:]

CrossEntropyLoss = torch.nn.CrossEntropyLoss()
#train_t_un = 0.1 * train_t_u
#val_t_un = 0.1 * val_t_u

  if __name__ == '__main__':
  # Remove the CWD from sys.path while we load stuff.


In [10]:
def training_loop(model, n_epochs, optimizer, loss_fn, train_x, val_x, train_y, val_y):
    for epoch in range(1, n_epochs + 1):
        train_t_p = model(train_x)
        train_loss = loss_fn(train_t_p, train_y)
        
        with torch.no_grad():
            val_t_p = model(val_x)
            val_loss = loss_fn(val_t_p, val_y)
        
        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()
        
        if epoch == 1 or epoch % 1000 == 0:
            print(f"Epoch {epoch}, Training loss {train_loss}, Validation loss {val_loss}")

In [18]:
class SubclassModel(nn.Module):
    def __init__(self, num):
        super().__init__()
        self.hidden_linear = nn.Linear(1, num)
        self.hidden_activation = nn.Tanh()
        self.output_linear = nn.Linear(num, 1)

    def forward(self, input):
        hidden_t = self.hidden_linear(input)
        activated_t = self.hidden_activation(hidden_t)
        output_t = self.output_linear(activated_t)
        torch.softmax(x,dim=1)

        return output_t
lf = nn.CrossEntropyLoss()

## Corriendo las funciones

### Con 25 Hidden Layers

In [19]:
subclass_model = SubclassModel(25)

optimizer = optim.SGD(subclass_model.parameters(), lr=1e-3)

training_loop(
    n_epochs=5000,
    optimizer=optimizer,
    model=subclass_model,
    loss_fn=nn.MSELoss(), # Ya no estamos usando nuestra loss function hecha a mano
    train_x = train_t_un,
    val_x = val_t_un,
    train_y = train_t_c,
    val_y = val_t_c)

print('output', subclass_model(val_t_un))
print('answer', val_t_c)
print('hidden', subclass_model.hidden_linear.weight.grad)

Epoch 1, Training loss 225.99929809570312, Validation loss 65.46632385253906
Epoch 1000, Training loss 3.2555410861968994, Validation loss 3.627790689468384
Epoch 2000, Training loss 3.222292423248291, Validation loss 0.7058439254760742
Epoch 3000, Training loss 2.393249273300171, Validation loss 1.1475290060043335
Epoch 4000, Training loss 2.1433265209198, Validation loss 1.405439853668213
Epoch 5000, Training loss 2.0343775749206543, Validation loss 1.5787097215652466
output tensor([[ 2.1963],
        [13.0570]], grad_fn=<AddmmBackward>)
answer tensor([[ 0.5000],
        [11.0000]])
hidden tensor([[ 7.1845e-02],
        [ 7.1077e-01],
        [ 8.3568e-01],
        [ 3.0976e-03],
        [-8.5702e-01],
        [-9.4307e+00],
        [ 4.3290e-03],
        [ 7.9703e+00],
        [-7.5243e+00],
        [-2.9198e-01],
        [ 2.2652e-01],
        [ 8.3157e+00],
        [ 8.0844e-01],
        [ 7.6001e-03],
        [ 5.7672e-01],
        [-9.3999e+00],
        [ 9.1233e-01],
        [ 

### 10 Hidden Layers

In [20]:
subclass_model = SubclassModel(10)

optimizer = optim.SGD(subclass_model.parameters(), lr=1e-3)

training_loop(
    n_epochs=5000,
    optimizer=optimizer,
    model=subclass_model,
    loss_fn=nn.MSELoss(), # Ya no estamos usando nuestra loss function hecha a mano
    train_x = train_t_un,
    val_x = val_t_un,
    train_y = train_t_c,
    val_y = val_t_c)

print('output', subclass_model(val_t_un))
print('answer', val_t_c)
print('hidden', subclass_model.hidden_linear.weight.grad)

Epoch 1, Training loss 214.64125061035156, Validation loss 59.96287536621094
Epoch 1000, Training loss 4.819217205047607, Validation loss 3.426779270172119
Epoch 2000, Training loss 5.057486534118652, Validation loss 9.091812133789062
Epoch 3000, Training loss 3.3582987785339355, Validation loss 6.536432266235352
Epoch 4000, Training loss 2.20505428314209, Validation loss 4.019712924957275
Epoch 5000, Training loss 2.1684963703155518, Validation loss 4.423313140869141
output tensor([[ 2.0653],
        [11.8438]], grad_fn=<AddmmBackward>)
answer tensor([[ 0.5000],
        [11.0000]])
hidden tensor([[  0.2115],
        [ 17.9567],
        [ -3.9392],
        [  0.8184],
        [ 16.3162],
        [-17.2717],
        [ -0.0281],
        [-13.7747],
        [ 17.8609],
        [  0.0566]])
