<a href="https://colab.research.google.com/github/Russo-Federico/DeepLearningFundamentals/blob/main/IBM-PyTorch-DL/0-PyTorchBasicsForDL.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Build a Neural Network

In [1]:
import numpy as np
import torch
import torch.nn as nn
from torch import sigmoid

Create a NN class

In [2]:
class NN(nn.Module):
  # D_in - input size of the network
  # H - number of neurons
  # D_out - output size of the netowrk
  def __init__(self, D_in, H, D_out):
    super(NN, self).__init__()
    self.linear1 = nn.Linear(D_in, H) # input layer
    self.linear2 = nn.Linear(H, D_out) # output layer

  def forward(self, x):
    x = sigmoid(self.linear1(x))
    x = sigmoid(self.linear2(x))
    return x

In [3]:
x = torch.tensor([1.0])

model = NN(1,2,1)
yhat = model(x)
print("yhat:", yhat)

yhat: tensor([0.6209], grad_fn=<SigmoidBackward0>)


In [4]:
model.state_dict()

OrderedDict([('linear1.weight',
              tensor([[0.3446],
                      [0.0838]])),
             ('linear1.bias', tensor([-0.4776, -0.0582])),
             ('linear2.weight', tensor([[ 0.0487, -0.2857]])),
             ('linear2.bias', tensor([0.6154]))])

Build a network using the Sequential module

In [5]:
seq_model = torch.nn.Sequential(
    torch.nn.Linear(1,2),
    torch.nn.Sigmoid(),
    torch.nn.Linear(2,1),
    torch.nn.Sigmoid()
)

yhat = seq_model(x)
print("yhat:", yhat)

yhat: tensor([0.6329], grad_fn=<SigmoidBackward0>)


Train a model

In [6]:
def train(Y, X, model, optimizer, criterion, epochs=100):
  cost = []
  total = 0
  for epoch in range(epochs):
    total = 0
    for x, y in zip(X, Y):
      yhat = model(x)
      loss = criterion(yhat, y)
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()
      total += loss.item()
    cost.append(total)
  return cost

In [7]:
model = NN(1,2,1)
criterion = nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

X = torch.arange(-20,20,1).view(-1,1).type(torch.FloatTensor)
Y = torch.zeros(X.shape[0],1)
Y[(X[:,0] > -4) & (X[:,0] < 4)] = 1.0

cost = train(Y, X, model, optimizer, criterion)

print("cost: ", cost)

cost:  [23.561915576457977, 22.429751574993134, 21.564449042081833, 20.898388355970383, 20.38161814212799, 19.977368354797363, 19.658517077565193, 19.40496288239956, 19.201710298657417, 19.037483602762222, 18.903740420937538, 18.793961003422737, 18.703127428889275, 18.62735678255558, 18.5636186003685, 18.509534522891045, 18.46322701871395, 18.423209965229034, 18.388291880488396, 18.357522055506706, 18.330134481191635, 18.305508837103844, 18.283143222332, 18.262627974152565, 18.243629425764084, 18.22587351500988, 18.209133878350258, 18.193226024508476, 18.17799712717533, 18.16331870853901, 18.149085268378258, 18.13520772755146, 18.121614322066307, 18.108241125941277, 18.09503836929798, 18.081960201263428, 18.068970501422882, 18.056037470698357, 18.04313513636589, 18.030239060521126, 18.01733075082302, 18.004393443465233, 17.991411939263344, 17.97837184369564, 17.965264052152634, 17.952077731490135, 17.93880522251129, 17.92543713748455, 17.911967128515244, 17.898389294743538, 17.88469724

**More hidden neurons**

In [8]:
from torch.utils.data import Dataset, DataLoader

In [9]:
class Data(Dataset):
  def __init__(self):
    self.x = torch.linspace(-20,20,100).view(-1,1)
    self.y = torch.zeros(self.x.shape[0])
    self.y[(self.x[:,0] > -10 & (self.x[:,0] < -5))] = 1
    self.y[(self.x[:,0] > 5 & (self.x[:,0] < 10))] = 1
    self.y = self.y.view(-1,1)
    self.len = self.x.shape[0]

  def __getitem__(self, index):
    return self.x[index], self.y[index]

  def __len__(self):
    return self.len

In [10]:
class NN(nn.Module):
  def __init__(self, D_in, H, D_out):
    super(NN, self).__init__()
    self.linear1 = nn.Linear(D_in, H)
    self.linear2 = nn.Linear(H, D_out)

  def forward(self, x):
    x = sigmoid(self.linear1(x))
    x = sigmoid(self.linear2(x))
    return x

In [11]:
def train(data_set, model, criterion, train_loader, optimizer, epochs=5, plot_number=10):
  cost = []
  for epoch in range(epochs):
    total = 0
    for x,y in train_loader:
      yhat = model(x)
      loss = criterion(yhat, y)
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()
      total += loss.item()
    cost.append(total)
  return cost

In [12]:
model = NN(1,6,1)
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
train_dataset = Data()
train_loader = DataLoader(dataset=train_dataset, batch_size=100)

cost = train(train_dataset, model, criterion, train_loader, optimizer, epochs=100)

i = 0
for c in cost:
  i += 1
  if i % 10 == 0:
    print(c)

0.5412892699241638
0.4258846044540405
0.3392331302165985
0.275127649307251
0.2276940941810608
0.19183841347694397
0.16399738192558289
0.14205458760261536
0.12455186992883682
0.11040239036083221


same as above, but using nn.Sequential

In [13]:
model = torch.nn.Sequential(
    torch.nn.Linear(1,6),
    torch.nn.Sigmoid(),
    torch.nn.Linear(6,1),
    torch.nn.Sigmoid()
)

**Multidimensional input data**

In [14]:
class XOR_Data(Dataset):

    # Constructor
    def __init__(self, N_s=100):
        self.x = torch.zeros((N_s, 2))
        self.y = torch.zeros((N_s, 1))
        for i in range(N_s // 4):
            self.x[i, :] = torch.Tensor([0.0, 0.0])
            self.y[i, 0] = torch.Tensor([0.0])

            self.x[i + N_s // 4, :] = torch.Tensor([0.0, 1.0])
            self.y[i + N_s // 4, 0] = torch.Tensor([1.0])

            self.x[i + N_s // 2, :] = torch.Tensor([1.0, 0.0])
            self.y[i + N_s // 2, 0] = torch.Tensor([1.0])

            self.x[i + 3 * N_s // 4, :] = torch.Tensor([1.0, 1.0])
            self.y[i + 3 * N_s // 4, 0] = torch.Tensor([0.0])

            self.x = self.x + 0.01 * torch.randn((N_s, 2))
        self.len = N_s

    # Getter
    def __getitem__(self, index):
        return self.x[index],self.y[index]

    # Get Length
    def __len__(self):
        return self.len

In [15]:
# Calculate accuracy

def accuracy(model, data_set):
    return np.mean(data_set.y.view(-1).numpy() == (model(data_set.x)[:, 0] > 0.5).numpy())

In [16]:
def train(dataset, model, criterion, train_loader, optimizer, epochs=100):
  cost = []
  acc = []
  for epoch in range(epochs):
    total = 0
    for x,y in train_loader:
      yhat = model(x)
      loss = criterion(yhat, y)
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()
      total = loss.item()
    acc.append(accuracy(model, dataset))
    cost.append(total)

  return cost, acc

In [17]:
model = NN(2,4,1)
criterion = nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
train_dataset = XOR_Data()
train_loader = DataLoader(dataset=train_dataset, batch_size=1)

cost, acc = train(train_dataset, model, criterion, train_loader, optimizer)

In [18]:
for i in range(len(cost)):
  if i % 10 == 0:
    print(f"cost: {cost[i]} - accuracy: {acc[i]}")

cost: 0.8898568153381348 - accuracy: 0.5
cost: 0.6769763231277466 - accuracy: 0.5
cost: 0.6747108697891235 - accuracy: 0.5
cost: 0.673123300075531 - accuracy: 0.5
cost: 0.6713958382606506 - accuracy: 0.5
cost: 0.669532835483551 - accuracy: 0.5
cost: 0.6675400733947754 - accuracy: 0.5
cost: 0.6654186844825745 - accuracy: 0.5
cost: 0.6631607413291931 - accuracy: 0.5
cost: 0.6607485413551331 - accuracy: 0.5


**Multi-Class NNs**

In [19]:
import torchvision.datasets as dsets
import torchvision.transforms as transforms

In [20]:
# for multi-class model the number of neurons in the outoput layer must correpond to the number of target classes
class NN(nn.Module):
  def __init__(self, D_in, H, D_out):
    super(NN, self).__init__()
    self.linear1 = nn.Linear(D_in, H)
    self.linear2 = nn.Linear(H, D_out)

  def forward(self, x):
    x = sigmoid(self.linear1(x))
    x = self.linear2(x) # drop the activation function in last layer
    return x

In [21]:
# same NN, but using nn.Sequential

model = torch.nn.Sequential(
    torch.nn.Linear(2,6),
    torch.nn.Sigmoid(),
    torch.nn.Linear(6,3),
)

In [22]:
train_dataset = dsets.MNIST(root='./data', train=True, download=True, transform=transforms.ToTensor())
validation_dataset = dsets.MNIST(root='./data', train=False, download=True, transform=transforms.ToTensor())

train_loader = DataLoader(dataset=train_dataset, batch_size=2000)
validation_loader = DataLoader(dataset=validation_dataset, batch_size=5000)

100%|██████████| 9.91M/9.91M [00:00<00:00, 20.3MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 607kB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 5.64MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 5.09MB/s]


In [23]:
# training function

def train(model, criterion, train_loader, validation_loader, optimizer, epochs=100):
    i = 0
    useful_stuff = {'training_loss': [],'validation_accuracy': []}
    for epoch in range(epochs):
        # first train on training data
        for i, (x, y) in enumerate(train_loader):
            optimizer.zero_grad()
            z = model(x.view(-1, 28 * 28))
            loss = criterion(z, y)
            loss.backward()
            optimizer.step()
             #loss for every iteration
            useful_stuff['training_loss'].append(loss.data.item())
        correct = 0
        # then evaluate using validation data
        for x, y in validation_loader:
            #validation
            z = model(x.view(-1, 28 * 28))
            _, label = torch.max(z, 1)
            correct += (label == y).sum().item()
        accuracy = 100 * (correct / len(validation_dataset))
        useful_stuff['validation_accuracy'].append(accuracy)
    return useful_stuff

In [24]:
model = NN(28 * 28, 100, 10) # 10 classes, digits 0-9

optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
criterion = nn.CrossEntropyLoss()

train_results = train(model, criterion, train_loader, validation_loader, optimizer, epochs=30)

In [25]:
print("training loss:", train_results['training_loss'][0])
print("validation accuracy:", train_results['validation_accuracy'][0])

print("\ntraining loss:", train_results['training_loss'][-1])
print("validation accuracy:", train_results['validation_accuracy'][-1])

training loss: 2.3496439456939697
validation accuracy: 10.32

training loss: 1.9734208583831787
validation accuracy: 62.07


**Backpropagation and Activation Functions**
<br><br>
Backpropagation: compute the gradient for updating the weights
<br>
Activation Functions: add non linearity to the model
<br>
* **sigmoid**: 0,1 interval, suffers from vanishing gradient problem
* **tanh**: similar to sigmoid, but with -1,1 interval
* **relu**: y = max(0,x), helps against vanishing gradient problem



In [26]:
# NN w/ sigmoid activation function

class Net_Sig(nn.Module):
  def __init__(self, D_in, H, D_out):
    super(Net_Tanh, self).__init__()
    self.linear1 = nn.Linear(D_in, H)
    self.linear2 = nn.Linear(H, D_out)

  def forward(self, x):
    x = torch.sigmoid(self.linear1(x))
    x = self.linear2(x)
    return x

In [27]:
# NN w/ tanh activation function

class Net_Tanh(nn.Module):
  def __init__(self, D_in, H, D_out):
    super(Net_Tanh, self).__init__()
    self.linear1 = nn.Linear(D_in, H)
    self.linear2 = nn.Linear(H, D_out)

  def forward(self, x):
    x = torch.tanh(self.linear1(x))
    x = self.linear2(x)
    return x

In [28]:
# NN w/ relu activation function

class Net_Relu(nn.Module):
  def __init__(self, D_in, H, D_out):
    super(Net_Tanh, self).__init__()
    self.linear1 = nn.Linear(D_in, H)
    self.linear2 = nn.Linear(H, D_out)

  def forward(self, x):
    x = torch.relu(self.linear1(x))
    x = self.linear2(x)
    return x

In [29]:
# NN w/ tanh activation function using nn.Sequential
model_tanh = nn.Sequential(
    nn.Linear(28 * 28, 100),
    nn.Tanh(),
    nn.Linear(100, 10)
)

# NN w/ relu activation function using nn.Sequential
model_relu = nn.Sequential(
    nn.Linear(28 * 28, 100),
    nn.ReLU(),
    nn.Linear(100, 10)
)

# Deep NNs

In [30]:
# deep NN w/ two hidden layers
# D_in -> dimension input features
# H1, H2 -> number of neurons in first and second hidden layer
# D_out -> dimension of output (equal to the numebr of classes, one for regression)

class Dnn(nn.Module):
  def __init__(self, D_in, H1, H2, D_out):
    super(Dnn, self).__init__()
    self.linear1 = nn.Linear(D_in, H1)
    self.linear2 = nn.Linear(H1, H2)
    self.linear3 = nn.Linear(H2, D_out)

  def forward(self, x):
    x = torch.sigmoid(self.linear1(x))
    x = torch.sigmoid(self.linear2(x))
    x = self.linear3(x)
    return x

In [31]:
model = Dnn(3,3,4,3)
print(model.parameters)

<bound method Module.parameters of Dnn(
  (linear1): Linear(in_features=3, out_features=3, bias=True)
  (linear2): Linear(in_features=3, out_features=4, bias=True)
  (linear3): Linear(in_features=4, out_features=3, bias=True)
)>


In [32]:
model = nn.Sequential(
    nn.Linear(3,3),
    nn.Sigmoid(),
    nn.Linear(3,4),
    nn.Sigmoid(),
    nn.Linear(4,3)
)

print(model.parameters)

<bound method Module.parameters of Sequential(
  (0): Linear(in_features=3, out_features=3, bias=True)
  (1): Sigmoid()
  (2): Linear(in_features=3, out_features=4, bias=True)
  (3): Sigmoid()
  (4): Linear(in_features=4, out_features=3, bias=True)
)>


Build a DNN using **nn.ModuleList()**

In [33]:
class Dnn(nn.Module):
  def __init__(self, Layers):
    super(Dnn, self).__init__()
    self.hidden = nn.ModuleList()
    # it's like doing zip([2,3,4], [3,4,3])
    for input_size, output_size in zip(Layers, Layers[1:]):
      # couples: (2,3) - (3,4) - (4,3)
      self.hidden.append(nn.Linear(input_size, output_size))

  def forward(self, activation):
    L = len(self.hidden)
    for(l, linear_transform) in zip(range(L), self.hidden):
      if l < L - 1:
        activation = torch.relu(linear_transform(activation))
      else:
        activation = linear_transform(activation)
    return activation

Layers = [2,3,4,3]
model = Dnn(Layers)

print(model.parameters)

<bound method Module.parameters of Dnn(
  (hidden): ModuleList(
    (0): Linear(in_features=2, out_features=3, bias=True)
    (1): Linear(in_features=3, out_features=4, bias=True)
    (2): Linear(in_features=4, out_features=3, bias=True)
  )
)>


Dropout regularisation to prevent overfitting

In [34]:
class Dnn(nn.Module):
  def __init__(self, in_size, n_hidden, out_size, p=0):
    super(Dnn, self).__init__()
    # create a dropout object passing the probability p of turning off a neuron
    self.drop = nn.Dropout(p=p)
    self.linear1 = nn.Linear(in_size, n_hidden)
    self.linear2 = nn.Linear(n_hidden, n_hidden)
    self.linear3 = nn.Linear(n_hidden, out_size)

  def forward(self, x):
    x = torch.relu(self.linear1(x))
    x = self.drop(x)
    x = torch.relu(self.linear2(x))
    x = self.drop(x)
    x = self.linear3(x)
    return x

model_drop = Dnn(2,300,2,0.5)

# specify that we are in the training phase -> dropout enabled
model_drop.train()
print(model_drop.parameters)

# specify that we are in the evaluation phase -> dropout disabled
model_drop.eval()
print(model_drop.parameters)

<bound method Module.parameters of Dnn(
  (drop): Dropout(p=0.5, inplace=False)
  (linear1): Linear(in_features=2, out_features=300, bias=True)
  (linear2): Linear(in_features=300, out_features=300, bias=True)
  (linear3): Linear(in_features=300, out_features=2, bias=True)
)>
<bound method Module.parameters of Dnn(
  (drop): Dropout(p=0.5, inplace=False)
  (linear1): Linear(in_features=2, out_features=300, bias=True)
  (linear2): Linear(in_features=300, out_features=300, bias=True)
  (linear3): Linear(in_features=300, out_features=2, bias=True)
)>


To avoid getting stuck in a local minimum, it's possible to add a momentum parameter to the optimizer as in the example below

In [35]:
optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.4)