<a href="https://colab.research.google.com/github/Russo-Federico/DeepLearningFundamentals/blob/main/IBM-PyTorch-DL/0-PyTorchBasicsForDL.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Build a Neural Network

In [1]:
import numpy as np
import torch
import torch.nn as nn
from torch import sigmoid

Create a NN class

In [2]:
class NN(nn.Module):
  # D_in - input size of the network
  # H - number of neurons
  # D_out - output size of the netowrk
  def __init__(self, D_in, H, D_out):
    super(NN, self).__init__()
    self.linear1 = nn.Linear(D_in, H) # input layer
    self.linear2 = nn.Linear(H, D_out) # output layer

  def forward(self, x):
    x = sigmoid(self.linear1(x))
    x = sigmoid(self.linear2(x))
    return x

In [3]:
x = torch.tensor([1.0])

model = NN(1,2,1)
yhat = model(x)
print("yhat:", yhat)

yhat: tensor([0.5643], grad_fn=<SigmoidBackward0>)


In [4]:
model.state_dict()

OrderedDict([('linear1.weight',
              tensor([[-0.6540],
                      [-0.2665]])),
             ('linear1.bias', tensor([ 0.6798, -0.9020])),
             ('linear2.weight', tensor([[ 0.6610, -0.2444]])),
             ('linear2.bias', tensor([-0.0181]))])

Build a network using the Sequential module

In [5]:
seq_model = torch.nn.Sequential(
    torch.nn.Linear(1,2),
    torch.nn.Sigmoid(),
    torch.nn.Linear(2,1),
    torch.nn.Sigmoid()
)

yhat = seq_model(x)
print("yhat:", yhat)

yhat: tensor([0.6243], grad_fn=<SigmoidBackward0>)


Train a model

In [6]:
def train(Y, X, model, optimizer, criterion, epochs=100):
  cost = []
  total = 0
  for epoch in range(epochs):
    total = 0
    for x, y in zip(X, Y):
      yhat = model(x)
      loss = criterion(yhat, y)
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()
      total += loss.item()
    cost.append(total)
  return cost

In [7]:
model = NN(1,2,1)
criterion = nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

X = torch.arange(-20,20,1).view(-1,1).type(torch.FloatTensor)
Y = torch.zeros(X.shape[0],1)
Y[(X[:,0] > -4) & (X[:,0] < 4)] = 1.0

cost = train(Y, X, model, optimizer, criterion)

print("cost: ", cost)

cost:  [22.109044402837753, 21.13212439417839, 20.38023230433464, 19.7957224547863, 19.336709439754486, 18.97259023785591, 18.680828988552094, 18.44469502568245, 18.251649752259254, 18.092217564582825, 17.95917296409607, 17.846962600946426, 17.75128497183323, 17.668784022331238, 17.596823900938034, 17.53332108259201, 17.476620882749557, 17.425398379564285, 17.378590300679207, 17.335340082645416, 17.294952616095543, 17.25686340034008, 17.220609426498413, 17.1858132481575, 17.15216553211212, 17.11940921843052, 17.08733355998993, 17.055763840675354, 17.024554938077927, 16.993584975600243, 16.96275319159031, 16.93197710812092, 16.901186406612396, 16.870322838425636, 16.839338093996048, 16.808189675211906, 16.776846766471863, 16.745277270674706, 16.713462218642235, 16.681378811597824, 16.64901426434517, 16.616353824734688, 16.5833892673254, 16.550111055374146, 16.516516119241714, 16.482599169015884, 16.448358863592148, 16.413795441389084, 16.37890875339508, 16.343702122569084, 16.3081790655

**More hidden neurons**

In [8]:
from torch.utils.data import Dataset, DataLoader

In [9]:
class Data(Dataset):
  def __init__(self):
    self.x = torch.linspace(-20,20,100).view(-1,1)
    self.y = torch.zeros(self.x.shape[0])
    self.y[(self.x[:,0] > -10 & (self.x[:,0] < -5))] = 1
    self.y[(self.x[:,0] > 5 & (self.x[:,0] < 10))] = 1
    self.y = self.y.view(-1,1)
    self.len = self.x.shape[0]

  def __getitem__(self, index):
    return self.x[index], self.y[index]

  def __len__(self):
    return self.len

In [10]:
class NN(nn.Module):
  def __init__(self, D_in, H, D_out):
    super(NN, self).__init__()
    self.linear1 = nn.Linear(D_in, H)
    self.linear2 = nn.Linear(H, D_out)

  def forward(self, x):
    x = sigmoid(self.linear1(x))
    x = sigmoid(self.linear2(x))
    return x

In [11]:
def train(data_set, model, criterion, train_loader, optimizer, epochs=5, plot_number=10):
  cost = []
  for epoch in range(epochs):
    total = 0
    for x,y in train_loader:
      yhat = model(x)
      loss = criterion(yhat, y)
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()
      total += loss.item()
    cost.append(total)
  return cost

In [12]:
model = NN(1,6,1)
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
train_dataset = Data()
train_loader = DataLoader(dataset=train_dataset, batch_size=100)

cost = train(train_dataset, model, criterion, train_loader, optimizer, epochs=100)

i = 0
for c in cost:
  i += 1
  if i % 10 == 0:
    print(c)

0.5749016404151917
0.46442973613739014
0.37221625447273254
0.29882100224494934
0.2430441975593567
0.20110797882080078
0.1694575697183609
0.14529277384281158
0.1265193223953247
0.11164810508489609


same as above, but using nn.Sequential

In [13]:
model = torch.nn.Sequential(
    torch.nn.Linear(1,6),
    torch.nn.Sigmoid(),
    torch.nn.Linear(6,1),
    torch.nn.Sigmoid()
)

**Multidimensional input data**

In [14]:
class XOR_Data(Dataset):

    # Constructor
    def __init__(self, N_s=100):
        self.x = torch.zeros((N_s, 2))
        self.y = torch.zeros((N_s, 1))
        for i in range(N_s // 4):
            self.x[i, :] = torch.Tensor([0.0, 0.0])
            self.y[i, 0] = torch.Tensor([0.0])

            self.x[i + N_s // 4, :] = torch.Tensor([0.0, 1.0])
            self.y[i + N_s // 4, 0] = torch.Tensor([1.0])

            self.x[i + N_s // 2, :] = torch.Tensor([1.0, 0.0])
            self.y[i + N_s // 2, 0] = torch.Tensor([1.0])

            self.x[i + 3 * N_s // 4, :] = torch.Tensor([1.0, 1.0])
            self.y[i + 3 * N_s // 4, 0] = torch.Tensor([0.0])

            self.x = self.x + 0.01 * torch.randn((N_s, 2))
        self.len = N_s

    # Getter
    def __getitem__(self, index):
        return self.x[index],self.y[index]

    # Get Length
    def __len__(self):
        return self.len

In [15]:
# Calculate accuracy

def accuracy(model, data_set):
    return np.mean(data_set.y.view(-1).numpy() == (model(data_set.x)[:, 0] > 0.5).numpy())

In [16]:
def train(dataset, model, criterion, train_loader, optimizer, epochs=100):
  cost = []
  acc = []
  for epoch in range(epochs):
    total = 0
    for x,y in train_loader:
      yhat = model(x)
      loss = criterion(yhat, y)
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()
      total = loss.item()
    acc.append(accuracy(model, dataset))
    cost.append(total)

  return cost, acc

In [17]:
model = NN(2,4,1)
criterion = nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
train_dataset = XOR_Data()
train_loader = DataLoader(dataset=train_dataset, batch_size=1)

cost, acc = train(train_dataset, model, criterion, train_loader, optimizer)

In [18]:
for i in range(len(cost)):
  if i % 10 == 0:
    print(f"cost: {cost[i]} - accuracy: {acc[i]}")

cost: 0.587478756904602 - accuracy: 0.5
cost: 0.6765233278274536 - accuracy: 0.29
cost: 0.6771535277366638 - accuracy: 0.28
cost: 0.6772976517677307 - accuracy: 0.27
cost: 0.6773310899734497 - accuracy: 0.27
cost: 0.6772513389587402 - accuracy: 0.26
cost: 0.6770526170730591 - accuracy: 0.25
cost: 0.6767285466194153 - accuracy: 0.25
cost: 0.6762719750404358 - accuracy: 0.25
cost: 0.675674319267273 - accuracy: 0.25


**Multi-Class NNs**

In [19]:
import torchvision.datasets as dsets
import torchvision.transforms as transforms

In [20]:
# for multi-class model the number of neurons in the outoput layer must correpond to the number of target classes
class NN(nn.Module):
  def __init__(self, D_in, H, D_out):
    super(NN, self).__init__()
    self.linear1 = nn.Linear(D_in, H)
    self.linear2 = nn.Linear(H, D_out)

  def forward(self, x):
    x = sigmoid(self.linear1(x))
    x = self.linear2(x) # drop the activation function in last layer
    return x

In [21]:
# same NN, but using nn.Sequential

model = torch.nn.Sequential(
    torch.nn.Linear(2,6),
    torch.nn.Sigmoid(),
    torch.nn.Linear(6,3),
)

In [22]:
train_dataset = dsets.MNIST(root='./data', train=True, download=True, transform=transforms.ToTensor())
validation_dataset = dsets.MNIST(root='./data', train=False, download=True, transform=transforms.ToTensor())

train_loader = DataLoader(dataset=train_dataset, batch_size=2000)
validation_loader = DataLoader(dataset=validation_dataset, batch_size=5000)

100%|██████████| 9.91M/9.91M [00:00<00:00, 35.2MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 24.8MB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 45.6MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 5.00MB/s]


In [23]:
# training function

def train(model, criterion, train_loader, validation_loader, optimizer, epochs=100):
    i = 0
    useful_stuff = {'training_loss': [],'validation_accuracy': []}
    for epoch in range(epochs):
        # first train on training data
        for i, (x, y) in enumerate(train_loader):
            optimizer.zero_grad()
            z = model(x.view(-1, 28 * 28))
            loss = criterion(z, y)
            loss.backward()
            optimizer.step()
             #loss for every iteration
            useful_stuff['training_loss'].append(loss.data.item())
        correct = 0
        # then evaluate using validation data
        for x, y in validation_loader:
            #validation
            z = model(x.view(-1, 28 * 28))
            _, label = torch.max(z, 1)
            correct += (label == y).sum().item()
        accuracy = 100 * (correct / len(validation_dataset))
        useful_stuff['validation_accuracy'].append(accuracy)
    return useful_stuff

In [24]:
model = NN(28 * 28, 100, 10) # 10 classes, digits 0-9

optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
criterion = nn.CrossEntropyLoss()

train_results = train(model, criterion, train_loader, validation_loader, optimizer, epochs=30)

In [25]:
print("training loss:", train_results['training_loss'][0])
print("validation accuracy:", train_results['validation_accuracy'][0])

print("\ntraining loss:", train_results['training_loss'][-1])
print("validation accuracy:", train_results['validation_accuracy'][-1])

training loss: 2.336042881011963
validation accuracy: 11.28

training loss: 1.9470173120498657
validation accuracy: 63.029999999999994


**Backpropagation and Activation Functions**
<br><br>
Backpropagation: compute the gradient for updating the weights
<br>
Activation Functions: add non linearity to the model
<br>
* **sigmoid**: 0,1 interval, suffers from vanishing gradient problem
* **tanh**: similar to sigmoid, but with -1,1 interval
* **relu**: y = max(0,x), helps against vanishing gradient problem



In [26]:
# NN w/ sigmoid activation function

class Net_Sig(nn.Module):
  def __init__(self, D_in, H, D_out):
    super(Net_Tanh, self).__init__()
    self.linear1 = nn.Linear(D_in, H)
    self.linear2 = nn.Linear(H, D_out)

  def forward(self, x):
    x = torch.sigmoid(self.linear1(x))
    x = self.linear2(x)
    return x

In [27]:
# NN w/ tanh activation function

class Net_Tanh(nn.Module):
  def __init__(self, D_in, H, D_out):
    super(Net_Tanh, self).__init__()
    self.linear1 = nn.Linear(D_in, H)
    self.linear2 = nn.Linear(H, D_out)

  def forward(self, x):
    x = torch.tanh(self.linear1(x))
    x = self.linear2(x)
    return x

In [28]:
# NN w/ relu activation function

class Net_Relu(nn.Module):
  def __init__(self, D_in, H, D_out):
    super(Net_Tanh, self).__init__()
    self.linear1 = nn.Linear(D_in, H)
    self.linear2 = nn.Linear(H, D_out)

  def forward(self, x):
    x = torch.relu(self.linear1(x))
    x = self.linear2(x)
    return x

In [29]:
# NN w/ tanh activation function using nn.Sequential
model_tanh = nn.Sequential(
    nn.Linear(28 * 28, 100),
    nn.Tanh(),
    nn.Linear(100, 10)
)

# NN w/ relu activation function using nn.Sequential
model_relu = nn.Sequential(
    nn.Linear(28 * 28, 100),
    nn.ReLU(),
    nn.Linear(100, 10)
)

# Deep NNs

In [32]:
# deep NN w/ two hidden layers
# D_in -> dimension input features
# H1, H2 -> number of neurons in first and second hidden layer
# D_out -> dimension of output (equal to the numebr of classes, one for regression)

class Dnn(nn.Module):
  def __init__(self, D_in, H1, H2, D_out):
    super(Dnn, self).__init__()
    self.linear1 = nn.Linear(D_in, H1)
    self.linear2 = nn.Linear(H1, H2)
    self.linear3 = nn.Linear(H2, D_out)

  def forward(self, x):
    x = torch.sigmoid(self.linear1(x))
    x = torch.sigmoid(self.linear2(x))
    x = self.linear3(x)
    return x

In [34]:
model = Dnn(3,3,4,3)
print(model.parameters)

<bound method Module.parameters of Dnn(
  (linear1): Linear(in_features=3, out_features=3, bias=True)
  (linear2): Linear(in_features=3, out_features=4, bias=True)
  (linear3): Linear(in_features=4, out_features=3, bias=True)
)>


In [35]:
model = nn.Sequential(
    nn.Linear(3,3),
    nn.Sigmoid(),
    nn.Linear(3,4),
    nn.Sigmoid(),
    nn.Linear(4,3)
)

print(model.parameters)

<bound method Module.parameters of Sequential(
  (0): Linear(in_features=3, out_features=3, bias=True)
  (1): Sigmoid()
  (2): Linear(in_features=3, out_features=4, bias=True)
  (3): Sigmoid()
  (4): Linear(in_features=4, out_features=3, bias=True)
)>
