# Deep Learning
## Practical Deep Learning Tutorial with PyTorch - Tutorial N° 3

### 2020-2021

# Importing necessary libraries


In [1]:
import torch
import torch.nn as nn
from torch.autograd import grad
import torch.nn.functional as F
import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import make_blobs

# Adaline

1. Built ADALINE model using the nn.Module class






In [2]:
class ADALINE(nn.Module):
    def __init__(self, num_features):
        super(ADALINE, self).__init__()
        self.linear = torch.nn.Linear(num_features, 1)
        self.linear.weight.detach().zero_()
        self.linear.bias.detach().zero_()

    def forward(self, x):
        netinputs = self.linear(x)
        activations = netinputs
        return activations.view(-1)

2. Using 'iris.txt', create a binary datasets in 2-D : The last 100 instances of iris described only by the 2nd and 3rd features
    
    Split the dataset into traing and test sets (70%,30%) 

    Normalize the dataset

In [3]:
import pandas as pd 
from sklearn.model_selection import train_test_split

In [4]:
df = pd.read_csv('iris.txt')
df.columns = ['col{}'.format(i) for i in range(df.shape[1])]
df=df.rename(columns={df.columns[-1]:'target'})
df['target'] = df['target'].apply(lambda x: 0 if x == 'Iris-versicolor' else 1)

df.head()


Unnamed: 0,col0,col1,col2,col3,target
0,4.9,3.0,1.4,0.2,1
1,4.7,3.2,1.3,0.2,1
2,4.6,3.1,1.5,0.2,1
3,5.0,3.6,1.4,0.2,1
4,5.4,3.9,1.7,0.4,1


In [5]:
X = df.iloc[:,:-1].values
y = df.iloc[:,-1].values
X = (X-np.mean(X,axis=0)) / np.std(X,axis=0)
X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.3, random_state=42)

#converting to torch object 
X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)

print(X_train.shape,X_train.shape,y_train.shape,y_test.shape)

torch.Size([104, 4]) torch.Size([104, 4]) torch.Size([104]) torch.Size([45])


3. Train the model : we will use MSELoss (mean squared error (squared L2 norm)) as loss function. The optimizer is SGD (Stochastic Gradient Descent) with learning rate 0.01.

In [6]:
import torch.nn.functional as F

def loss_func(yhat, y):
    return torch.mean((yhat - y)**2)

def train(model, x, y, num_epochs):
    cost = []
    
    for e in range(num_epochs):
    
        yhat = model.forward(x)
        loss = F.mse_loss(yhat, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        with torch.no_grad():
            yhat = model.forward(x)
            curr_loss = loss_func(yhat, y)
            if (e+1) % 10 == 0:
                print(f'Epoch [{e+1}/{num_epochs}], Loss: {curr_loss:.3f}')
            cost.append(curr_loss)

    return cost

4. Compute the model accuracy 

In [None]:
model = ADALINE(num_features=X_train.size(1))
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

cost = train(model, 
             X_train, y_train.float(),
             num_epochs=200)

In [8]:
ones = torch.ones(y_train.size())
zeros = torch.zeros(y_train.size())
train_pred = model.forward(X_train)
train_acc = torch.mean(
    (torch.where(train_pred > 0.5, 
                 ones, 
                 zeros).int() == y_train).float())

ones = torch.ones(y_test.size())
zeros = torch.zeros(y_test.size())
test_pred = model.forward(X_test)
test_acc = torch.mean(
    (torch.where(test_pred > 0.5, 
                 ones, 
                 zeros).int() == y_test).float())

print(f'Training Accuracy: {train_acc*100: .2f}%')
print(f'Test Accuracy: {test_acc*100: .2f}%')

Training Accuracy:  71.15%
Test Accuracy:  75.56%


# Perceptron

5. Built a Perceptron model using nn.Module class

In [9]:
class Perceptron(torch.nn.Module):

    def __init__(self, num_features):
        super(Perceptron, self).__init__()
        self.linear = torch.nn.Linear(num_features, 1)
        self.linear.weight.detach().zero_()
        self.linear.bias.detach().zero_()

        
    def forward(self, x):
        logits = self.linear(x)
        probas = torch.sigmoid(logits)
        return probas


6. Load the 'perceptron_toydata' dataset

    Split the dataset into train and test sets
    
    Normalize the data

In [10]:
df=pd.read_csv('perceptron_toydata.txt',sep='\s+', skipinitialspace=True,names=[f'col{i}' for i in range(3)])

X=df.iloc[:,:-1].values 
y=df['col2'].values
X_train,X_test,y_train,y_test = train_test_split( X, y, test_size=0.3, random_state=42)

m,s = np.mean(X_train,axis=0), np.std(X_train,axis=0)
X_train = (X_train-m)/s
X_test = (X_test-m)/s

X_train = torch.tensor(X_train,dtype=torch.float32)
X_test = torch.tensor(X_test,dtype=torch.float32)
y_train = torch.tensor(y_train,dtype=torch.float32).view(-1,1)
y_test = torch.tensor(y_test,dtype=torch.float32)
print(X_train.shape,X_test.shape,y_train.shape,y_test.shape)

torch.Size([70, 2]) torch.Size([30, 2]) torch.Size([70, 1]) torch.Size([30])


7. Train the perceptron

In [11]:
model = Perceptron(num_features=X_train.size(1))
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

def comp_accuracy(label_var, pred_probas):
    pred_labels = torch.where((pred_probas > 0.5), 
                              torch.tensor([1]), 
                              torch.tensor([0])).view(-1)
    acc = torch.sum(pred_labels == label_var.view(-1)).float() / label_var.size(0)
    return acc
num_epochs = 100



for epoch in range(num_epochs):
    
    #### Compute outputs ####
    out = model.forward(X_train)
    
    cost = F.binary_cross_entropy(out, y_train, reduction='sum')
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()
    
    with torch.no_grad():     
        pred_probas = model.forward(X_train)
        acc = comp_accuracy(y_train, pred_probas)
        if (epoch+1) % 10 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Accuracy: {acc:.3f} Loss: {F.binary_cross_entropy(pred_probas, y_train):.3f}')

    


Epoch [10/100], Accuracy: 0.971 Loss: 0.149
Epoch [20/100], Accuracy: 0.971 Loss: 0.100
Epoch [30/100], Accuracy: 0.971 Loss: 0.081
Epoch [40/100], Accuracy: 0.971 Loss: 0.070
Epoch [50/100], Accuracy: 0.971 Loss: 0.063
Epoch [60/100], Accuracy: 0.971 Loss: 0.058
Epoch [70/100], Accuracy: 0.971 Loss: 0.054
Epoch [80/100], Accuracy: 0.986 Loss: 0.051
Epoch [90/100], Accuracy: 0.986 Loss: 0.049
Epoch [100/100], Accuracy: 0.986 Loss: 0.047


8. evaluate the model (accuracy)

In [None]:
### the answer is in the cell above

# Multi Layer Perceptron

Unlike the single-layer perceptron, the Multi Layer Perceptron models have hidden layers
between the input and the output layers. After every hidden layer, an activation function 
is applied to introduce non-linearity. 

9. Built a simple Multi Layer Perceptron model withe one hidden layer. 
After the hidden layer, we will use ReLU as activation before the information is sent to the output layer.
As an output activation function, we will use Sigmoid. 

In [24]:
class MLP(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()



    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.sigmoid(x)
        return x

    def train(self, X_train, y_train, learning_rate, num_epochs):
        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.SGD(self.parameters(), lr=learning_rate)

        for epoch in range(num_epochs):
            running_loss = 0.0
            for i, data in enumerate(zip(X_train, y_train)):
                inputs, labels = data
                optimizer.zero_grad()
                outputs = self(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
                running_loss += loss.item()
            if((epoch+1)%10 == 0):
                print('Epoch {}/{}, Loss: {:.4f}'.format(epoch+1, num_epochs, running_loss/len(X_train)))

    def accuracy(self, X_train, y_train, X_test, y_test):
        train_outputs = self(X_train)
        _, train_predicted = torch.max(train_outputs.data, 1)
        train_correct = (train_predicted == y_train).sum().item()
        train_accuracy = train_correct / len(X_train)

        test_outputs = self(X_test)
        _, test_predicted = torch.max(test_outputs.data, 1)
        test_correct = (test_predicted == y_test).sum().item()
        test_accuracy = test_correct / len(X_test)

        print('Train Accuracy: {:.2f}%'.format(train_accuracy * 100))
        print('Test Accuracy: {:.2f}%'.format(test_accuracy * 100))

10. Create a random datasets and assign binary labels {0,1}

In [15]:

# Generate random training data
np.random.seed(0)
input_size = 2
train_size = 100
test_size = int(train_size*0.2) #20% of the train size
X_train = torch.tensor(np.random.rand(train_size, 2), dtype=torch.float32)
y_train = torch.tensor(np.random.randint(0, 2, size=(train_size,)), dtype=torch.long)

# Generate random test data
X_test = torch.tensor(np.random.rand(test_size, input_size), dtype=torch.float32)
y_test = torch.tensor(np.random.randint(0, 2, size=(test_size,)), dtype=torch.long)



In [16]:
print(X_train.shape,X_test.shape,y_train.shape,y_test.shape)

torch.Size([100, 2]) torch.Size([20, 2]) torch.Size([100]) torch.Size([20])


11. Define the model with input dimension 2 and hidden dimension 10. 
Since the task is to classify binary labels, we can use BCELoss (Binary Cross Entropy Loss) as loss function.
The optimizer is SGD (Stochastic Gradient Descent) with learning rate 0.01.

In [25]:
hidden_size = 10
output_size = 2

mlp = MLP(input_size, hidden_size, output_size)


12. Check the test loss before the model training and compare it with the test loss after the training.

In [27]:
mlp.train(X_train, y_train, learning_rate=0.1, num_epochs=200)
mlp.accuracy(X_train,y_train,X_test, y_test)

Epoch 10/200, Loss: 0.6836
Epoch 20/200, Loss: 0.6833
Epoch 30/200, Loss: 0.6830
Epoch 40/200, Loss: 0.6827
Epoch 50/200, Loss: 0.6823
Epoch 60/200, Loss: 0.6819
Epoch 70/200, Loss: 0.6817
Epoch 80/200, Loss: 0.6814
Epoch 90/200, Loss: 0.6811
Epoch 100/200, Loss: 0.6808
Epoch 110/200, Loss: 0.6807
Epoch 120/200, Loss: 0.6802
Epoch 130/200, Loss: 0.6801
Epoch 140/200, Loss: 0.6799
Epoch 150/200, Loss: 0.6797
Epoch 160/200, Loss: 0.6795
Epoch 170/200, Loss: 0.6794
Epoch 180/200, Loss: 0.6792
Epoch 190/200, Loss: 0.6790
Epoch 200/200, Loss: 0.6790
Train Accuracy: 60.00%
Test Accuracy: 45.00%


13. In order to improve the model, you can try out different parameter values for your
hyperparameters(ie. hidden dimension size, epoch size, learning rates). You can also 
try changing the structure of your model (ie. adding more hidden layers) to see if your
mode improves. 

In [28]:
hidden_sizes = [2,3,5]
epochs = [20,50,200]

for h in hidden_sizes:
    for e in epochs:
        print(f"\n**hiden_size={h} --- epochs={e}**\n")
        mlp = MLP(input_size, h, output_size)
        mlp.train(X_train, y_train, learning_rate=0.1, num_epochs=e)
        mlp.accuracy(X_train,y_train,X_test, y_test)


**hiden_size=2 --- epochs=20**

Epoch 10/20, Loss: 0.6946
Epoch 20/20, Loss: 0.6939
Train Accuracy: 58.00%
Test Accuracy: 40.00%

**hiden_size=2 --- epochs=50**

Epoch 10/50, Loss: 0.6921
Epoch 20/50, Loss: 0.6895
Epoch 30/50, Loss: 0.6876
Epoch 40/50, Loss: 0.6864
Epoch 50/50, Loss: 0.6854
Train Accuracy: 58.00%
Test Accuracy: 45.00%

**hiden_size=2 --- epochs=200**

Epoch 10/200, Loss: 0.6926
Epoch 20/200, Loss: 0.6898
Epoch 30/200, Loss: 0.6870
Epoch 40/200, Loss: 0.6860
Epoch 50/200, Loss: 0.6855
Epoch 60/200, Loss: 0.6853
Epoch 70/200, Loss: 0.6853
Epoch 80/200, Loss: 0.6854
Epoch 90/200, Loss: 0.6850
Epoch 100/200, Loss: 0.6851
Epoch 110/200, Loss: 0.6849
Epoch 120/200, Loss: 0.6847
Epoch 130/200, Loss: 0.6845
Epoch 140/200, Loss: 0.6839
Epoch 150/200, Loss: 0.6840
Epoch 160/200, Loss: 0.6837
Epoch 170/200, Loss: 0.6833
Epoch 180/200, Loss: 0.6830
Epoch 190/200, Loss: 0.6828
Epoch 200/200, Loss: 0.6825
Train Accuracy: 58.00%
Test Accuracy: 45.00%

**hiden_size=3 --- epochs=20**


### Same code but with learning rate = 0.01 instead of 0.1

In [29]:
hidden_sizes = [2,3,5]
epochs = [20,50,200]

for h in hidden_sizes:
    for e in epochs:
        print(f"\n**hiden_size={h} --- epochs={e}**\n")
        mlp = MLP(input_size, h, output_size)
        mlp.train(X_train, y_train, learning_rate=0.01, num_epochs=e)
        mlp.accuracy(X_train,y_train,X_test, y_test)


**hiden_size=2 --- epochs=20**

Epoch 10/20, Loss: 0.6937
Epoch 20/20, Loss: 0.6921
Train Accuracy: 52.00%
Test Accuracy: 55.00%

**hiden_size=2 --- epochs=50**

Epoch 10/50, Loss: 0.6934
Epoch 20/50, Loss: 0.6932
Epoch 30/50, Loss: 0.6930
Epoch 40/50, Loss: 0.6928
Epoch 50/50, Loss: 0.6927
Train Accuracy: 53.00%
Test Accuracy: 60.00%

**hiden_size=2 --- epochs=200**

Epoch 10/200, Loss: 0.6987
Epoch 20/200, Loss: 0.6963
Epoch 30/200, Loss: 0.6949
Epoch 40/200, Loss: 0.6942
Epoch 50/200, Loss: 0.6938
Epoch 60/200, Loss: 0.6936
Epoch 70/200, Loss: 0.6934
Epoch 80/200, Loss: 0.6934
Epoch 90/200, Loss: 0.6933
Epoch 100/200, Loss: 0.6933
Epoch 110/200, Loss: 0.6933
Epoch 120/200, Loss: 0.6933
Epoch 130/200, Loss: 0.6933
Epoch 140/200, Loss: 0.6933
Epoch 150/200, Loss: 0.6933
Epoch 160/200, Loss: 0.6933
Epoch 170/200, Loss: 0.6933
Epoch 180/200, Loss: 0.6933
Epoch 190/200, Loss: 0.6933
Epoch 200/200, Loss: 0.6933
Train Accuracy: 50.00%
Test Accuracy: 35.00%

**hiden_size=3 --- epochs=20**
