In [None]:
#!pip3 install sklearn # or pip 
#!pip3 install numpy
#!pip3 install torch torchvision 


In [1]:
import numpy as np
import torch as t
from   matplotlib import pyplot as plt
import torch.nn.functional as F
import torch.nn as nn
from   torch.autograd import Variable

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing   import StandardScaler

iris = load_iris()
X = iris['data']
y = iris['target']
print("Sameple x", X[:3])
print("Sample y", y[:3])
names = iris['target_names']
feature_names = iris['feature_names']
print("Labels iris ", names)
print("Feature names", feature_names)
# Scale data to have mean 0 and variance 1 
# which is importance for convergence of the neural network
# removes mean and divides by standard deviation
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

print("Sameple x scaled\n", X_scaled[:3])

# Split the data set into training and testing
X_train, X_val, y_train, y_val = train_test_split(
    X_scaled, y, test_size=0.2, shuffle = True, random_state=2)

Sameple x [[5.1 3.5 1.4 0.2]
 [4.9 3.  1.4 0.2]
 [4.7 3.2 1.3 0.2]]
Sample y [0 0 0]
Labels iris  ['setosa' 'versicolor' 'virginica']
Feature names ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
Sameple x scaled
 [[-0.90068117  1.01900435 -1.34022653 -1.3154443 ]
 [-1.14301691 -0.13197948 -1.34022653 -1.3154443 ]
 [-1.38535265  0.32841405 -1.39706395 -1.3154443 ]]


In [2]:
print(X_scaled.shape, X_train.shape, X_val.shape)
print(X_scaled[:3])
print(y[:3])

(150, 4) (120, 4) (30, 4)
[[-0.90068117  1.01900435 -1.34022653 -1.3154443 ]
 [-1.14301691 -0.13197948 -1.34022653 -1.3154443 ]
 [-1.38535265  0.32841405 -1.39706395 -1.3154443 ]]
[0 0 0]


In [3]:
# convert data from numpy to tensors
X_t_train = t.from_numpy(X_train).float()
y_t_train = t.flatten(t.from_numpy(y_train).long()) # flatten - creates a one dimensional tensor
X_t_val   = t.from_numpy(X_val).float()
y_t_val   = t.flatten(t.from_numpy(y_val).long())

print(X_t_train.shape, y_t_train.shape)
print(X_t_train.dtype, y_t_train.dtype)
print(X_t_val.shape,   y_t_val.shape)
print(X_t_val.dtype,   y_t_val.dtype)

torch.Size([120, 4]) torch.Size([120])
torch.float32 torch.int64
torch.Size([30, 4]) torch.Size([30])
torch.float32 torch.int64


In [67]:
# DataLoader = a class that shuffles the data and splits in into batches
# you should use it during training (SGD - accumulate error over batches of data )
train_data = [(X_t_train[i], y_t_train[i]) for i in range(X_t_train.shape[0])]
print("Sample train_data = ", train_data[:3], " type = ", type(train_data))
trainloader = t.utils.data.DataLoader(train_data, batch_size = 10, shuffle=True)
for x,label in trainloader:  # shuffles the data
    print(x,label)

Sample train_data =  [(tensor([ 0.4322, -0.5924,  0.5922,  0.7907]), tensor(2)), (tensor([-0.9007,  0.5586, -1.1697, -0.9205]), tensor(0)), (tensor([-0.2948, -0.3622, -0.0898,  0.1325]), tensor(1))]  type =  <class 'list'>
tensor([[-1.7489, -0.1320, -1.3971, -1.3154],
        [ 0.7957, -0.1320,  0.8196,  1.0539],
        [-0.1737,  3.0908, -1.2834, -1.0522],
        [-0.9007,  1.0190, -1.3402, -1.1838],
        [-1.0218,  1.2492, -1.3402, -1.3154],
        [-0.7795,  1.0190, -1.2834, -1.3154],
        [ 0.4322, -0.5924,  0.5922,  0.7907],
        [ 1.0380,  0.5586,  1.1038,  1.1856],
        [-1.1430, -0.1320, -1.3402, -1.3154],
        [ 1.4015,  0.3284,  0.5354,  0.2641]]) tensor([0, 2, 0, 0, 0, 0, 2, 2, 0, 1])
tensor([[ 0.3110, -0.1320,  0.4786,  0.2641],
        [-1.8700, -0.1320, -1.5107, -1.4471],
        [-1.1430,  1.2492, -1.3402, -1.4471],
        [ 1.0380,  0.0982,  1.0469,  1.5805],
        [ 1.1592, -0.1320,  0.9901,  1.1856],
        [ 1.2803,  0.0982,  0.6491,  0.3958],
 

In [69]:
class Model(nn.Module):
    def __init__(self, input_dim = 4):
         
        super(Model, self).__init__()
        self.layer1   = nn.Linear(in_features=input_dim, out_features = 15)
        self.dropout1 = nn.Dropout(p = 0.3) # drop 30% of output nodes from the previous layer during training only 
        self.layer2   = nn.Linear(in_features= 15, out_features = 12)
        self.dropout2 = nn.Dropout(p = 0.25)
        self.layer3   = nn.Linear(in_features = 12, out_features = 3) # 3 neurons = one for each class

        
    def forward(self, x):
        x = F.relu(self.layer1(x))
        x = self.dropout1(x)
        x = F.relu(self.layer2(x))
        x = self.dropout2(x)
        x = self.layer3(x)
        return x


In [98]:
model     = Model(X_train.shape[1])   # X_train.shape[1]
optimizer = t.optim.Adam(model.parameters(), lr=0.001)
loss_fn   = nn.CrossEntropyLoss()
print(model)

y0 = model.forward(X_t_train[0,:])
print(y0)

Model(
  (layer1): Linear(in_features=4, out_features=15, bias=True)
  (dropout1): Dropout(p=0.3, inplace=False)
  (layer2): Linear(in_features=15, out_features=12, bias=True)
  (dropout2): Dropout(p=0.25, inplace=False)
  (layer3): Linear(in_features=12, out_features=3, bias=True)
)
tensor([ 0.0011,  0.0695, -0.2049], grad_fn=<AddBackward0>)


In [99]:
train_model(n_epochs = 700, model = model, 
            train_loader = trainloader, optimizer = optimizer, 
            loss_fn = loss_fn, 
            x_val   = X_t_val,   y_val = y_t_val)

Epoch 0, Training loss 1.0436, Validation loss 1.0564
Epoch 1, Training loss 1.0315, Validation loss 1.0324
Epoch 100, Training loss 0.2047, Validation loss 0.0767
Epoch 200, Training loss 0.1565, Validation loss 0.0591
Epoch 300, Training loss 0.2439, Validation loss 0.0538
Epoch 400, Training loss 0.0073, Validation loss 0.0619
Epoch 500, Training loss 0.0164, Validation loss 0.0657
Epoch 600, Training loss 0.0838, Validation loss 0.0718
Epoch 700, Training loss 0.0437, Validation loss 0.0623


In [97]:
def train_model(n_epochs, model, train_loader, optimizer, loss_fn, 
                x_val, y_val):
    for epoch in range(n_epochs+1):
        for xb,yb in train_loader: # for each batch
            model.train()       # set model in training mode = with dropout
            ym = model.forward(xb)
            loss = loss_fn(ym,yb)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        if epoch == 1 or epoch % 100 == 0:
            model.eval() # set model in evaluation mode = no dropout
            with t.no_grad():  # no learning
                ym_val   = model.forward(x_val)
                loss_val = loss_fn(ym_val,y_val)

            print(f"Epoch {epoch}, Training loss {loss.item():.4f},"
f" Validation loss {loss_val.item():.4f}")



In [100]:
soft_max  = t.nn.Softmax(1) 
y_m_train = soft_max(model.forward(X_t_train)) # y_m normalized with softmax
y_m_train = t.argmax(y_m_train,dim = 1)

y_m_val   = soft_max(model.forward(X_t_val))
y_m_val   = t.argmax(y_m_val,dim = 1)
correct_pred_val = t.sum(y_m_val == y_t_val)/y_m_val.shape[0]
print("Validation accuracy = ", correct_pred_val)

correct_pred_train = t.sum(y_m_train == y_t_train)/y_m_train.shape[0]
print("Training accuracy = ", correct_pred_train)

Validation accuracy =  tensor(0.9667)
Training accuracy =  tensor(0.9833)
