# Simple Iris dataset classification with Torch

Multi-class classification of the iris dataset with a fully connected net.

In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from sklearn import datasets
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np


class fcnn(nn.Module):
    def __init__(self, input_features=4, hidden_size=5, output_classes=3):
        """ iris dataset has 4 features and 3 flower species (classes) """
        super().__init__()
        self.layer1 = nn.Linear(input_features, hidden_size)
        self.layer2 = nn.Linear(hidden_size, output_classes)       

    def forward(self, x):
        x = torch.sigmoid(self.layer1(x))
        x = self.layer2(x)
        return x

Let's import the dataset

In [8]:
iris = datasets.load_iris()
df = np.c_[iris.data, iris.target]

np.random.shuffle(df)

X = df[:, :-1]
y = df[:, -1]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25)

X_train = torch.from_numpy(X_train).float()
X_test = torch.from_numpy(X_test).float()
y_train = torch.from_numpy(y_train).long()
y_test = torch.from_numpy(y_test).long()

First we make an instance of the network using the class *fcnn* above then we define the optimizer. We use the [stochastic gradient descent](https://en.wikipedia.org/wiki/Stochastic_gradient_descent). The third fundamental ingredient is defining the loss function. We use [cross entropy](https://pytorch.org/docs/master/nn.html#torch.nn.CrossEntropyLoss). Different python packages have slightly different definitions of cross-entropy but Torch's documentation tells us that:
1. this function is suited for multi class classification problems.
2. We don't have to use the `torch.sigmoid` activation at the output because `CrossEntropyLoss()` already performs the sigmoid. 
3. We don't need to one-hot encode the classes (target vector `y`) but the target vector should be consist of the class indices (in our case 0, 1, 2)

In [9]:

# instantiate the network
net = fcnn()
net = net.float()
# print(net)

# define the optimizer 
optimizer = optim.SGD(net.parameters(), lr=0.2)    
# define the loss
loss_fun = nn.CrossEntropyLoss()

Now we program a loop to train the network with batch training, that is we take a batch of 20 samples at a time and predict their classes and calculate the network parameter updates with *loss.backward()*. At this point we do not update the parameters of the network, but just calculate the gradients that are necessary for the update. We keep accumulating (adding up) the updates until the end of the outer loop, that is, until the end of the epoch and only then we perform the weights/biases update

In [10]:
epochs = 1000
batch_size = 20
epoch_loss = []
for epoch in range(epochs):
    optimizer.zero_grad()        
    for i in range(0, X_train.shape[0], batch_size):

        x_b = X_train[i: i + batch_size].float()
        y_b = y_train[i: i + batch_size]

        y_hat = net(x_b)
        loss = loss_fun(y_hat, y_b) 
        loss.backward()
        optimizer.step()

    epoch_loss.append(loss)
    if epoch % 100 == 0:
        print("Epoch {}, average loss = {}".format(epoch, loss/epochs))

# test accuracy
predicted = net(X_test)
_, y_pred = torch.max(predicted, 1)  # output 1 = max, output 2 = argmax

print('test set accuracy', accuracy_score(y_test.data, y_pred.data))
print("end")

Epoch 0, average loss = 0.0010770841035991907
Epoch 100, average loss = 0.00020548349129967391
Epoch 200, average loss = 0.0001368635130347684
Epoch 300, average loss = 0.0001001921045826748
Epoch 400, average loss = 9.206093091052026e-05
Epoch 500, average loss = 9.066337952390313e-05
Epoch 600, average loss = 9.018040145747364e-05
Epoch 700, average loss = 9.013263479573652e-05
Epoch 800, average loss = 9.02847750694491e-05
Epoch 900, average loss = 9.048387437360361e-05
test set accuracy 1.0
end
