# Simple Iris dataset classification with Torch

Example for a multi-class classification problem with a fully connected network and batch training

In [1]:
""" Multi-class classification of the iris dataset with a fully connected net.
    The network takes 4 input features and outputs the probability (softmax)
    of eaech sample in the training batch to belong to each class.
    For example if the batch is made of 10 samples, the output of the network
    is a (10 x 3) matrix where row 1 is a vector with three scalars representing
    the probability that the first sample beongs to each class.
"""
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from sklearn import datasets
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np


class fcnn(nn.Module):
    def __init__(self, input_features=4, hidden_size=5, output_classes=3):
        """ iris dataset has 4 features and 3 flower species (classes) """
        super(fcnn, self).__init__()
        self.layer1 = nn.Linear(input_features, hidden_size)
        self.layer2 = nn.Linear(hidden_size, output_classes)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        return self.softmax(x)


if __name__ == "__main__":
    iris = datasets.load_iris()
    df = np.c_[iris.data, iris.target]
    
    df = df[:100, ]  # binary classification

    np.random.shuffle(df)

    X = df[:, :-1]
    y = df[:, -1]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25)

    X_train = torch.from_numpy(X_train).float()
    X_test = torch.from_numpy(X_test).float()
    y_train = torch.from_numpy(y_train).long()
    y_test = torch.from_numpy(y_test).long()

    # instantiate the network
    net = fcnn()
    net = net.float()
    # print(net)

    # define the optimizer 
    optimizer = optim.SGD(net.parameters(), lr=0.05)    
    # define the loss
    loss_fun = nn.CrossEntropyLoss()

    epochs = 1000
    batch_size = 10
    epoch_loss = []
    for epoch in range(epochs):
        optimizer.zero_grad()
        for i in range(0, X_train.shape[0], batch_size):

            x_b = X_train[i: i + batch_size].float()
            y_b = y_train[i: i + batch_size]
            
            y_hat = net(x_b)
            loss = loss_fun(y_hat.squeeze(), y_b.squeeze()) 
            loss.backward()
            optimizer.step()

        epoch_loss.append(loss)
        if epoch % 100 == 0:
            print("Epoch {}, loss = {}".format(epoch, loss))

    # test accuracy
    predicted = net(X_test)
    _, y_pred = torch.max(predicted, 1)  # output 1 = max, output 2 = argmax

    print('test set accuracy', accuracy_score(y_test.data, y_pred.data))

Epoch 0, loss = 1.1283775568008423
Epoch 100, loss = 0.5516494512557983
Epoch 200, loss = 0.5515233278274536
Epoch 300, loss = 0.5514892935752869
Epoch 400, loss = 0.5514745712280273
Epoch 500, loss = 0.5514665842056274
Epoch 600, loss = 0.5514616966247559
Epoch 700, loss = 0.5514583587646484
Epoch 800, loss = 0.5514560341835022
Epoch 900, loss = 0.5514542460441589
test set accuracy 1.0
