# Train a multi-class classification model using PyTorch

We use the iris dataset to train and test this multi-class classification model.

In [1]:
#Put all the libraries here
import numpy as np

import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset

from sklearn import datasets
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
from sklearn.model_selection import train_test_split

## Load iris dataset from scikit-learn 

In [2]:
iris = datasets.load_iris()

#Classe names
print("***********Classe names***********")
print(iris.target_names)
#Feature names
print("***********Feature names***********")
print(iris.feature_names)
#Data sample festure values
print("***********Data sample festure values***********")
print(iris.data)
print("***********Data size and feature size***********")
print(iris.data.shape)
#Data label values
print("***********Data label values***********")
print(iris.target)

***********Classe names***********
['setosa' 'versicolor' 'virginica']
***********Feature names***********
['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
***********Data sample festure values***********
[[5.1 3.5 1.4 0.2]
 [4.9 3.  1.4 0.2]
 [4.7 3.2 1.3 0.2]
 [4.6 3.1 1.5 0.2]
 [5.  3.6 1.4 0.2]
 [5.4 3.9 1.7 0.4]
 [4.6 3.4 1.4 0.3]
 [5.  3.4 1.5 0.2]
 [4.4 2.9 1.4 0.2]
 [4.9 3.1 1.5 0.1]
 [5.4 3.7 1.5 0.2]
 [4.8 3.4 1.6 0.2]
 [4.8 3.  1.4 0.1]
 [4.3 3.  1.1 0.1]
 [5.8 4.  1.2 0.2]
 [5.7 4.4 1.5 0.4]
 [5.4 3.9 1.3 0.4]
 [5.1 3.5 1.4 0.3]
 [5.7 3.8 1.7 0.3]
 [5.1 3.8 1.5 0.3]
 [5.4 3.4 1.7 0.2]
 [5.1 3.7 1.5 0.4]
 [4.6 3.6 1.  0.2]
 [5.1 3.3 1.7 0.5]
 [4.8 3.4 1.9 0.2]
 [5.  3.  1.6 0.2]
 [5.  3.4 1.6 0.4]
 [5.2 3.5 1.5 0.2]
 [5.2 3.4 1.4 0.2]
 [4.7 3.2 1.6 0.2]
 [4.8 3.1 1.6 0.2]
 [5.4 3.4 1.5 0.4]
 [5.2 4.1 1.5 0.1]
 [5.5 4.2 1.4 0.2]
 [4.9 3.1 1.5 0.2]
 [5.  3.2 1.2 0.2]
 [5.5 3.5 1.3 0.2]
 [4.9 3.6 1.4 0.1]
 [4.4 3.  1.3 0.2]
 [5.1 3.4 1.5 0.2]
 

## Convert the data to PyTorch tensors 

In [7]:
X, y = iris.data, iris.target

#Split the data into two sets: 80% for training and 20% for testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

# Convert to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

# Create a TensorDataset for training and testing, respectively
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# Create DataLoaders: here, we use mini-batch gradient descent, so need to specify the batch size
train_dataloader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=16, shuffle=False)

## Construct a multi-class classification model

In [4]:
class MultiClassification(nn.Module):
    def __init__(self):
        super(MultiClassification, self).__init__()
        #The first "4" specifies that the feature dimension is 4, and the second "3" specifies that this is 3-class classification
        self.fc = nn.Linear(4, 3) 
    
    def forward(self, x):
        y = self.fc(x)
        
        return y

## Set up some hyperparameters: use cross entropy loss, gradient descent with Adam optimizer, learning rate, and epochs

In [5]:
epochs = 50
learning_rate = 0.01
lossfunction = nn.CrossEntropyLoss() #Cross entropy loss for multi-class classification

#Instantiate the model from "MultiClassification" class definition
model = MultiClassification()

#Use Adam optimizer
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

print(model)

MultiClassification(
  (fc): Linear(in_features=4, out_features=3, bias=True)
)


## Train the model using training data (X_train, y_train)

In [8]:
#Define the training function
def train(epoch, model, train_dataloader, optimizer, lossfunction):
    model.train()
    
    train_loss = 0.0
    train_total, train_correct = 0.0, 0.0 
    
    for X_batch, y_batch in train_dataloader:
        optimizer.zero_grad()

        #Get the predicted output
        predictions = model(X_batch)

        #Calculate the loss
        loss = lossfunction(predictions, y_batch)
        
        #Update the weights usning gradient descent with Adam optimizer
        loss.backward()
        optimizer.step()
        
        #Convert probabilities to multi-class predictions (reutrn the class with the maximal proability)
        _, train_predicted = torch.max(predictions.data, 1)
        
        #Calculate the training statistics
        train_loss += loss.item()
        train_total += y_batch.size(0)
        train_correct += (train_predicted == y_batch).sum().item()

    print("epoch (%d): Train accuracy: %.4f, loss: %.3f" % (epoch, train_correct/train_total, train_loss/train_total))
    
    
#Train the model
for epoch in range(1, epochs + 1):
    train(epoch, model, train_dataloader, optimizer, lossfunction)

epoch (1): Train accuracy: 0.3417, loss: 0.212
epoch (2): Train accuracy: 0.3417, loss: 0.124
epoch (3): Train accuracy: 0.3583, loss: 0.073
epoch (4): Train accuracy: 0.5750, loss: 0.068
epoch (5): Train accuracy: 0.6583, loss: 0.063
epoch (6): Train accuracy: 0.6667, loss: 0.056
epoch (7): Train accuracy: 0.7833, loss: 0.052
epoch (8): Train accuracy: 0.7917, loss: 0.049
epoch (9): Train accuracy: 0.7167, loss: 0.047
epoch (10): Train accuracy: 0.6833, loss: 0.044
epoch (11): Train accuracy: 0.7750, loss: 0.042
epoch (12): Train accuracy: 0.8250, loss: 0.042
epoch (13): Train accuracy: 0.7667, loss: 0.040
epoch (14): Train accuracy: 0.7167, loss: 0.038
epoch (15): Train accuracy: 0.7167, loss: 0.037
epoch (16): Train accuracy: 0.8750, loss: 0.037
epoch (17): Train accuracy: 0.9000, loss: 0.036
epoch (18): Train accuracy: 0.7333, loss: 0.035
epoch (19): Train accuracy: 0.8667, loss: 0.034
epoch (20): Train accuracy: 0.9167, loss: 0.034
epoch (21): Train accuracy: 0.9417, loss: 0.033
e

## Test the model using test data (X_test, y_test)

In [10]:
#Define the test function
def test():
    model.eval()
    
    test_correct, test_total = 0.0, 0.0
    y_pred = []
    
    with torch.no_grad():
        for X_batch, y_batch in test_dataloader:
            predictions = model(X_batch)
            
            #Convert probabilities to multi-class predictions (reutrn the class with the maximal proability)
            _, test_predicted = torch.max(predictions.data, 1)
        
            test_total += y_batch.size(0)
            test_correct += (test_predicted == y_batch).sum().item()
            
            y_pred += test_predicted.tolist()

    f1 = f1_score(y_test, y_pred, average="macro")
    print('Test accuracy: %.4f, macro f1_score: %.4f' % (test_correct / test_total, f1))
    
    return y_pred

#Test the model
y_pred = test()

Test accuracy: 0.9667, macro f1_score: 0.9659


## Output the confusion matrix

In [11]:
confusion = confusion_matrix(y_test, y_pred)

print('Confusion Matrix\n')
print(confusion)

Confusion Matrix

[[10  0  0]
 [ 0  8  1]
 [ 0  0 11]]


## Calculate accuracy and F1-score

In [12]:
acc = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average="macro")

print('Accuracy: {:.2f}'.format(acc))
print('Macro F1-score: {:.2f}'.format(f1))

Accuracy: 0.97
Macro F1-score: 0.97
