# Classification of Iris Data Set

## Import Libraries

In [1]:
import numpy as np
import torch
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch import nn
from torch.nn import Linear, Sequential

## Loading the Iris Data Set

In [4]:
# Load the Iris data set; its `data` and `target` attributes are read further down.
iris = load_iris()
#print(iris)

## Reproducibility of the results

In [None]:
import random


def seed_all(seed=1029):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU and CUDA), and configures cuDNN for deterministic behaviour.

    Args:
        seed: Integer seed applied to every generator.
    """
    # Imported here because nothing else in the notebook imports ``os``;
    # without it the assignment below raised NameError.
    import os

    random.seed(seed)
    # Recorded in the environment for child processes; the hash seed of the
    # interpreter that is already running is fixed at start-up.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # if you are using multi-GPU.
    # Turn off the cuDNN auto-tuner and ask for deterministic kernels.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True


seed_all(1029)

## Now we will process the Iris Data Set

In [5]:
# Feature matrix and class labels taken from the loaded data set
features, target = iris.data, iris.target

## Split the Data Set

In [None]:
# Hold out 20% of the samples for testing and train on the remaining 80%.
# `stratify` keeps the class proportions the same in both subsets, and the
# explicit `random_state` makes the split repeatable no matter which cells
# were executed beforehand.
x_train, x_test, y_train, y_test = train_test_split(
    features, target, test_size=0.2, stratify=target, random_state=1029
)

## Now we will standardize the values 

In [None]:
# Fit the scaler on the training split only, then apply that same fitted
# transformation to both splits so the test data never influences the scaling.
scaler = StandardScaler().fit(x_train)
x_train, x_test = scaler.transform(x_train), scaler.transform(x_test)

## Initializing the number of classes and features for the training function

In [None]:
# Input width of the network = number of feature columns;
# output width = number of distinct labels in the data set.
features_count = x_train.shape[-1]
classes = np.unique(target).size
print(features_count)
print(classes)

## Converting them into Pytorch Tensors

In [None]:
# Features are converted to float32 tensors and labels to long (int64) tensors.
x_train_tensor = torch.tensor(x_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
x_test_tensor = torch.tensor(x_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

## Building up the model 

In [None]:
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Fully connected network: features_count -> 10 -> 20 -> 15 -> classes,
# with a ReLU after every hidden layer.
dnn_model = Sequential(Linear(features_count,10), nn.ReLU(),
                       Linear(10,20), nn.ReLU(),
                       Linear(20,15), nn.ReLU(),
                       Linear(15, classes)).to(device)

## Computing accuracy

In [None]:
def compute_acc(model, X_data, y_data, device):
    """Return the fraction of samples in ``X_data`` that ``model`` labels correctly.

    The model is switched to evaluation mode and is left in that mode.

    Args:
        model: Network producing one score per class for every input row.
        X_data: Feature tensor; moved to ``device`` and cast to float32.
        y_data: Label tensor; moved to ``device`` and cast to long.
        device: Device on which the comparison is carried out.

    Returns:
        Accuracy as a float (correct predictions divided by ``y_data.size(0)``).
    """
    model.eval()
    # No gradients are needed for scoring
    with torch.no_grad():
        inputs = X_data.to(device).type(torch.float32)
        labels = y_data.to(device)
        scores = model(inputs)
        # The index of the largest score in each row is the predicted class
        guesses = torch.argmax(scores.data, 1)
        hits = guesses.eq(labels.type(torch.long)).sum().item()
        accuracy = float(hits) / labels.size(0)
    return accuracy

## Function to create model 

In [None]:
def create_model():
    """Build the three-hidden-layer classifier (features_count -> 10 -> 20 -> 15 -> classes).

    Every hidden layer is followed by a ReLU; the output layer has no activation.
    """
    widths = [features_count, 10, 20, 15]
    layers = []
    for fan_in, fan_out in zip(widths, widths[1:]):
        layers.append(Linear(fan_in, fan_out))
        layers.append(nn.ReLU())
    layers.append(Linear(widths[-1], classes))
    return Sequential(*layers)

## Training model 

In [None]:
def _evaluate(model, loss_fn, X_data, y_data):
    """Return ``(loss, accuracy)`` of ``model`` on the given data as Python floats."""
    model.eval()
    with torch.no_grad():
        outputs = model(X_data)
        loss = loss_fn(outputs, y_data).item()
        corrects = (torch.argmax(outputs, 1) == y_data).sum().item()
    return loss, float(corrects) / y_data.size(0)


def fit(model=None, X_train=None, y_train=None, loss_fn=None, optimizer=torch.optim.Adam,
        learning_rate=0.001, num_epochs=100, verbose=True, seed=1234, device=None):
    """Train ``model`` with full-batch gradient steps and record per-epoch metrics.

    Args:
        model: Network to train; it is updated in place and left in
            evaluation mode when the function returns.
        X_train: Training features; cast to float32.
        y_train: Training labels; cast to long.
        loss_fn: Callable ``loss_fn(outputs, targets)`` returning a scalar tensor.
        optimizer: Optimizer class, called as ``optimizer(params, lr=learning_rate)``.
        learning_rate: Learning rate handed to the optimizer.
        num_epochs: Number of passes over the whole training set.
        verbose: When true, print loss and accuracy after every epoch.
        seed: Value passed to ``torch.manual_seed`` before training starts.
        device: Device for the model and the data; ``None`` leaves both where
            they already are.

    Returns:
        Dict with the lists ``'train_loss'`` and ``'train_acc'``, one entry
        per epoch, measured after that epoch's weight update.
    """
    torch.manual_seed(seed)

    # Keep model and data on the same device: callers may pass a model that
    # was built on the CPU together with a CUDA device.
    if device is not None:
        model.to(device)
        X_train, y_train = X_train.to(device), y_train.to(device)
    # Cast once up front instead of on every epoch.
    X_train = X_train.type(torch.float32)
    y_train = y_train.type(torch.long)

    optim = optimizer(model.parameters(), lr=learning_rate)
    history = {'train_loss': [], 'train_acc': []}

    for epoch in range(num_epochs):
        model.train()

        # Forward propagation
        outputs = model(X_train)
        loss = loss_fn(outputs, y_train)

        # Clear gradients left over from the previous iteration, then
        # back-propagate and update the weights.
        optim.zero_grad()
        loss.backward()
        optim.step()

        # One forward pass in evaluation mode yields both metrics. This
        # replaces the call to `compute_loss`, which is not defined anywhere
        # in this notebook.
        train_loss, train_acc = _evaluate(model, loss_fn, X_train, y_train)

        history['train_loss'].append(train_loss)
        history['train_acc'].append(train_acc)

        if verbose:
            print(f"Epoch {epoch+1}/{num_epochs}")
            print(f"train loss= {train_loss:.4f} - train acc= {train_acc*100:.2f}%")

    return history

## Training model and choosing the optimiser 

In [None]:
from torch import optim

# Optimizer classes selectable by name
optim_dict = {
    name: getattr(optim, name)
    for name in ("Adam", "Adadelta", "Adagrad", "Adamax", "AdamW", "ASGD",
                 "NAdam", "RMSprop", "RAdam", "Rprop", "SGD")
}

# Train a freshly built network with SGD (learning rate 0.1) for 50 epochs
dnn_model = create_model().to(device)
history = fit(
    dnn_model,
    X_train=x_train_tensor,
    y_train=y_train_tensor,
    loss_fn=nn.CrossEntropyLoss(),
    optimizer=optim_dict["SGD"],
    learning_rate=0.1,
    num_epochs=50,
    verbose=True,
    seed=123,
    device=device,
)

## Testing the model  

In [None]:
def compute_wrong(model, X_data, y_data, device,
                  class_names=("Iris Setosa", "Iris Versicolor", "Iris Virginica")):
    """Print every misclassified sample and return the overall accuracy.

    One line of the form ``Predicted : <name> || Correct : <name>`` is printed
    per wrong prediction. The model is switched to evaluation mode and is
    left in that mode.

    Args:
        model: Network producing one score per class for every input row.
        X_data: Feature tensor; moved to ``device`` and cast to float32.
        y_data: Label tensor; moved to ``device`` and cast to long.
        device: Device on which the predictions are computed.
        class_names: Display name for each class index. An index outside
            this sequence is shown as ``class <index>``.

    Returns:
        Accuracy as a float (correct predictions divided by ``y_data.size(0)``).
    """
    def label_name(index):
        if 0 <= index < len(class_names):
            return class_names[index]
        return f"class {index}"

    model.eval()
    with torch.no_grad():  # no gradients are needed for scoring
        X_data, y_data = X_data.to(device), y_data.to(device)
        outputs = model(X_data.type(torch.float32))
        # The index of the largest score in each row is the predicted class
        predicted = torch.argmax(outputs, 1).tolist()
        actual = y_data.type(torch.long).tolist()

    corrects = sum(guess == truth for guess, truth in zip(predicted, actual))
    acc = float(corrects) / y_data.size(0)

    for guess, truth in zip(predicted, actual):
        if guess != truth:
            print(f"Predicted : {label_name(guess)} || Correct : {label_name(truth)}")

    return acc

In [None]:
# List the misclassified test samples and report the accuracy on the test split.
test_accuracy = compute_wrong(dnn_model, x_test_tensor, y_test_tensor, device)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

We can see that the model has a hard time differentiating between Iris Virginica and Iris Versicolor; this might be caused by the similarities in petal length, petal width, sepal length and sepal width between these two species.

# Testing Stage

Now we want to investigate whether the number of epochs affects the accuracy, and how the training time is affected as well.

In [None]:
import time


def number_of_epoches():
    """Plot test accuracy and training time against the number of epochs.

    For every epoch count a new network is built from the same seed and
    trained from scratch, so each point on the plots reflects exactly that
    many epochs. Two figures are shown: accuracy vs. epochs and time vs.
    epochs.
    """
    accuracy_hist = []
    time_taken = []
    num_epoch = [20, 30, 40, 50, 60, 70]

    for epochs in num_epoch:
        # Reusing one model across iterations would keep training the same
        # weights (20, then 50, then 90 ... epochs in total). Re-seeding and
        # rebuilding gives every run the same starting point.
        torch.manual_seed(101)
        dnn_model = create_model().to(device)

        starttime = time.perf_counter()
        fit(dnn_model, X_train=x_train_tensor, y_train=y_train_tensor, loss_fn=nn.CrossEntropyLoss(),
            optimizer=optim_dict["SGD"], learning_rate=0.1, num_epochs=epochs, verbose=False, seed=101, device=device)
        time_taken.append(time.perf_counter() - starttime)

        # saving the accuracies
        accuracy_hist.append(compute_acc(dnn_model, x_test_tensor, y_test_tensor, device))

    # plotting for Accuracy vs Epochs
    plt.plot(num_epoch, accuracy_hist, 'bo-', label='Accuracy vs Number of Epochs')
    plt.xlabel('Number of Epochs')
    plt.ylabel('Accuracy')
    plt.title('Accuracy vs Number of Epochs')
    plt.legend()
    plt.grid(True)
    plt.show()

    # plotting for Time vs Epochs
    plt.plot(num_epoch, time_taken, 'ro-', label='Time vs Number of Epochs')
    plt.xlabel('Number of Epochs')
    plt.ylabel('Time')
    plt.title('Time vs Number of Epochs')
    plt.legend()
    plt.grid(True)
    plt.show()

    return None


number_of_epoches()

Now we want to see if we can reduce the computational cost by reducing the number of hidden layers to one.

## With one hidden layer only

In [None]:
def create_model_one_layer():
    """Build the classifier with a single 10-unit hidden layer followed by a ReLU."""
    hidden = Linear(features_count, 10)
    activation = nn.ReLU()
    output = Linear(10, classes)
    return Sequential(hidden, activation, output)

In [None]:
# Train the single-hidden-layer network with SGD (learning rate 0.1) for 60 epochs
dnn_model = create_model_one_layer().to(device)
history = fit(
    dnn_model,
    X_train=x_train_tensor,
    y_train=y_train_tensor,
    loss_fn=nn.CrossEntropyLoss(),
    optimizer=optim_dict["SGD"],
    learning_rate=0.1,
    num_epochs=60,
    verbose=False,
    seed=101,
    device=device,
)


In [None]:
# List the misclassified test samples and report the test accuracy of the one-layer model.
test_accuracy = compute_wrong(dnn_model, x_test_tensor, y_test_tensor, device)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

We can see that even with one hidden layer the model still performs well, so we could save computation by using just one hidden layer.

Now we want to see how the number of neurons in a layer affects the accuracy and the computation time.

In [None]:
import time


def number_of_neurons_a_layer():
    """Plot test accuracy and training time against the hidden-layer width.

    For every width in ``num_neurons`` a one-hidden-layer network is built,
    trained for 60 epochs with SGD and scored on the test split. Two figures
    are shown: accuracy vs. neurons and time vs. neurons.
    """
    accuracy_hist = []
    time_taken = []
    num_neurons = [5, 10, 15, 20, 25, 30]

    for width in num_neurons:
        # Seed before building so the initial weights do not depend on which
        # cells ran earlier, and place the model on the same device as the
        # data that `fit` and `compute_acc` move there.
        torch.manual_seed(101)
        model = Sequential(Linear(features_count, width), nn.ReLU(),
                           Linear(width, classes)).to(device)

        starttime = time.perf_counter()
        fit(model, X_train=x_train_tensor, y_train=y_train_tensor, loss_fn=nn.CrossEntropyLoss(),
            optimizer=optim_dict["SGD"], learning_rate=0.1, num_epochs=60, verbose=False, seed=101, device=device)
        time_taken.append(time.perf_counter() - starttime)

        # saving the accuracies
        accuracy_hist.append(compute_acc(model, x_test_tensor, y_test_tensor, device))

    # plotting for Accuracy vs Neurons
    plt.plot(num_neurons, accuracy_hist, 'bo-', label='Accuracy vs Number of Neurons')
    plt.xlabel('Number of Neurons')
    plt.ylabel('Accuracy')
    plt.title('Accuracy vs Number of Neurons')
    plt.legend()
    plt.grid(True)
    plt.show()

    # plotting for Time vs Neurons (the legend label previously said "Accuracy")
    plt.plot(num_neurons, time_taken, 'ro-', label='Time vs Number of Neurons')
    plt.xlabel('Number of Neurons')
    plt.ylabel('Time')
    plt.title('Time vs Number of Neurons')
    plt.legend()
    plt.grid(True)
    plt.show()

    return None


number_of_neurons_a_layer()

As we can see, most of the time a higher number of neurons gives a higher accuracy; however, the time needed for computation also increases, which is a trade-off. This is because a larger number of neurons can **learn more complex patterns**, **avoid underfitting** and **learn more non-linear combinations** of the features.