####  CPU Version of PyTorch
If you do not need GPU support, you can install the CPU-only version of PyTorch, which may help avoid issues with missing CUDA-related libraries:

##### pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu


In [2]:
# Import required libraries
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

In [3]:
# Load the Iris dataset
iris = datasets.load_iris()
X = iris.data  # Features (4 features)
y = iris.target  # Labels (3 classes)

In [6]:
X[:5]

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2]])

In [7]:
y[:5]

array([0, 0, 0, 0, 0])

In [8]:
# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [9]:

# Standardize the data (zero mean, unit variance)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [10]:
# Convert the data to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)
y_test = torch.tensor(y_test, dtype=torch.long)

In [11]:
X_train[:5]

tensor([[-1.4739,  1.2037, -1.5625, -1.3126],
        [-0.1331,  2.9924, -1.2760, -1.0456],
        [ 1.0859,  0.0857,  0.3859,  0.2892],
        [-1.2301,  0.7565, -1.2187, -1.3126],
        [-1.7177,  0.3093, -1.3906, -1.3126]])

In [12]:
# Define the ANN model with 3 hidden layers
class SimpleANN(nn.Module):
    def __init__(self, input_size, hidden_size1, hidden_size2, hidden_size3, output_size):
        super(SimpleANN, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size1)  # First hidden layer
        self.fc2 = nn.Linear(hidden_size1, hidden_size2)  # Second hidden layer
        self.fc3 = nn.Linear(hidden_size2, hidden_size3)  # Third hidden layer
        self.fc4 = nn.Linear(hidden_size3, output_size)  # Output layer
        self.relu = nn.ReLU()  # ReLU activation function

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        out = self.relu(out)
        out = self.fc3(out)
        out = self.relu(out)
        out = self.fc4(out)
        return out


In [13]:
# Model parameters
input_size = 4  # Number of features in the dataset
hidden_size1 = 16  # Number of neurons in the first hidden layer
hidden_size2 = 12  # Number of neurons in the second hidden layer
hidden_size3 = 8  # Number of neurons in the third hidden layer
output_size = 3  # Number of output classes (setosa, versicolor, virginica)


In [16]:
# Instantiate the model
model = SimpleANN(input_size, hidden_size1, hidden_size2, hidden_size3, output_size)
model

SimpleANN(
  (fc1): Linear(in_features=4, out_features=16, bias=True)
  (fc2): Linear(in_features=16, out_features=12, bias=True)
  (fc3): Linear(in_features=12, out_features=8, bias=True)
  (fc4): Linear(in_features=8, out_features=3, bias=True)
  (relu): ReLU()
)

In [17]:
# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()  # Cross-entropy loss for multi-class classification
optimizer = optim.Adam(model.parameters(), lr=0.01)  # Adam optimizer

##### Trainn the model

In [20]:
# Train the model for a specified number of epochs
num_epochs = 100  # Number of epochs for training
for epoch in range(num_epochs):
    # Forward pass: pass the input data through the model to get the predicted outputs
    outputs = model(X_train)
    # Calculate the loss between the predicted outputs and the actual labels
    loss = criterion(outputs, y_train)
    
    # Backward pass and optimization: update the model parameters to minimize the loss
    # Zero the gradients of the optimizer to prevent accumulation
    optimizer.zero_grad()
    # Backpropagate the loss to compute the gradients of the model parameters
    loss.backward()
    # Update the model parameters using the gradients and the optimizer
    optimizer.step()
    
    # Print the loss at every 10th epoch for monitoring the training progress
    if (epoch+1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [10/100], Loss: 0.0230
Epoch [20/100], Loss: 0.0198
Epoch [30/100], Loss: 0.0164
Epoch [40/100], Loss: 0.0132
Epoch [50/100], Loss: 0.0104
Epoch [60/100], Loss: 0.0083
Epoch [70/100], Loss: 0.0065
Epoch [80/100], Loss: 0.0051
Epoch [90/100], Loss: 0.0041
Epoch [100/100], Loss: 0.0034


##### Evaluation of model

In [21]:
# Evaluate the model
with torch.no_grad():
    # Predict on training and test sets
    y_pred_train = model(X_train).argmax(dim=1)
    y_pred_test = model(X_test).argmax(dim=1)

    # Compute accuracy
    train_accuracy = accuracy_score(y_train, y_pred_train)
    test_accuracy = accuracy_score(y_test, y_pred_test)

    print(f'Training Accuracy: {train_accuracy:.4f}')
    print(f'Test Accuracy: {test_accuracy:.4f}')

Training Accuracy: 1.0000
Test Accuracy: 1.0000


##### Inference on unseen data

In [23]:
import numpy as np

In [26]:
# Function for inference on new data
def predict_new_data(new_data):
    # Standardize the new data (same scaler used for training)
    new_data = scaler.transform(np.array(new_data).reshape(1, -1))  # Reshape for single sample
    new_data_tensor = torch.tensor(new_data, dtype=torch.float32)  # Convert to tensor
    
    # Get the model output
    with torch.no_grad():  # No need to track gradients for inference
        output = model(new_data_tensor)
        print(output.argmax(dim=1))
        predicted_class = output.argmax(dim=1).item()  # Get the predicted class

    # Return the predicted class
    return iris.target_names[predicted_class]  # Return the class name


# Example of inference on a new data point
new_sample = [5.1, 3.5, 1.4, 0.2]  # A new Iris sample (likely to be Setosa)
predicted_class = predict_new_data(new_sample)
print(f'Predicted class for new sample {new_sample}: {predicted_class}')

tensor([0])
Predicted class for new sample [5.1, 3.5, 1.4, 0.2]: setosa


In [32]:
# Function for inference on new data (already defined)
def predict_new_data(new_data):
    # Standardize the new data (same scaler used for training)
    new_data = scaler.transform(np.array(new_data).reshape(1, -1))  # Reshape for single sample
    new_data_tensor = torch.tensor(new_data, dtype=torch.float32)  # Convert to tensor
    
    # Get the model output
    with torch.no_grad():  # No need to track gradients for inference
        output = model(new_data_tensor)
        predicted_class = output.argmax(dim=1).item()  # Get the predicted class

    # Return the predicted class
    return iris.target_names[predicted_class]  # Return the class name


# Example of inference on multiple new data points
new_samples = [
    [5.7, 2.8, 4.5, 1.3],  # Likely to be Versicolor
    [6.3, 3.3, 6.0, 2.5],  # Likely to be Virginica
    [4.9, 3.0, 1.4, 0.2],  # Likely to be Setosa
    [5.5, 2.4, 3.7, 1.0],  # Likely to be Versicolor
    [7.2, 3.6, 6.1, 2.5],  # Likely to be Virginica
]

# Loop through each new sample and print the predicted class
for idx, sample in enumerate(new_samples):
    predicted_class = predict_new_data(sample)
    print(f'Predicted class for new sample {sample}: {predicted_class}')


Predicted class for new sample [5.7, 2.8, 4.5, 1.3]: versicolor
Predicted class for new sample [6.3, 3.3, 6.0, 2.5]: virginica
Predicted class for new sample [4.9, 3.0, 1.4, 0.2]: setosa
Predicted class for new sample [5.5, 2.4, 3.7, 1.0]: versicolor
Predicted class for new sample [7.2, 3.6, 6.1, 2.5]: virginica
