# Abalone Age Prediction by Regression using ANN

## Load dataset for the training process

In [1]:
import os
from typing import List
from typing import Optional

import numpy as np
import pandas as pd

### Open the CSV

In [2]:
# Read the training and validation tables from CSV files in the working directory.
df_train, df_val = (
    pd.read_csv(csv_path) for csv_path in ("abalone_train.csv", "abalone_val.csv")
)
print("Num train:", len(df_train))
print("Num val:", len(df_val))
# Last expression of the cell: preview the first rows of the training table.
df_train.head()

Num train: 2924
Num val: 418


Unnamed: 0,Sex,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shell weight,Rings
0,1,0.305,0.225,0.07,0.1485,0.0585,0.0335,0.045,7
1,2,0.475,0.37,0.125,0.5095,0.2165,0.1125,0.165,9
2,0,0.46,0.37,0.12,0.5335,0.2645,0.108,0.1345,6
3,2,0.575,0.45,0.155,0.948,0.429,0.206,0.259,7
4,2,0.505,0.44,0.14,0.8275,0.3415,0.1855,0.239,8


### Create dataset loader for training and validation process

In [3]:
import torch
from torch.utils.data import TensorDataset, DataLoader, Dataset

### Features to be used and target

In [4]:
# Input columns fed to the network, in the order they appear in the feature tensor.
x_names = [
    "Sex",
    "Length",
    "Diameter",
    "Height",
    "Whole weight",
    "Shucked weight",
    "Viscera weight",
    "Shell weight",
]
# Column the model is trained to predict.
y_name = "Rings"
# Per the EDA note that accompanied this cell, Rings takes values from 1 to 29.

### Convert Pandas dataframe to PyTorch dataset

In [5]:
def df_to_dataset(
    df: pd.DataFrame,
    feature_names: Optional[List[str]] = None,
    target_name: Optional[str] = None,
) -> Dataset:
    """Convert a dataframe into a PyTorch dataset of (features, target) pairs.

    Args:
        df: Table holding the feature columns and the target column.
        feature_names: Columns used as model inputs, in tensor column order.
            Defaults to the notebook-level ``x_names``.
        target_name: Column used as the regression target. Defaults to the
            notebook-level ``y_name``.

    Returns:
        A ``TensorDataset`` whose items are ``(features, target)``, where
        ``features`` is a float32 tensor with one entry per feature column and
        ``target`` is a float32 scalar tensor.

    Raises:
        KeyError: If a requested column is missing from ``df``.
    """
    # Fall back to the notebook globals so existing one-argument calls keep working.
    if feature_names is None:
        feature_names = x_names
    if target_name is None:
        target_name = y_name

    # Selecting with a list keeps the features 2-D: (n_rows, n_features).
    # copy=True guarantees the tensors never share memory with the dataframe.
    features = df[list(feature_names)].to_numpy(dtype=np.float32, copy=True)
    # Selecting a single column yields a 1-D array (n_rows,), so no squeeze is needed.
    # The target is kept as float32 because it is regressed on, not used as a class index.
    labels = df[target_name].to_numpy(dtype=np.float32, copy=True)
    return TensorDataset(torch.from_numpy(features), torch.from_numpy(labels))

In [6]:
# Wrap both splits as tensor datasets using the same conversion (train first, then val).
ds_train, ds_val = map(df_to_dataset, (df_train, df_val))

### Create PyTorch data loader

In [7]:
# Mini-batch size shared by the training and validation loaders.
BATCH_SIZE = 64
# Training batches are shuffled, and the trailing partial batch is dropped for stability.
loader_train = DataLoader(
    dataset=ds_train,
    batch_size=BATCH_SIZE,
    shuffle=True,
    drop_last=True,
)
# Validation keeps the default order and every sample (the last batch may be smaller).
loader_val = DataLoader(dataset=ds_val, batch_size=BATCH_SIZE)

## Model training

In [8]:
import torch.nn as nn
import torch.nn.functional as F

from datetime import datetime
from torch.utils.tensorboard import SummaryWriter
from tqdm.notebook import tqdm
# from sklearn.metrics import f1_score, accuracy_score
from sklearn.metrics import mean_absolute_error, r2_score, mean_squared_error

In [9]:
# Number of passes over the training set performed by the training loop.
MAX_EPOCHS = 1_000
# Learning rate handed to the optimizer when it is created.
INIT_LR = 1e-5

### Which device we will use for training process (CPU/GPU)

In [10]:
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

Using device: cuda


### Create the model

In [11]:
class MLP4Layers(nn.Module):
    """Fully connected network: three ReLU hidden layers and a linear output layer.

    Layer widths are ``n_features -> 128 -> 512 -> 128 -> n_classes``.
    """

    def __init__(self, n_features: int, n_classes: int):
        """Create the four linear layers.

        Args:
            n_features: Size of each input vector.
            n_classes: Size of each output vector.
        """
        super().__init__()
        narrow, wide = 128, 512
        # Attribute names double as state_dict keys, so they are kept stable.
        self.fc1 = nn.Linear(in_features=n_features, out_features=narrow)
        self.fc2 = nn.Linear(in_features=narrow, out_features=wide)
        self.fc3 = nn.Linear(in_features=wide, out_features=narrow)
        self.fc4 = nn.Linear(in_features=narrow, out_features=n_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run a batch through the network.

        Args:
            x: Input tensor whose last dimension has size ``n_features``.

        Returns:
            The raw output of the last linear layer (no activation applied),
            with last dimension of size ``n_classes``.
        """
        hidden = x
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = F.relu(layer(hidden))
        return self.fc4(hidden)

# A single output unit: the network predicts the Rings value directly as one number.
# .to(device) moves the parameters to the selected device; it has no effect on CPU.
model = MLP4Layers(n_features=len(x_names), n_classes=1).to(device)

### Define the loss function and the optimizer

In [12]:
# Mean squared error between the predicted and the true target values.
loss_fn = nn.MSELoss()
# Adam over all model parameters, created with learning rate INIT_LR.
optimizer = torch.optim.Adam(params=model.parameters(), lr=INIT_LR)

### Prepare the logger

In [13]:
# Every run logs into its own folder, runs_reg/train_<timestamp> (one-second resolution).
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
save_dir = os.path.join('runs_reg', 'train_{}'.format(timestamp))
# Create the folder up front; the training loop also writes model checkpoints into it.
os.makedirs(save_dir, exist_ok=True)
writer = SummaryWriter(log_dir=save_dir)

### The training and validation process

During the training process, launch TensorBoard to see the logged train/val metrics:
```bash
tensorboard --logdir runs_reg
```
Then open the printed link in a web browser.

In [14]:
# Train the model for MAX_EPOCHS epochs. After every epoch the model is evaluated on
# the validation set, the metrics are logged to TensorBoard, and checkpoints are saved:
#   best.pt        - lowest validation MAE so far
#   lowest_loss.pt - lowest validation loss so far
#   last.pt        - most recent epoch

# Variables to hold some training status
epoch_number = 0
lowest_loss = np.inf
best_mae = np.inf

# Fail early with a clear message instead of dividing by zero inside the loop
if len(loader_train) == 0 or len(loader_val) == 0:
    raise ValueError("loader_train and loader_val must each yield at least one batch")

# Training loop
for epoch in tqdm(range(MAX_EPOCHS)):
    # Make sure gradient tracking is on, and do a pass over the data for the training process
    model.train()
    running_loss = 0.
    n_train_samples = 0
    for inputs, labels in loader_train:
        # Every data instance is an input & label pair; move it from CPU to the device
        inputs = inputs.to(device)
        labels = labels.to(device)
        # Zero your gradients for every batch!
        optimizer.zero_grad()
        # Make predictions for this batch
        outputs = model(inputs).flatten() # flatten: (batch,1) -> (batch,)
        # Compute the loss and its gradients
        loss = loss_fn(outputs, labels)
        loss.backward()
        # Adjust learning weights
        optimizer.step()
        # loss_fn returns the batch mean, so weight it by the batch size to
        # accumulate a per-sample sum
        batch_size = labels.size(0)
        running_loss += loss.item() * batch_size
        n_train_samples += batch_size
    # Calculate the average training loss per sample
    avg_loss = running_loss / n_train_samples

    # Switch to evaluation mode; gradients are not needed for validation
    model.eval()
    running_vloss = 0.0
    n_val_samples = 0
    y_true = []
    y_pred = []
    with torch.no_grad():
        for vinputs, vlabels in loader_val:
            y_true.extend(vlabels.numpy().tolist())
            voutputs = model(vinputs.to(device)).flatten() # flatten: (batch,1) -> (batch,)
            vloss = loss_fn(voutputs, vlabels.to(device))
            # The last validation batch can be smaller than the others, so the
            # batch means must be weighted by batch size to get the true average
            running_vloss += vloss.item() * vlabels.size(0)
            n_val_samples += vlabels.size(0)
            # we save the regression result as is
            y_pred.extend(voutputs.cpu().numpy().tolist())

    # Calculate the average validation loss per sample
    avg_vloss = running_vloss / n_val_samples
    # Calculate our regression metrics
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true, y_pred)

    # Log the per-sample average loss for both training and validation,
    # together with the validation metrics
    writer.add_scalar('train/loss', avg_loss, epoch_number + 1)
    writer.add_scalar('val/loss', avg_vloss, epoch_number + 1)
    writer.add_scalar('val/mae', mae, epoch_number + 1)
    writer.add_scalar('val/mse', mse, epoch_number + 1)
    writer.add_scalar('val/rmse', rmse, epoch_number + 1)
    writer.add_scalar('val/r2_score', r2, epoch_number + 1)
    writer.flush()

    # Track best performance, and save the model's state (weights)
    if mae < best_mae:
        best_mae = mae
        model_path = os.path.join(save_dir, 'best.pt')
        torch.save(model.state_dict(), model_path)
    if avg_vloss < lowest_loss:
        lowest_loss = avg_vloss
        model_path = os.path.join(save_dir, 'lowest_loss.pt')
        torch.save(model.state_dict(), model_path)
    # Always keep the most recent weights as well
    model_path = os.path.join(save_dir, 'last.pt')
    torch.save(model.state_dict(), model_path)

    epoch_number += 1

  0%|          | 0/1000 [00:00<?, ?it/s]

## References
[1] https://pytorch.org/tutorials/beginner/introyt/trainingyt.html