# Abalone Age Prediction by Multi-class Classification using ANN

## Load dataset for the training process

In [1]:
import os
import numpy as np
import pandas as pd

from typing import List

### Open the CSV

In [2]:
# Load the pre-split training and validation tables from the working directory.
df_train, df_val = (
    pd.read_csv(csv_path) for csv_path in ("abalone_train.csv", "abalone_val.csv")
)

# Report how many rows each split holds, then preview the training table.
print("Num train:", df_train.shape[0])
print("Num val:", df_val.shape[0])
df_train.head()

Num train: 2924
Num val: 418


Unnamed: 0,Sex,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shell weight,Rings
0,1,0.305,0.225,0.07,0.1485,0.0585,0.0335,0.045,7
1,2,0.475,0.37,0.125,0.5095,0.2165,0.1125,0.165,9
2,0,0.46,0.37,0.12,0.5335,0.2645,0.108,0.1345,6
3,2,0.575,0.45,0.155,0.948,0.429,0.206,0.259,7
4,2,0.505,0.44,0.14,0.8275,0.3415,0.1855,0.239,8


### Create dataset for training and validation process

In [3]:
import tensorflow as tf
import tensorflow_addons as tfa

2023-05-12 13:28:04.652823: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-05-12 13:28:04.654562: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-05-12 13:28:04.688477: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-05-12 13:28:04.688899: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.

TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a min

### Features to be used and target

In [4]:
# Input columns fed to the network, in the order they are selected from the dataframe.
x_names = [
    "Sex",
    "Length",
    "Diameter",
    "Height",
    "Whole weight",
    "Shucked weight",
    "Viscera weight",
    "Shell weight",
]

# Target column.
y_name = "Rings"

# One class per ring count, 1 to 29 inclusive (range taken from the EDA).
y_classes = [*range(1, 30)]

### Convert Pandas dataframe to numpy arrays

In [5]:
def df_to_dataset(df: pd.DataFrame) -> tuple:
    """Convert a dataframe into a ``(features, labels)`` pair of arrays.

    Reads the notebook-level ``x_names``, ``y_name`` and ``y_classes``.
    Assumes ``y_classes`` is a contiguous ascending range of ring counts.

    Args:
        df: Table containing every column listed in ``x_names`` plus the
            ``y_name`` column.

    Returns:
        A tuple ``(features, labels)`` where ``features`` is a float32 array
        with one column per entry of ``x_names`` and ``labels`` is the one-hot
        encoding (``len(y_classes)`` columns) of the clipped ring counts.
    """
    lowest, highest = min(y_classes), max(y_classes)

    features = df[x_names].to_numpy(dtype=np.float32)

    # Selecting the column as a Series yields a 1-D array directly.
    labels = df[y_name].to_numpy(dtype=np.int64)
    # Ring counts outside the known classes are folded into the nearest class.
    labels = np.clip(labels, lowest, highest)
    # Shift so the smallest class becomes index 0. Using `lowest` instead of a
    # literal 1 keeps the indices aligned with the clip bounds if y_classes changes.
    labels = labels - lowest
    # Integer class indices -> one-hot rows.
    labels = tf.keras.utils.to_categorical(labels, num_classes=len(y_classes))
    return features, labels

In [6]:
# Turn each split into its (features, one-hot labels) pair.
(feat_train, lab_train), (feat_val, lab_val) = map(df_to_dataset, (df_train, df_val))

## Model training

In [7]:
from datetime import datetime
from tqdm.notebook import tqdm

In [8]:
# Training hyper-parameters.
MAX_EPOCHS = 2_000  # number of epochs passed to model.fit
INIT_LR = 1.0e-5    # learning rate handed to the Adam optimizer

### Which device is used for the training process (CPU/GPU)

If a TensorFlow operation has both CPU and GPU implementations, the GPU device is prioritized by default when the operation is assigned, so there is no need to specify the device manually here.

*Reference: [Use a GPU](https://www.tensorflow.org/guide/gpu)*

In [9]:
# List the GPU devices TensorFlow can see and report how many there are.
gpu_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))

Num GPUs Available:  0


2023-05-12 13:28:07.851101: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:982] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-05-12 13:28:07.852383: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


### Create the model

In [10]:
import nets

# Seed Python, NumPy and TensorFlow before any weights are created so that
# initialisation and batch shuffling repeat across Restart & Run All.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Build the classifier via nets.get_model_mlp, passing the number of ring classes.
model = nets.get_model_mlp(n_classes=len(y_classes))

### Define the loss function and the optimizer

In [11]:
# Adam, starting from the INIT_LR learning rate.
optimizer = tf.keras.optimizers.Adam(learning_rate=INIT_LR)

# Cross-entropy over one-hot targets, with model outputs treated as raw logits.
# NOTE(review): from_logits=True assumes nets.get_model_mlp ends without a
# softmax layer — confirm against nets.py.
loss_fn = tf.keras.losses.CategoricalCrossentropy(from_logits=True)

### Prepare the logger

In [12]:
# Every run writes into its own timestamped folder under runs_clf/.
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
run_name = 'train_{}'.format(timestamp)
save_dir = os.path.join('runs_clf', run_name)

# exist_ok=True makes re-running this cell within the same second harmless.
os.makedirs(save_dir, exist_ok=True)

In [13]:
# Metrics to log. Keras reports each one for the training split under its own
# name and for the validation split under the same name prefixed with "val_".
metrics_l = [
    tf.keras.metrics.CategoricalAccuracy(),
    tfa.metrics.F1Score(
        num_classes=len(y_classes),
        average="weighted",
        name="weighted_f1",
    ),
]


# Model callbacks.
class LastModelCheckpoint(tf.keras.callbacks.Callback):
    """Overwrite a single "last" weights checkpoint at the end of every epoch.

    Args:
        filepath: Destination prefix passed to ``save_weights``. When omitted,
            ``<save_dir>/last`` is resolved from the notebook-level ``save_dir``
            at save time, so ``LastModelCheckpoint()`` keeps working.
    """

    def __init__(self, filepath=None):
        super().__init__()
        self.filepath = filepath

    def on_epoch_end(self, epoch, logs=None):
        # Hooking the epoch end (rather than the end of a validation pass)
        # keeps "last" up to date even when validation is skipped for an epoch.
        target = self.filepath
        if target is None:
            target = os.path.join(save_dir, 'last')
        # self.model is the model Keras attached to this callback, so the
        # checkpoint cannot drift to a different notebook-level `model`.
        self.model.save_weights(target)


callbacks = [
    # "best": highest weighted F1 on the validation split. Monitoring the
    # un-prefixed training metric would favour over-fitted weights.
    tf.keras.callbacks.ModelCheckpoint(
        os.path.join(save_dir, 'best'),
        monitor="val_weighted_f1",
        save_best_only=True,
        mode="max",
        save_weights_only=True,
    ),
    # "lowest_loss": lowest loss on the validation split.
    tf.keras.callbacks.ModelCheckpoint(
        os.path.join(save_dir, 'lowest_loss'),
        monitor="val_loss",
        save_best_only=True,
        mode="min",
        save_weights_only=True,
    ),
    # "last": weights after the most recent epoch.
    LastModelCheckpoint(os.path.join(save_dir, 'last')),
    # Scalars for `tensorboard --logdir runs_clf`.
    tf.keras.callbacks.TensorBoard(save_dir),
]

### The training and validation process

During the training process, launch TensorBoard to see the logged train/val metrics:
```bash
tensorboard --logdir runs_clf
```
Then open the printed link in a web browser.

In [14]:
# configure model
model.compile(
    optimizer=optimizer,
    loss=loss_fn,
    metrics=metrics_l)

In [15]:
BATCH_SIZE = 64

# Keyword options for fit(): silent console output (progress is logged through
# the callbacks) and a validation pass after every epoch.
fit_options = dict(
    batch_size=BATCH_SIZE,
    epochs=MAX_EPOCHS,
    verbose=0,
    callbacks=callbacks,
    validation_data=(feat_val, lab_val),
    validation_freq=1,
)

# Left as the last expression so the returned History object is displayed.
model.fit(feat_train, lab_train, **fit_options)

<keras.callbacks.History at 0x7f80b03e5e50>

## References
[1] https://www.tensorflow.org/tutorials/quickstart/advanced

[2] https://www.tensorflow.org/tensorboard/scalars_and_keras

[3] https://www.tensorflow.org/tutorials/keras/save_and_load
