In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, random_split, ConcatDataset
from torchvision import datasets
from torchvision import transforms

import optuna
from optuna.trial import TrialState

from feature_engine.encoding import OneHotEncoder
from ucimlrepo import fetch_ucirepo

import pandas as pd
import numpy as np

from sklearn.neighbors import LocalOutlierFactor
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import root_mean_squared_error


In [3]:
abalone = fetch_ucirepo(id=1)

X = abalone.data.features
y = abalone.data.targets

ohe = OneHotEncoder(variables = ['Sex'])
X = ohe.fit_transform(X)

X.drop(['Whole_weight', 'Length'], axis = 1, inplace = True)

df = pd.concat([X, y], axis = 1)

steps = [
    ('scale', StandardScaler()),
    ('LOF', LocalOutlierFactor(contamination = 0.05))
]

outliers = Pipeline(steps).fit_predict(X)

df['outliers'] = outliers

df2 = df.query('Height < 0.3 and Rings > 2 and outliers != -1').copy()
X = df2.drop(['Rings', 'outliers'], axis = 1)
y = np.log(df2[['Rings']])


X = X.to_numpy()
y = y.to_numpy()

dataset = TensorDataset(torch.tensor(X).float(), torch.tensor(y).float())


In [3]:
class AbaloneModel(nn.Module):
    def __init__(self, in_features, layer_config):
        super().__init__()

        layers = []
        current_dim = in_features

        for hidden_dim in layer_config:
            layers.append(nn.Linear(current_dim, hidden_dim))
            layers.append(nn.BatchNorm1d(hidden_dim))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(0.2))
            current_dim = hidden_dim
        
        layers.append(nn.Linear(current_dim, 1))
        self.net = nn.Sequential(*layers)

    def forward(self, x): return self.net(x)

In [4]:
class EarlyStopping:
    def __init__(self, patience = 5, delta = 0, verbose = False):
        self.patience = patience
        self.delta = delta
        self.verbose = verbose
        self.best_loss = None
        self.no_improvement_count = 0
        self.stop_training = False
    
    def check_early_stop(self, val_loss):
        if self.best_loss is None or val_loss < self.best_loss - self.delta:
            self.best_loss = val_loss
            self.no_improvement_count = 0
        else: 
            self.no_improvement_count += 1
            if self.no_improvement_count >= self.patience:
                self.stop_training = True
                if self.verbose:
                    print("Stopping early as no improvement has been observed.")

In [5]:
def getsplits():

    train_size = int(0.7 * len(dataset))
    val_size = int(0.1 * len(dataset))
    test_size = len(dataset) - train_size - val_size

    generator = torch.Generator().manual_seed(42)

    train_dataset, val_dataset, test_dataset = random_split(dataset, [train_size, val_size, test_size], generator = generator)

    return train_dataset, val_dataset, test_dataset

In [6]:
def objective(trial):

    train_dataset, val_dataset, _ = getsplits()

    n_layers = trial.suggest_int('n_layers', 1, 4)

    layer_config = []
    for i in range(n_layers):
        nodes = trial.suggest_int(f'n_units_l{i}', 16, 240)
        layer_config.append(nodes)

    optimizer_name = trial.suggest_categorical('optimizer', ['Adam', 'SGD', 'RMSprop'])
    lr = trial.suggest_float('lr', 1e-5, 1e-2, log = True)
    batch_size = trial.suggest_int('batch_size', 10, 120)

    train_loader = DataLoader(train_dataset, batch_size = batch_size, shuffle = True, drop_last = True)
    val_loader = DataLoader(val_dataset, batch_size = batch_size, shuffle = True)

    model = AbaloneModel(X.shape[1], layer_config)
    criterion = nn.MSELoss()

    if optimizer_name == 'Adam':
        optimizer = getattr(optim, optimizer_name) (model.parameters(), lr = lr)
    else:
        momentum = trial.suggest_float('momentum', 0.5, 0.99)
        optimizer = getattr(optim, optimizer_name) (model.parameters(), lr = lr, momentum = momentum)

    epochs = 40

    for epoch in range(epochs):
        model.train()
        for batch_x, batch_y in train_loader:

            optimizer.zero_grad()
            output = model(batch_x)
            loss = criterion(output, batch_y)
            loss.backward()
            optimizer.step()

        model.eval()
        val_loss = 0
        with torch.no_grad():
            for batch_x, batch_y in val_loader:

                output = model(batch_x)
                val_loss += criterion(output, batch_y).item() * batch_x.size(0)

        
        accuracy = val_loss / len(val_loader.dataset)

        trial.report(accuracy, epoch)
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()
        
    return accuracy


In [7]:
# study = optuna.create_study(
#     storage = "sqlite:///db.sqlite3",
#     study_name = "abalone_experiment",
#     direction = "minimize"
# )
study = optuna.create_study(direction = 'minimize')
study.optimize(objective, n_trials = 100)


pruned_trials = [t for t in study.trials if t.state == optuna.trial.TrialState.PRUNED]
complete_trials = [t for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE]

print('Study statistics: ')
print('  Number of finished trials: ', len(study.trials))
print('  Number of pruned trials: ', len(pruned_trials))
print('  Number of completed trials: ', len(complete_trials))

print('Best trials:')
trial = study.best_trial

print('  Value: ', trial.value)
print('  Params: ')
for key, value in trial.params.items():
    print(f'    {key}: {value}')

[32m[I 2026-02-06 10:31:04,182][0m A new study created in memory with name: no-name-67d9fd24-31db-4c36-8b45-b74d45c42863[0m


[32m[I 2026-02-06 10:31:23,374][0m Trial 0 finished with value: 0.04061840816090504 and parameters: {'n_layers': 1, 'n_units_l0': 239, 'optimizer': 'Adam', 'lr': 0.0011161424505776083, 'batch_size': 110}. Best is trial 0 with value: 0.04061840816090504.[0m
[32m[I 2026-02-06 10:31:41,719][0m Trial 1 finished with value: 0.0427121077805306 and parameters: {'n_layers': 1, 'n_units_l0': 224, 'optimizer': 'RMSprop', 'lr': 0.005178324350045523, 'batch_size': 30, 'momentum': 0.7336592829460561}. Best is trial 0 with value: 0.04061840816090504.[0m
[32m[I 2026-02-06 10:31:46,964][0m Trial 2 finished with value: 0.040198706203337875 and parameters: {'n_layers': 2, 'n_units_l0': 51, 'n_units_l1': 85, 'optimizer': 'RMSprop', 'lr': 0.0013219553348158355, 'batch_size': 114, 'momentum': 0.8226522730982719}. Best is trial 2 with value: 0.040198706203337875.[0m
[32m[I 2026-02-06 10:32:12,144][0m Trial 3 finished with value: 0.051310095343400135 and parameters: {'n_layers': 4, 'n_units_l0': 7

Study statistics: 
  Number of finished trials:  100
  Number of pruned trials:  87
  Number of completed trials:  13
Best trials:
  Value:  0.03909202642983409
  Params: 
    n_layers: 2
    n_units_l0: 232
    n_units_l1: 165
    optimizer: RMSprop
    lr: 0.0037118451569468454
    batch_size: 25
    momentum: 0.7787041198195408


In [8]:
optuna.visualization.plot_param_importances(study).show()
optuna.visualization.plot_optimization_history(study).show()
optuna.visualization.plot_slice(study, params = ['n_layers']).show()

In [9]:
# storage_url = 'sqlite:///db.sqlite3'
# study_name = "abalone_experiment"
# loaded_study = optuna.load_study(study_name = study_name, storage = storage_url)

df3 = study.trials_dataframe()
best_score = df3['value'].min()

threshold = best_score * 1.05
candidates = df3[df3['value'] <= threshold]
candidates = candidates.sort_values(by = 'params_n_layers', ascending = True)
best_candidate = candidates.iloc[0]

print(best_candidate)


number                                        0
value                                  0.040618
datetime_start       2026-02-06 10:31:04.193511
datetime_complete    2026-02-06 10:31:23.374314
duration                 0 days 00:00:19.180803
params_batch_size                           110
params_lr                              0.001116
params_momentum                             NaN
params_n_layers                               1
params_n_units_l0                           239
params_n_units_l1                           NaN
params_n_units_l2                           NaN
params_n_units_l3                           NaN
params_optimizer                           Adam
state                                  COMPLETE
Name: 0, dtype: object


In [12]:
patience = 40
delta = 0.01

model = AbaloneModel(X.shape[1], [239])
criterion = nn.MSELoss()
early_stopping = EarlyStopping(patience = patience, delta = delta, verbose = True)

batch_size = int(best_candidate['params_batch_size'])
lr = float(best_candidate['params_lr'])
optimizer_name = str(best_candidate['params_optimizer'])

train_dataset, val_dataset, test_dataset = getsplits()
#full_train = ConcatDataset([train_dataset, val_dataset])

if optimizer_name == 'Adam':
    optimizer = getattr(optim, optimizer_name)(model.parameters(), lr = lr)
else:
    momentum = float(best_candidate['params_momentum'])
    optimizer = getattr(optim, optimizer_name)(model.parameters(), lr = lr, momentum = momentum)

train_loader = DataLoader(train_dataset, batch_size = batch_size, shuffle = True)
val_loader = DataLoader(val_dataset, batch_size = batch_size, shuffle = True)
test_loader = DataLoader(test_dataset, batch_size = batch_size, shuffle = True)

epochs = 200
best_rmse = 1


for epoch in range(epochs):
    val_loss = 0

    model.train()
    for batch_x, batch_y in train_loader:
        optimizer.zero_grad()
        output = model(batch_x)
        loss = criterion(output, batch_y)
        loss.backward()
        optimizer.step()

    if (epoch + 1) % 10 == 0:
        print(f'Epoch {epoch + 1} / {epochs} | Loss: {loss.item():.4f}')

    model.eval()
    with torch.no_grad():
        for batch_x, batch_y in val_loader:
            output = model(batch_x)
            val_loss += criterion(output, batch_y).item() * batch_x.size(0)

    val_loss /= len(val_loader.dataset)

    early_stopping.check_early_stop(val_loss)

    if early_stopping.stop_training:
        print(f'Early stopping at epoch {epoch}')
        break

#torch.save(model.state_dict(), '1layer_abalone_model.pth')

Epoch 10 / 200 | Loss: 0.1313
Epoch 20 / 200 | Loss: 0.0756
Epoch 30 / 200 | Loss: 0.0904
Epoch 40 / 200 | Loss: 0.0858
Stopping early as no improvement has been observed.
Early stopping at epoch 42


In [None]:
model.eval()
test_error = 0

with torch.no_grad():
    for batch_x, batch_y in test_loader:
        output = model(batch_x)
        test_error += criterion(output, batch_y).item() * batch_x.size(0)

rmse = np.sqrt(test_error / len(test_loader.dataset))

print(f"Final RMSE: {rmse:.4f}!")

print(np.std(y))

Final RMSE: 0.1980!
0.3118088115462802


In [26]:
print(y.shape)
print(X.shape)

y_pred = model(torch.tensor(X).float())
y_pred = y_pred.detach().numpy()
print(y_pred.shape)


(3967, 1)
(3967, 8)
(3967, 1)


In [27]:
from sklearn.metrics import r2_score

print(r2_score(y,y_pred))

0.6037330288463594


In [None]:
loaded_model = AbaloneModel(X.shape[0], layer_sizes = [124])
loaded_model.load_sate_dict(torch.load("best_abalone_model.pth"))
loaded_model.eval()
prediction = loaded_model('some_new_data')