# Model training (continued)

We have so far gathered a dataset of 100,000 uniformly drawn datapoints and a dataset of 20,000 near-boundary datapoints obtained by bisection, and implemented `CustomLoss` to account for the normality condition near the boundary. Here we summarize model training and observe performance under different conditions. Furthermore, we tune the hyperparameters to try to improve the model trained with `CustomLoss`.

## Preparation

In [2]:
import numpy as np
import math
import pandas as pd

import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
import torch.optim as optim

from torchmetrics import Accuracy
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import numpy as np

In [3]:
## Load the dataset
class LorenzDataset(Dataset):
    """Map-style dataset of initial conditions and labels read from a CSV file.

    The CSV must provide the columns ``x0``, ``y0`` and ``z0`` (returned
    together as the feature vector) and ``attracted`` (returned as the label).

    Args:
        csv_file: Path (or anything ``pandas.read_csv`` accepts) of the CSV.

    Attributes:
        data: The raw ``pandas.DataFrame`` as loaded from ``csv_file``.
    """

    def __init__(self, csv_file):
        self.data = pd.read_csv(csv_file)
        # Convert the columns to tensors once. Looking rows up through
        # ``DataFrame.iloc`` on every ``__getitem__`` call builds a new Series
        # per access, which dominates the data-loading time.
        self._features = torch.tensor(
            self.data[["x0", "y0", "z0"]].to_numpy(), dtype=torch.float32
        )
        self._labels = torch.tensor(
            self.data["attracted"].to_numpy(), dtype=torch.float32
        )

    def __len__(self):
        """Return the number of rows in the CSV."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(features, label)`` for row ``idx``.

        ``features`` is a float32 tensor of shape ``(3,)`` holding
        ``(x0, y0, z0)``; ``label`` is a 0-dim float32 tensor holding
        ``attracted``. Both are fresh copies, so callers may modify them
        without affecting the dataset.
        """
        features = self._features[idx].clone()
        label = self._labels[idx].clone()
        return features, label
    
# Load both CSV files; each LorenzDataset reads its whole file with pandas.
dataset_uniform = LorenzDataset("dataset_large.csv")
dataset_near = LorenzDataset("dataset_near.csv")

## Prepare separated and merged datasets
# 80/20 train/test split of each dataset, given as absolute lengths
# (80000 + 20000 and 16000 + 4000). No generator is passed, so the split
# depends on the global torch RNG state at the time this cell runs.
dataset_uniform_train, dataset_uniform_test = torch.utils.data.random_split(dataset_uniform, [80000, 20000])
dataset_near_train, dataset_near_test = torch.utils.data.random_split(dataset_near, [16000, 4000])
# Merged views: uniform samples first, then near-boundary samples.
dataset_train_merged = torch.utils.data.ConcatDataset([dataset_uniform_train, dataset_near_train])
dataset_test_merged = torch.utils.data.ConcatDataset([dataset_uniform_test, dataset_near_test])



In [4]:
## Define the neural network. The complexity of the network is a hyperparameter.
class Net(nn.Module):
    """Fully connected classifier mapping a 3-component input to a probability.

    Three hidden layers of width 128 with ReLU activations are followed by a
    single output unit passed through a sigmoid, so the output lies in (0, 1)
    and has shape ``(..., 1)``.
    """

    def __init__(self):
        super().__init__()
        hidden_width = 128
        # Layers are created in the same order as they are applied.
        self.fc1 = nn.Linear(3, hidden_width)
        self.fc2 = nn.Linear(hidden_width, hidden_width)
        self.fc3 = nn.Linear(hidden_width, hidden_width)
        self.fc4 = nn.Linear(hidden_width, 1)

    def forward(self, x):
        """Apply the three ReLU hidden layers, then the sigmoid output layer."""
        for hidden_layer in (self.fc1, self.fc2, self.fc3):
            x = torch.relu(hidden_layer(x))
        return torch.sigmoid(self.fc4(x))


net = Net()

In [5]:
class CustomLoss(nn.Module):
    """Binary cross-entropy plus an alignment penalty near the 0.5 level set.

    For samples whose prediction lies strictly within ``threshold`` of 0.5,
    the gradient of the prediction with respect to the input is compared with
    the vector ``b`` supplied for that sample. The squared cosine of the angle
    between the two is averaged over those samples, scaled by ``magnitude``
    and added to the BCE loss, so the penalty vanishes when the gradient is
    orthogonal to ``b``.

    Args:
        magnitude: Weight of the alignment penalty.
        threshold: Half-width of the band around 0.5 that selects the
            samples the penalty is applied to.
    """

    def __init__(self, magnitude, threshold):
        super().__init__()
        self.magnitude = magnitude
        self.threshold = threshold

    def forward(self, predictions, labels, inputs, b):
        """Return the scalar loss for one batch.

        Args:
            predictions: Model outputs in [0, 1], one per sample (any shape
                that flattens to ``(N,)``).
            labels: Targets in [0, 1] with ``N`` elements.
            inputs: The ``(N, D)`` tensor that was fed to the model. The
                penalty is only computed when this very tensor had
                ``requires_grad=True`` during the forward pass; otherwise
                the plain BCE loss is returned.
            b: Vector per sample, either a tensor shaped like ``inputs`` or
                a tuple/list of ``D`` component tensors of shape ``(N,)``.

        Returns:
            0-dim tensor: BCE, plus ``magnitude`` times the mean squared
            cosine over the near-boundary samples when it can be computed.
        """
        flat_predictions = predictions.reshape(-1)
        loss = nn.BCELoss()(flat_predictions, labels.reshape(-1))

        # d(prediction)/d(input) only exists if the inputs were tracked by
        # autograd when the predictions were produced.
        if not (inputs.requires_grad and flat_predictions.requires_grad):
            return loss

        near_condition = (flat_predictions > 0.5 - self.threshold) & (
            flat_predictions < 0.5 + self.threshold
        )
        if not bool(near_condition.any()):
            return loss

        # Differentiating the sum yields per-sample gradients provided each
        # prediction depends only on its own input row (no cross-sample
        # coupling such as batch normalisation). create_graph=True keeps the
        # penalty differentiable with respect to the model parameters.
        grads = torch.autograd.grad(
            flat_predictions.sum(), inputs, create_graph=True, allow_unused=True
        )[0]
        if grads is None:
            return loss

        if isinstance(b, (tuple, list)):
            field = torch.stack(tuple(b), dim=1)
        else:
            field = b
        # The field is treated as a constant target direction.
        field = field.detach().to(grads.dtype)

        # normalize() clamps the norm away from zero, so vanishing gradients
        # contribute a zero penalty instead of NaN.
        unit_grads = nn.functional.normalize(grads[near_condition], dim=1)
        unit_field = nn.functional.normalize(field[near_condition], dim=1)
        squared_cosine = (unit_grads * unit_field).sum(dim=1) ** 2

        # Mean reduction keeps the result scalar, matching BCELoss.
        return loss + self.magnitude * squared_cosine.mean()

In [6]:
def lorenz(t, X, sigma=10, beta=8/3, r=10):
    """Evaluate the right-hand side of the Lorenz equations at state ``X``.

    ``t`` is accepted but not used by the computation. ``X`` must unpack
    into three components ``(x, y, z)``. The three time derivatives are
    returned as a tuple ``(dx/dt, dy/dt, dz/dt)``.
    """
    px, py, pz = X
    return (
        sigma*(py - px),
        r*px - py - px*pz,
        -beta*pz + px*py,
    )

In [7]:
def train_model_bce(net, dataset_train, batchsize, epochs, lr):
    """Train ``net`` in place with Adam and plain binary cross-entropy.

    Args:
        net: Model whose output, flattened, gives one probability per sample.
        dataset_train: Dataset yielding ``(features, label)`` pairs.
        batchsize: Mini-batch size for the shuffled loader.
        epochs: Number of passes over ``dataset_train``.
        lr: Adam learning rate.

    Returns:
        The same ``net`` object after training. The mean batch loss is
        printed once per epoch.
    """
    loss_fn = nn.BCELoss()
    adam = optim.Adam(net.parameters(), lr=lr)
    loader = DataLoader(dataset_train, batch_size=batchsize, shuffle=True)

    for epoch_index in range(epochs):
        loss_total = 0.0
        batches_seen = 0
        for batch_features, raw_labels in loader:
            # (label + 1) / 2 sends -1 -> 0 and +1 -> 1, as BCELoss expects
            # targets in [0, 1] (assumes labels are stored as -1/+1 -- confirm
            # against the CSV).
            targets = ((raw_labels + 1) / 2).float().view(-1)
            adam.zero_grad()
            batch_loss = loss_fn(net(batch_features).view(-1), targets)
            batch_loss.backward()
            adam.step()
            loss_total += batch_loss.item()
            batches_seen += 1
        epoch_mean = loss_total / batches_seen
        print('Finished training for epoch %d, loss: %.3f' % (epoch_index + 1, epoch_mean))

    return net
    
def train_model_custom(net, dataset_train, batchsize, epochs, lr, magnitude, threshold):
    """Train ``net`` in place with Adam and ``CustomLoss``.

    Args:
        net: Model whose output, flattened, gives one probability per sample.
        dataset_train: Dataset yielding ``(features, label)`` pairs.
        batchsize: Mini-batch size for the shuffled loader.
        epochs: Number of passes over ``dataset_train``.
        lr: Adam learning rate.
        magnitude: Forwarded to ``CustomLoss`` as its ``magnitude``.
        threshold: Forwarded to ``CustomLoss`` as its ``threshold``.

    Returns:
        The same ``net`` object after training. The mean batch loss is
        printed once per epoch.
    """
    criterion = CustomLoss(magnitude, threshold)
    optimizer = optim.Adam(net.parameters(), lr=lr)
    dataloader_train = DataLoader(dataset_train, batch_size=batchsize, shuffle=True)
    for epoch in range(epochs):
        running_loss = 0.0
        mini_batch_count = 0
        for features, labels in dataloader_train:
            labels = ((labels + 1) / 2).float()
            labels = labels.view(-1)
            # The criterion relates d(output)/d(input) to the Lorenz vector
            # field, so the inputs must be tracked by autograd *before* the
            # forward pass; otherwise no input gradient exists.
            features = features.detach().requires_grad_(True)
            # The vector field is a fixed target: evaluate it on an untracked
            # copy so it does not add to the autograd graph.
            field = lorenz(0, features.detach().T)
            optimizer.zero_grad()
            outputs = net(features).view(-1)
            loss = criterion(outputs, labels, features, field)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            mini_batch_count += 1
        print('Finished training for epoch %d, loss: %.3f' % (epoch + 1, running_loss / mini_batch_count))
    return net

def test_model(net, dataset_test):
    """Evaluate ``net`` on ``dataset_test`` and visualise its predictions.

    Prints the binary accuracy (predictions thresholded at 0.5), then shows a
    3D scatter plot of the inputs coloured by predicted probability and a
    100-bin histogram of the predicted probabilities.

    Args:
        net: Model whose output, flattened, gives one probability per sample.
        dataset_test: Dataset yielding ``(features, label)`` pairs; features
            need at least three columns for the scatter plot.

    Returns:
        None.
    """
    dataloader_test = DataLoader(dataset_test, batch_size=32, shuffle=False)
    features_list = []
    probability_list = []
    acc = Accuracy(task = 'binary')

    with torch.no_grad():
        for features, labels in dataloader_test:
            # Same -1/+1 -> 0/1 label mapping as used during training.
            labels = ((labels + 1) / 2).float().reshape(-1)
            # reshape(-1) rather than squeeze(): a final batch holding a
            # single sample must stay 1-D so it still matches `labels` and
            # can be concatenated below.
            probability = net(features).reshape(-1)
            outputs = (probability > 0.5).float()

            acc.update(outputs, labels)

            # Keep per-batch numpy copies for plotting.
            features_list.append(features.numpy())
            probability_list.append(probability.numpy())
    accuracy = acc.compute()
    print(f'Accuracy: {accuracy}')

    # Concatenate all batches
    features = np.concatenate(features_list, axis=0)
    probability = np.concatenate(probability_list, axis=0)

    # Create a 3D scatter plot
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')

    # Plot the features colored by the predicted probability
    scatter = ax.scatter(features[:, 0], features[:, 1], features[:, 2], c=probability, cmap='coolwarm')

    # Add a color bar
    plt.colorbar(scatter)

    plt.show()

    plt.hist(probability, bins=100)
    plt.show()

## Training with uniform data only

In [None]:
net_uniform_bce = Net()
net_uniform_custom = Net()

In [None]:
# Arguments after the dataset: batch size 32, 100 epochs, learning rate 0.001.
net_uniform_bce = train_model_bce(net_uniform_bce, dataset_uniform_train, 32, 100, 0.001)

In [None]:
test_model(net_uniform_bce, dataset_uniform_test)

In [None]:
test_model(net_uniform_bce, dataset_near_test)

In [None]:
test_model(net_uniform_bce, dataset_test_merged)

In [None]:
# Arguments after the dataset: batch size 32, 100 epochs, learning rate 0.001,
# CustomLoss magnitude 100, CustomLoss threshold 0.01.
net_uniform_custom = train_model_custom(net_uniform_custom, dataset_uniform_train, 32, 100, 0.001, 100, 0.01)

In [None]:
test_model(net_uniform_custom, dataset_uniform_test)

In [None]:
test_model(net_uniform_custom, dataset_near_test)

In [None]:
test_model(net_uniform_custom, dataset_test_merged)

## Training with near data only

In [None]:
net_near_bce = Net()
net_near_custom = Net()

In [None]:
# Arguments after the dataset: batch size 32, 100 epochs, learning rate 0.001.
net_near_bce = train_model_bce(net_near_bce, dataset_near_train, 32, 100, 0.001)

In [None]:
test_model(net_near_bce, dataset_uniform_test)

In [None]:
test_model(net_near_bce, dataset_near_test)

In [None]:
test_model(net_near_bce, dataset_test_merged)

In [None]:
# Arguments after the dataset: batch size 32, 100 epochs, learning rate 0.001,
# CustomLoss magnitude 100, CustomLoss threshold 0.01.
net_near_custom = train_model_custom(net_near_custom, dataset_near_train, 32, 100, 0.001, 100, 0.01)

In [None]:
test_model(net_near_custom, dataset_uniform_test)

In [None]:
test_model(net_near_custom, dataset_near_test)

In [None]:
test_model(net_near_custom, dataset_test_merged)

## Training with mixed data

In [None]:
net_mixed_bce = Net()
net_mixed_custom = Net()

In [None]:
# Arguments after the dataset: batch size 32, 100 epochs, learning rate 0.001.
net_mixed_bce = train_model_bce(net_mixed_bce, dataset_train_merged, 32, 100, 0.001)

In [None]:
test_model(net_mixed_bce, dataset_uniform_test)

In [None]:
test_model(net_mixed_bce, dataset_near_test)

In [None]:
test_model(net_mixed_bce, dataset_test_merged)

In [None]:
# Arguments after the dataset: batch size 32, 100 epochs, learning rate 0.001,
# CustomLoss magnitude 100, CustomLoss threshold 0.01.
net_mixed_custom = train_model_custom(net_mixed_custom, dataset_train_merged, 32, 100, 0.001, 100, 0.01)

In [None]:
test_model(net_mixed_custom, dataset_uniform_test)

In [None]:
test_model(net_mixed_custom, dataset_near_test)

In [None]:
test_model(net_mixed_custom, dataset_test_merged)

In [None]:
## Save the models
# Only each model's state_dict (its parameters) is written, one file per
# model, to paths relative to the current working directory. All six models
# must already exist in the session for this cell to run.
torch.save(net_uniform_bce.state_dict(), 'net_uniform_bce.pth')
torch.save(net_uniform_custom.state_dict(), 'net_uniform_custom.pth')
torch.save(net_near_bce.state_dict(), 'net_near_bce.pth')
torch.save(net_near_custom.state_dict(), 'net_near_custom.pth')
torch.save(net_mixed_bce.state_dict(), 'net_mixed_bce.pth')
torch.save(net_mixed_custom.state_dict(), 'net_mixed_custom.pth')


## Hyperparameter tuning

Current hyperparameters are:
1. number of epochs
2. batch size
3. $\epsilon$
4. $\lambda$
5. learning rate
6. architecture of network

We try to tune these hyperparameters via a grid search approach.

In [8]:
from ray import tune

In [9]:
## We focus on model training with mixed dataset and custom loss function
def train_model_custom_tune(config):
    """Ray Tune trainable: train a fresh ``Net`` on the merged training set.

    Reads the keys ``"batchsize"``, ``"epoch"``, ``"lr"``, ``"magnitude"``
    and ``"threshold"`` from ``config``, trains with Adam and ``CustomLoss``,
    and reports the mean batch loss to Tune under the name ``loss`` after
    every epoch.

    Args:
        config: Hyperparameter dictionary supplied by Ray Tune.

    Returns:
        The trained ``Net``.
    """
    # NOTE(review): the recorded run of this notebook shows trials failing,
    # and the traceback is truncated, so the cause is not visible here. Two
    # things to check against the installed Ray version: (1) whether
    # `tune.report(loss=...)` with keyword metrics is still supported, or a
    # metrics dict is expected instead; (2) whether a function trainable may
    # return a non-dict, non-numeric value such as `net`.
    net = Net()
    dataset_train = dataset_train_merged
    batchsize = config["batchsize"]
    epochs = config["epoch"]
    lr = config["lr"]
    magnitude = config["magnitude"]
    threshold = config["threshold"]

    criterion = CustomLoss(magnitude, threshold)
    optimizer = optim.Adam(net.parameters(), lr=lr)
    dataloader_train = DataLoader(dataset_train, batch_size=batchsize, shuffle=True)
    for epoch in range(epochs):
        running_loss = 0.0
        mini_batch_count = 0
        for features, labels in dataloader_train:
            labels = ((labels + 1) / 2).float()
            labels = labels.view(-1)
            # The criterion relates d(output)/d(input) to the Lorenz vector
            # field, so the inputs must be tracked by autograd *before* the
            # forward pass; otherwise no input gradient exists.
            features = features.detach().requires_grad_(True)
            # The vector field is a fixed target: evaluate it on an untracked
            # copy so it does not add to the autograd graph.
            field = lorenz(0, features.detach().T)
            optimizer.zero_grad()
            outputs = net(features).view(-1)
            loss = criterion(outputs, labels, features, field)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            mini_batch_count += 1
        tune.report(loss=(running_loss / mini_batch_count))
        print('Finished training for epoch %d, loss: %.3f' % (epoch + 1, running_loss / mini_batch_count))
    return net


In [10]:
# Grid of hyperparameters handed to train_model_custom_tune via `config`.
# Full Cartesian product: 3 * 1 * 3 * 5 * 2 = 90 combinations.
search_space = {
    # Mini-batch size of the training DataLoader.
    "batchsize": tune.grid_search([32, 64, 128]),
    # Number of training epochs (a single value, so not actually varied).
    "epoch": tune.grid_search([100,]),
    # Adam learning rate.
    "lr": tune.grid_search([0.001, 0.01, 0.1]),
    # CustomLoss `magnitude` (weight of the penalty term).
    "magnitude": tune.grid_search([0.1, 1, 10, 100, 1000]),
    # CustomLoss `threshold` (half-width of the band around 0.5).
    "threshold": tune.grid_search([0.1, 0.25])
}

In [11]:
# Run the grid search over `search_space` with the custom-loss trainable.
# num_samples=1: presumably each grid combination is evaluated once -- confirm
# against the Ray Tune documentation for the installed version.
analysis = tune.run(
    train_model_custom_tune,
    config=search_space,
    num_samples=1,
    verbose=1
)

# Pick the configuration with the smallest reported `loss`.
# NOTE(review): in the recorded output below the listed trials failed and this
# printed "Best config: None", so no usable result was produced by that run.
best_config = analysis.get_best_config(metric="loss", mode="min")
print(f"Best config: {best_config}")

0,1
Current time:,2024-06-17 13:31:52
Running for:,00:00:17.06
Memory:,12.5/18.0 GiB

Trial name,# failures,error file
train_model_custom_tune_dd055_00000,1,"/tmp/ray/session_2024-06-17_13-31-33_577217_55471/artifacts/2024-06-17_13-31-35/train_model_custom_tune_2024-06-17_13-31-35/driver_artifacts/train_model_custom_tune_dd055_00000_0_batchsize=32,epoch=100,lr=0.0010,magnitude=0.1000,threshold=0.1000_2024-06-17_13-31-36/error.txt"
train_model_custom_tune_dd055_00001,1,"/tmp/ray/session_2024-06-17_13-31-33_577217_55471/artifacts/2024-06-17_13-31-35/train_model_custom_tune_2024-06-17_13-31-35/driver_artifacts/train_model_custom_tune_dd055_00001_1_batchsize=64,epoch=100,lr=0.0010,magnitude=0.1000,threshold=0.1000_2024-06-17_13-31-36/error.txt"
train_model_custom_tune_dd055_00002,1,"/tmp/ray/session_2024-06-17_13-31-33_577217_55471/artifacts/2024-06-17_13-31-35/train_model_custom_tune_2024-06-17_13-31-35/driver_artifacts/train_model_custom_tune_dd055_00002_2_batchsize=128,epoch=100,lr=0.0010,magnitude=0.1000,threshold=0.1000_2024-06-17_13-31-36/error.txt"
train_model_custom_tune_dd055_00003,1,"/tmp/ray/session_2024-06-17_13-31-33_577217_55471/artifacts/2024-06-17_13-31-35/train_model_custom_tune_2024-06-17_13-31-35/driver_artifacts/train_model_custom_tune_dd055_00003_3_batchsize=32,epoch=100,lr=0.0100,magnitude=0.1000,threshold=0.1000_2024-06-17_13-31-36/error.txt"
train_model_custom_tune_dd055_00004,1,"/tmp/ray/session_2024-06-17_13-31-33_577217_55471/artifacts/2024-06-17_13-31-35/train_model_custom_tune_2024-06-17_13-31-35/driver_artifacts/train_model_custom_tune_dd055_00004_4_batchsize=64,epoch=100,lr=0.0100,magnitude=0.1000,threshold=0.1000_2024-06-17_13-31-36/error.txt"
train_model_custom_tune_dd055_00005,1,"/tmp/ray/session_2024-06-17_13-31-33_577217_55471/artifacts/2024-06-17_13-31-35/train_model_custom_tune_2024-06-17_13-31-35/driver_artifacts/train_model_custom_tune_dd055_00005_5_batchsize=128,epoch=100,lr=0.0100,magnitude=0.1000,threshold=0.1000_2024-06-17_13-31-36/error.txt"
train_model_custom_tune_dd055_00006,1,"/tmp/ray/session_2024-06-17_13-31-33_577217_55471/artifacts/2024-06-17_13-31-35/train_model_custom_tune_2024-06-17_13-31-35/driver_artifacts/train_model_custom_tune_dd055_00006_6_batchsize=32,epoch=100,lr=0.1000,magnitude=0.1000,threshold=0.1000_2024-06-17_13-31-36/error.txt"
train_model_custom_tune_dd055_00007,1,"/tmp/ray/session_2024-06-17_13-31-33_577217_55471/artifacts/2024-06-17_13-31-35/train_model_custom_tune_2024-06-17_13-31-35/driver_artifacts/train_model_custom_tune_dd055_00007_7_batchsize=64,epoch=100,lr=0.1000,magnitude=0.1000,threshold=0.1000_2024-06-17_13-31-36/error.txt"
train_model_custom_tune_dd055_00008,1,"/tmp/ray/session_2024-06-17_13-31-33_577217_55471/artifacts/2024-06-17_13-31-35/train_model_custom_tune_2024-06-17_13-31-35/driver_artifacts/train_model_custom_tune_dd055_00008_8_batchsize=128,epoch=100,lr=0.1000,magnitude=0.1000,threshold=0.1000_2024-06-17_13-31-36/error.txt"
train_model_custom_tune_dd055_00009,1,"/tmp/ray/session_2024-06-17_13-31-33_577217_55471/artifacts/2024-06-17_13-31-35/train_model_custom_tune_2024-06-17_13-31-35/driver_artifacts/train_model_custom_tune_dd055_00009_9_batchsize=32,epoch=100,lr=0.0010,magnitude=1,threshold=0.1000_2024-06-17_13-31-36/error.txt"

Trial name,status,loc,batchsize,epoch,lr,magnitude,threshold
train_model_custom_tune_dd055_00012,RUNNING,127.0.0.1:55519,32,100,0.01,1.0,0.1
train_model_custom_tune_dd055_00013,PENDING,,64,100,0.01,1.0,0.1
train_model_custom_tune_dd055_00014,PENDING,,128,100,0.01,1.0,0.1
train_model_custom_tune_dd055_00015,PENDING,,32,100,0.1,1.0,0.1
train_model_custom_tune_dd055_00016,PENDING,,64,100,0.1,1.0,0.1
train_model_custom_tune_dd055_00017,PENDING,,128,100,0.1,1.0,0.1
train_model_custom_tune_dd055_00018,PENDING,,32,100,0.001,10.0,0.1
train_model_custom_tune_dd055_00019,PENDING,,64,100,0.001,10.0,0.1
train_model_custom_tune_dd055_00020,PENDING,,128,100,0.001,10.0,0.1
train_model_custom_tune_dd055_00021,PENDING,,32,100,0.01,10.0,0.1


2024-06-17 13:31:48,904	ERROR tune_controller.py:1331 -- Trial task failed for trial train_model_custom_tune_dd055_00011
Traceback (most recent call last):
  File "/Users/keqinchen/Documents/GitHub/Estimate-Basin-Boundary/myenv/lib/python3.10/site-packages/ray/air/execution/_internal/event_manager.py", line 110, in resolve_future
    result = ray.get(future)
  File "/Users/keqinchen/Documents/GitHub/Estimate-Basin-Boundary/myenv/lib/python3.10/site-packages/ray/_private/auto_init_hook.py", line 21, in auto_init_wrapper
    return fn(*args, **kwargs)
  File "/Users/keqinchen/Documents/GitHub/Estimate-Basin-Boundary/myenv/lib/python3.10/site-packages/ray/_private/client_mode_hook.py", line 103, in wrapper
    return func(*args, **kwargs)
  File "/Users/keqinchen/Documents/GitHub/Estimate-Basin-Boundary/myenv/lib/python3.10/site-packages/ray/_private/worker.py", line 2613, in get
    values, debugger_breakpoint = worker.get_objects(object_refs, timeout=timeout)
  File "/Users/keqinchen/Do

Best config: None
