# Task 1.1 - Bathtub function

## Imports

In [1]:
import os
import numpy as np
import tensorflow as tf
import datetime
now = datetime.datetime.now

from keras import Model
from typing import Literal, TypedDict
from dataclasses import dataclass, field
from itertools import product

from bokeh.plotting import show
from bokeh.io import output_notebook
output_notebook()

from data import bathtub
from plots import plot_loss_history, plot_x_y
from models import CustomFFNN, ICNN
from bokeh_saving import save_figures_button
from plot_utils import get_figure_size

# Directory that receives the exported figures, resolved against the current working directory.
PLOTS_DIR = os.path.abspath('plot_pdfs')
# exist_ok=True keeps the cell idempotent on re-runs and avoids the
# check-then-create race of a separate exists() test followed by mkdir().
os.makedirs(PLOTS_DIR, exist_ok=True)

# Default figure size; it is star-unpacked into the plotting helpers (see the Task 1.2 loss plot).
FIG_SIZE = get_figure_size(ratio=1.3)
# Forwarded as `use_latex_style` to every plot_loss_history / plot_x_y call in this notebook.
USE_LATEX_STYLE = True

2024-11-01 07:31:20.784861: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-11-01 07:31:20.792183: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-11-01 07:31:20.812436: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-11-01 07:31:20.838011: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-11-01 07:31:20.845675: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-01 07:31:20.867553: I tensorflow/core/platform/cpu_feature_gu

In [2]:
# xs_c / ys_c are the inputs and targets passed to every `fit` call below (calibration data);
# xs / ys are only used for `predict` and as the reference curve handed to plot_x_y.
xs, ys, xs_c, ys_c = bathtub()

## Task 1.1 - Hyperparameter sweep

#### Settings

In [3]:
@dataclass
class Setting:
    """One hyperparameter configuration of the sweep.

    The first six fields are constructor arguments. `hidden_sizes`,
    `activations` and `name` are derived from them in `__post_init__` and
    cannot be passed to the constructor.
    """

    # --- constructor arguments ---
    input_size: int
    output_size: int
    num_hidden_layers: int
    num_nodes: int
    activation: Literal['linear', 'softplus', 'tanh', 'relu', 'sigmoid']
    epochs: int

    # --- derived in __post_init__ ---
    # Layer widths: `num_nodes` repeated `num_hidden_layers` times, then `output_size`.
    hidden_sizes: list[int] = field(init=False)
    # Per-layer activations: `activation` for every hidden layer, 'linear' for the last entry.
    activations: list[Literal['linear', 'softplus', 'tanh', 'relu', 'sigmoid']] = field(init=False)
    # Label of the form '<layers>x<nodes>_<activation>_<epochs>epochs'.
    name: str = field(init=False)

    def __post_init__(self) -> None:
        """Fill in the derived layer lists and the display name."""
        hidden_layers = range(self.num_hidden_layers)

        # The trailing entry of both lists describes the output layer.
        self.hidden_sizes = [*(self.num_nodes for _ in hidden_layers), self.output_size]
        self.activations = [*(self.activation for _ in hidden_layers), 'linear']

        self.name = f'{self.num_hidden_layers}x{self.num_nodes}_{self.activation}_{self.epochs}epochs'


class ParamGrid(TypedDict):
    """Search space of a sweep: one list of candidate values per `Setting` constructor argument.

    `get_grid` expands it into the Cartesian product of all value lists.
    """
    input_size: list[int]
    output_size: list[int]
    num_hidden_layers: list[int]
    num_nodes: list[int]
    activation: list[Literal['linear', 'softplus', 'tanh', 'relu', 'sigmoid']]
    epochs: list[int]


def get_grid(param_grid: ParamGrid) -> list[Setting]:
    """Expand a parameter grid into one `Setting` per value combination.

    The Cartesian product is taken over the value lists in the grid's key
    order, so the last key varies fastest in the returned list.
    """
    names, candidates = zip(*param_grid.items())

    settings: list[Setting] = []
    for chosen in product(*candidates):
        kwargs = dict(zip(names, chosen))
        settings.append(Setting(**kwargs))
    return settings

#### Model calibration

In [4]:
def train_multiple_models(
    combinations: list[Setting],
    learning_rate: float = 0.01,
) -> tuple[dict[str, Model], dict[str, np.ndarray]]:
    """Train one freshly built `CustomFFNN` per setting and collect the results.

    Every model is compiled with the 'adam' optimizer and 'mse' loss and is
    fitted on the notebook-level calibration data `xs_c` / `ys_c`, which must
    be defined before this function is called.

    Args:
        combinations: Settings to train, e.g. the output of `get_grid`.
        learning_rate: Value assigned to the optimizer's learning rate after
            compiling. Defaults to 0.01, the value previously hard-coded here.

    Returns:
        A tuple `(models, loss_history)`; both dicts are keyed by
        `setting.name`. `loss_history` holds `History.history['loss']` of each
        fit. NOTE(review): Keras stores this as a Python list, not an
        ndarray as the annotation suggests - confirm what the plot helpers expect.

    Note:
        Settings that share the same `name` overwrite each other in both
        dicts (`name` encodes only layers, nodes, activation and epochs).
    """
    models: dict[str, Model] = {}
    loss_history: dict[str, np.ndarray] = {}
    for setting in combinations:
        model = CustomFFNN(
            input_size=setting.input_size,
            hidden_sizes=setting.hidden_sizes,
            activations=setting.activations,
        )
        model.compile('adam', 'mse')
        # The optimizer only exists after compile(); override its default learning rate.
        model.optimizer.learning_rate.assign(learning_rate)

        # Wall-clock timing of the fit; the difference is printed as a timedelta (H:MM:SS.ffffff).
        t1 = now()
        h = model.fit(xs_c, ys_c, epochs=setting.epochs, verbose=0)
        t2 = now()
        print(f'{setting.name} took', t2 - t1, '(sec) to calibrate the model')

        models[setting.name] = model
        loss_history[setting.name] = h.history['loss']

    return models, loss_history

#### Hidden sizes sweep

In [5]:
# Architecture sweep: 1-3 hidden layers x 4/8/16 nodes per layer, softplus, 500 epochs
# -> 3 x 3 = 9 settings, each trained from scratch by train_multiple_models.
combinations_111 = get_grid(ParamGrid(
    input_size=[1],
    output_size=[1],
    num_hidden_layers=[1, 2, 3],
    num_nodes=[4, 8, 16],
    activation=['softplus'],
    epochs=[500],
))
models_111, loss_history_111 = train_multiple_models(combinations_111)

1x4_softplus_500epochs took 0:00:17.353010 (sec) to calibrate the model
1x8_softplus_500epochs took 0:00:16.515016 (sec) to calibrate the model
1x16_softplus_500epochs took 0:00:17.393986 (sec) to calibrate the model
2x4_softplus_500epochs took 0:00:20.633505 (sec) to calibrate the model
2x8_softplus_500epochs took 0:00:19.819884 (sec) to calibrate the model
2x16_softplus_500epochs took 0:00:20.025724 (sec) to calibrate the model
3x4_softplus_500epochs took 0:00:17.897089 (sec) to calibrate the model
3x8_softplus_500epochs took 0:00:19.870068 (sec) to calibrate the model
3x16_softplus_500epochs took 0:00:18.770121 (sec) to calibrate the model


In [None]:
# Loss histories of the hidden-size sweep, one entry per setting name.
p_loss_111 = plot_loss_history(loss_history_111, *get_figure_size(ratio=1.2), use_latex_style=USE_LATEX_STYLE)
show(p_loss_111)

# Renders a widget offering export as svg / png / both; both target directories are PLOTS_DIR.
save_figures_button([('1_1_loss_hidden_sizes_sweep', p_loss_111)], svg_results_dir=PLOTS_DIR, png_results_dir=PLOTS_DIR)

ToggleButtons(description='Save Figures with format:', options=('svg', 'png', 'both'), tooltips=('Saves figure…

In [7]:
# Evaluate every model of the hidden-size sweep on xs, keyed by setting name.
predictions_111 = {
    model_name: model.predict(xs) for model_name, model in models_111.items() 
}
# plot_x_y receives the predictions together with (xs, ys) and the calibration points (xs_c, ys_c).
p_pred_111 = plot_x_y(predictions_111, xs, ys, xs_c, ys_c, *get_figure_size(ratio=0.8), use_latex_style=USE_LATEX_STYLE)
show(p_pred_111)

save_figures_button([('1_1_pred_hidden_sizes_sweep', p_pred_111)], svg_results_dir=PLOTS_DIR, png_results_dir=PLOTS_DIR)

[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step


ToggleButtons(description='Save Figures with format:', options=('svg', 'png', 'both'), tooltips=('Saves figure…

#### Epoch variation

In [9]:
# Epoch sweep: 2-3 hidden layers x 8/16 nodes x 500/1000/2500/5000 epochs -> 16 settings.
# Each setting builds and trains a new model from scratch (no warm start between epoch
# budgets), so this cell is the most expensive one in the notebook.
combinations_112 = get_grid(ParamGrid(
    input_size=[1],
    output_size=[1],
    num_hidden_layers=[2, 3],
    num_nodes=[8, 16],
    activation=['softplus'],
    epochs=[500, 1000, 2500, 5000],
))
models_112, loss_history_112 = train_multiple_models(combinations_112)

2x8_softplus_500epochs took 0:00:18.588898 (sec) to calibrate the model
2x8_softplus_1000epochs took 0:00:43.948326 (sec) to calibrate the model
2x8_softplus_2500epochs took 0:01:26.905684 (sec) to calibrate the model
2x8_softplus_5000epochs took 0:02:49.084637 (sec) to calibrate the model
2x16_softplus_500epochs took 0:00:17.808869 (sec) to calibrate the model
2x16_softplus_1000epochs took 0:00:34.725403 (sec) to calibrate the model
2x16_softplus_2500epochs took 0:01:25.866535 (sec) to calibrate the model
2x16_softplus_5000epochs took 0:02:52.053030 (sec) to calibrate the model
3x8_softplus_500epochs took 0:00:19.673882 (sec) to calibrate the model
3x8_softplus_1000epochs took 0:16:05.063424 (sec) to calibrate the model
3x8_softplus_2500epochs took 0:01:30.375588 (sec) to calibrate the model
3x8_softplus_5000epochs took 0:03:06.601979 (sec) to calibrate the model
3x16_softplus_500epochs took 0:00:18.424295 (sec) to calibrate the model
3x16_softplus_1000epochs took 0:00:41.967346 (sec)

In [11]:
# Loss histories of the epoch sweep; the curves have different lengths (500 to 5000 epochs).
p_loss_112 = plot_loss_history(loss_history_112, *get_figure_size(ratio=0.8), use_latex_style=USE_LATEX_STYLE)
show(p_loss_112)

# Renders a widget offering export as svg / png / both; both target directories are PLOTS_DIR.
save_figures_button([('1_1_loss_epoch_variation', p_loss_112)], svg_results_dir=PLOTS_DIR, png_results_dir=PLOTS_DIR)

ToggleButtons(description='Save Figures with format:', options=('svg', 'png', 'both'), tooltips=('Saves figure…

In [12]:
# Evaluate every model of the epoch sweep on xs, keyed by setting name.
predictions_112 = {
    model_name: model.predict(xs) for model_name, model in models_112.items() 
}
# plot_x_y receives the predictions together with (xs, ys) and the calibration points (xs_c, ys_c).
p_pred_112 = plot_x_y(predictions_112, xs, ys, xs_c, ys_c, *get_figure_size(ratio=0.8), use_latex_style=USE_LATEX_STYLE)
show(p_pred_112)

save_figures_button([('1_1_pred_epoch_variation', p_pred_112)], svg_results_dir=PLOTS_DIR, png_results_dir=PLOTS_DIR)

[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[

ToggleButtons(description='Save Figures with format:', options=('svg', 'png', 'both'), tooltips=('Saves figure…

#### Activation function variation

In [13]:
# Activation sweep: fixed 2x16 architecture and 1000 epochs; only the hidden-layer
# activation varies (relu, tanh, sigmoid, softplus) -> 4 settings.
combinations_113 = get_grid(ParamGrid(
    input_size=[1],
    output_size=[1],
    num_hidden_layers=[2],
    num_nodes=[16],
    activation=['relu', 'tanh', 'sigmoid', 'softplus'],
    epochs=[1000,],
))
models_113, loss_history_113 = train_multiple_models(combinations_113)

2x16_relu_1000epochs took 0:00:36.252780 (sec) to calibrate the model
2x16_tanh_1000epochs took 0:00:33.708723 (sec) to calibrate the model
2x16_sigmoid_1000epochs took 0:00:32.701093 (sec) to calibrate the model
2x16_softplus_1000epochs took 0:00:35.718544 (sec) to calibrate the model


In [14]:
# Loss histories of the activation-function sweep. This must use loss_history_113:
# the earlier copy-pasted version plotted loss_history_111 (the hidden-size sweep)
# and saved it under the activation-function figure name.
p_loss_113 = plot_loss_history(loss_history_113, *get_figure_size(ratio=1.2), use_latex_style=USE_LATEX_STYLE)
show(p_loss_113)

# Renders a widget offering export as svg / png / both; both target directories are PLOTS_DIR.
save_figures_button([('1_1_loss_actfun_var', p_loss_113)], svg_results_dir=PLOTS_DIR, png_results_dir=PLOTS_DIR)

ToggleButtons(description='Save Figures with format:', options=('svg', 'png', 'both'), tooltips=('Saves figure…

In [17]:
# Evaluate every model of the activation-function sweep on xs, keyed by setting name.
predictions_113 = {
    model_name: model.predict(xs) for model_name, model in models_113.items() 
}
# plot_x_y receives the predictions together with (xs, ys) and the calibration points (xs_c, ys_c).
p_pred_113 = plot_x_y(predictions_113, xs, ys, xs_c, ys_c, *get_figure_size(ratio=1.15), use_latex_style=USE_LATEX_STYLE)
show(p_pred_113)

save_figures_button([('1_1_pred_actfun_var', p_pred_113)], svg_results_dir=PLOTS_DIR, png_results_dir=PLOTS_DIR)

[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 


ToggleButtons(description='Save Figures with format:', options=('svg', 'png', 'both'), tooltips=('Saves figure…

The 3x8 network with softplus activation trained for 500 epochs already fits the data very well. In general, however, I would additionally test tanh with more epochs and possibly one more hidden layer, because tanh should extrapolate best.

## Task 1.2 - Input convex neural networks

In [3]:
# ICNN with softplus activations; same training setup as the Task 1.1 sweep
# ('adam', 'mse', learning rate 0.01) and 1000 epochs on the calibration data.
# hidden_sizes presumably follows the Setting convention (last entry = output layer) - TODO confirm.
icnn_sp = ICNN(input_size=1, hidden_sizes=[16, 16, 1], activations=['softplus', 'softplus', 'linear'])
icnn_sp.compile('adam', 'mse')
icnn_sp.optimizer.learning_rate.assign(0.01)

t1 = now()
h = icnn_sp.fit(xs_c, ys_c, epochs=1000, verbose=0)
t2 = now()
# NOTE(review): the f-string has no placeholders, so the f prefix is redundant.
print(f'ICNN with softplus took', t2 - t1, '(sec) to calibrate the model')

# `h` is reassigned by the next training cells; keep the loss under a dedicated name.
h_icnn_sp = h.history['loss']

ICNN with softplus took 0:00:37.374731 (sec) to calibrate the model


In [4]:
# Same ICNN setup as the softplus variant, but with relu in the two 16-node layers.
icnn_rl = ICNN(input_size=1, hidden_sizes=[16, 16, 1], activations=['relu', 'relu', 'linear'])
icnn_rl.compile('adam', 'mse')
icnn_rl.optimizer.learning_rate.assign(0.01)

t1 = now()
h = icnn_rl.fit(xs_c, ys_c, epochs=1000, verbose=0)
t2 = now()
# NOTE(review): the f-string has no placeholders, so the f prefix is redundant.
print(f'ICNN with relu took', t2 - t1, '(sec) to calibrate the model')

# `h` is reassigned by the next training cell; keep the loss under a dedicated name.
h_icnn_rl = h.history['loss']

ICNN with relu took 0:00:37.836197 (sec) to calibrate the model


In [5]:
# Unconstrained baseline for the ICNN comparison: CustomFFNN with the same layer sizes,
# tanh activations and the same training setup ('adam', 'mse', learning rate 0.01, 1000 epochs).
ffnn = CustomFFNN(input_size=1, hidden_sizes=[16, 16, 1], activations=['tanh', 'tanh', 'linear'])
ffnn.compile('adam', 'mse')
ffnn.optimizer.learning_rate.assign(0.01)

t1 = now()
h = ffnn.fit(xs_c, ys_c, epochs=1000, verbose=0)
t2 = now()
# NOTE(review): the f-string has no placeholders, so the f prefix is redundant.
print(f'FFNN with tanh took', t2 - t1, '(sec) to calibrate the model')

h_ffnn = h.history['loss']

FFNN with tanh took 0:00:38.307508 (sec) to calibrate the model


In [6]:
# Compare the loss histories of the three Task 1.2 models; the dict keys are the curve labels
# handed to plot_loss_history.
comp_loss_hist = {'FFNN': h_ffnn, 'ICNN - Softplus': h_icnn_sp, 'ICNN - RELU': h_icnn_rl}
p_loss_12 = plot_loss_history(comp_loss_hist,  *FIG_SIZE, use_latex_style=USE_LATEX_STYLE)
show(p_loss_12)

# Renders a widget offering export as svg / png / both; both target directories are PLOTS_DIR.
save_figures_button([('1_2_loss', p_loss_12)], svg_results_dir=PLOTS_DIR, png_results_dir=PLOTS_DIR)

ToggleButtons(description='Save Figures with format:', options=('svg', 'png', 'both'), tooltips=('Saves figure…

In [10]:
# Evaluate the three Task 1.2 models on xs.
# NOTE(review): these keys ('ICNN-Softplus', 'ICNN-RELU') differ from the ones used for the
# loss plot ('ICNN - Softplus', 'ICNN - RELU'); align them if the figures appear side by side.
predictions_12 = {
    'FFNN': ffnn.predict(xs), 
    'ICNN-Softplus': icnn_sp.predict(xs), 
    'ICNN-RELU': icnn_rl.predict(xs)
}
# plot_x_y receives the predictions together with (xs, ys) and the calibration points (xs_c, ys_c).
p_prediction_12 = plot_x_y(predictions_12, xs, ys, xs_c, ys_c, *get_figure_size(ratio=1.25), use_latex_style=USE_LATEX_STYLE)
show(p_prediction_12)

save_figures_button([('1_2_prediction', p_prediction_12)], svg_results_dir=PLOTS_DIR, png_results_dir=PLOTS_DIR)

[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 


ToggleButtons(description='Save Figures with format:', options=('svg', 'png', 'both'), tooltips=('Saves figure…