In [1]:
%matplotlib inline

import os
import sys

PACKAGE_PARENT = '..'
sys.path.append(PACKAGE_PARENT)

import torch
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

from gaminet import GAMINetRegressor
from gaminet.utils import local_visualize
from gaminet.utils import global_visualize_density
from gaminet.utils import feature_importance_visualize
from gaminet.utils import plot_trajectory
from gaminet.utils import plot_regularization

## Load data

In [2]:
def metric_wrapper(metric, scaler):
    """Bind ``scaler`` to ``metric`` and return a two-argument scoring function.

    Parameters
    ----------
    metric : callable
        Called as ``metric(label, pred, scaler=scaler)``.
    scaler : object
        Forwarded unchanged to ``metric`` on every call.

    Returns
    -------
    callable
        ``wrapper(label, pred)`` returning whatever ``metric`` returns.
    """
    def wrapper(label, pred):
        # The scaler is captured by closure so callers only supply label/pred.
        score = metric(label, pred, scaler=scaler)
        return score
    return wrapper

def rmse(label, pred, scaler):
    """Root-mean-squared error computed after undoing the target scaling.

    Parameters
    ----------
    label, pred : array-like exposing ``reshape``
        Scaled targets and predictions; each is reshaped to a single column
        before being passed to ``scaler.inverse_transform``.
    scaler : object with ``inverse_transform``
        Maps scaled values back to the original units.

    Returns
    -------
    float
        ``sqrt(mean((pred - label) ** 2))`` on the inverse-transformed values.
    """
    restored_pred = scaler.inverse_transform(pred.reshape([-1, 1]))
    restored_label = scaler.inverse_transform(label.reshape([-1, 1]))
    residual = restored_pred - restored_label
    return np.sqrt(np.mean(np.square(residual)))

def data_generator1(datanum, dist="uniform", random_state=0):
    """Generate a synthetic regression dataset with 100 uniform features.

    Only the first six features enter the response: a quadratic term in X1,
    an exponential term in X2, a sinusoidal X3*X4 interaction and a "cliff"
    shaped X5/X6 interaction, plus standard-normal noise. The remaining
    features are pure noise inputs.

    Parameters
    ----------
    datanum : int
        Number of samples to draw.
    dist : str
        Accepted for interface compatibility; not used by this function.
    random_state : int
        Seeds the global NumPy RNG and the train/test split.

    Returns
    -------
    tuple
        ``(train_x, test_x, train_y, test_y, task_type, meta_info, metric)``
        where the split holds out 20% of the rows, ``task_type`` is
        ``"Regression"``, ``meta_info`` maps ``"X1".."X100"`` and ``"Y"`` to
        their type and fitted ``MinMaxScaler``, and ``metric`` is ``rmse``
        bound to the target scaler.
    """
    n_features = 100
    np.random.seed(random_state)
    x = np.random.uniform(0, 1, [datanum, n_features])
    x1, x2, x3, x4, x5, x6 = (x[:, [col]] for col in range(6))

    def cliff(u, v):
        # Rescale the unit-interval inputs: u -> [-20, 20], v -> [-10, 5].
        u = (2 * u - 1) * 20
        v = (2 * v - 1) * 7.5 - 2.5
        u_part = -0.5 * u ** 2 / 100
        v_part = -0.5 * (v + 0.03 * u ** 2 - 3) ** 2
        return 10 * np.exp(u_part + v_part)

    # Terms are summed left to right to keep the floating-point result unchanged.
    quadratic = 8 * (x1 - 0.5) ** 2
    exponential = 0.1 * np.exp(-8 * x2 + 4)
    interaction = 3 * np.sin(2 * np.pi * x3 * x4)
    signal = (quadratic + exponential + interaction + cliff(x5, x6)).reshape([-1, 1])
    y = signal + 1 * np.random.normal(0, 1, [datanum, 1])

    task_type = "Regression"
    meta_info = {}
    for col in range(n_features):
        # Each feature gets its own scaler fitted on the nominal [0, 1] range.
        sx = MinMaxScaler((0, 1))
        sx.fit([[0], [1]])
        x[:, [col]] = sx.transform(x[:, [col]])
        meta_info["X" + str(col + 1)] = {'type': 'continuous', 'scaler': sx}

    # The target is rescaled to [0, 1]; its scaler is reused by the metric.
    sy = MinMaxScaler((0, 1))
    y = sy.fit_transform(y)
    meta_info['Y'] = {'type': 'target', 'scaler': sy}

    train_x, test_x, train_y, test_y = train_test_split(
        x, y, test_size=0.2, random_state=random_state)
    metric = metric_wrapper(rmse, sy)
    return train_x, test_x, train_y, test_y, task_type, meta_info, metric

# Fixed seed shared by the data generator and its train/test split.
random_state = 0
# One million synthetic samples; the function returns the split arrays,
# the task type, per-column metadata and the bound evaluation metric.
(train_x, test_x, train_y, test_y,
 task_type, meta_info, get_metric) = data_generator1(datanum=1_000_000,
                                                     random_state=random_state)

# Compare the PyTorch and TensorFlow GAM implementations

In [3]:
import torch


class TensorLayer(torch.nn.Module):
    """A bank of ``n_subnets`` independent MLPs evaluated with batched matmuls.

    Every subnet has the same architecture (``subnet_arch`` hidden widths
    followed by a single output node). The weights of layer ``k`` for all
    subnets are stored in one tensor of shape
    ``(n_subnets, fan_in, fan_out)`` and the biases in ``(n_subnets, fan_out)``.

    Parameters
    ----------
    n_subnets : int
        Number of parallel subnetworks.
    subnet_arch : sequence of int
        Hidden-layer widths; any sequence (list, tuple, ...) is accepted.
    n_input_nodes : int
        Input width of each subnet.
    activation_func : callable
        Applied after every hidden layer (not after the output layer).
    device : str or torch.device
        Device on which the parameters are allocated.
    """

    def __init__(self, n_subnets, subnet_arch, n_input_nodes, activation_func, device):
        super().__init__()

        self.device = device
        self.n_subnets = n_subnets
        self.n_input_nodes = n_input_nodes
        self.activation_func = activation_func
        # list(...) lets callers pass a tuple; the trailing 1 is the output node.
        layer_widths = list(subnet_arch) + [1]
        self.n_hidden_layers = len(layer_widths) - 1

        all_biases = []
        all_weights = []
        fan_in = n_input_nodes
        for fan_out in layer_widths:
            # Hidden and output layers share the same shape rule, so a single
            # code path covers them (the output layer simply has fan_out == 1).
            w = torch.nn.Parameter(torch.empty(size=(n_subnets, fan_in, fan_out),
                                               dtype=torch.float, device=device))
            b = torch.nn.Parameter(torch.empty(size=(n_subnets, fan_out),
                                               dtype=torch.float, device=device))
            fan_in = fan_out
            torch.nn.init.zeros_(b)
            # Orthogonal init is applied per subnet so each 2-D slice is
            # initialised independently.
            for j in range(n_subnets):
                torch.nn.init.orthogonal_(w[j])
            all_biases.append(b)
            all_weights.append(w)
        self.all_biases = torch.nn.ParameterList(all_biases)
        self.all_weights = torch.nn.ParameterList(all_weights)

    def individual_forward(self, inputs, idx):
        """Evaluate only subnet ``idx`` on ``inputs``.

        ``inputs`` is multiplied by that subnet's 2-D weight slices, so its
        last dimension must equal ``n_input_nodes``. Returns the subnet's raw
        output (last dimension of size 1).
        """
        xs = inputs
        for i in range(self.n_hidden_layers):
            xs = self.activation_func(torch.matmul(xs, self.all_weights[i][idx]) + self.all_biases[i][idx])
        outputs = torch.matmul(xs, self.all_weights[-1][idx]) + self.all_biases[-1][idx]
        return outputs

    def forward(self, inputs):
        """Evaluate all subnets at once and return the sum of their outputs.

        ``inputs`` is transposed and given a trailing unit axis, i.e. a 2-D
        ``(batch, n_subnets)`` tensor becomes ``(n_subnets, batch, 1)`` with
        column ``k`` feeding subnet ``k``. For such 2-D inputs the batched
        matmul therefore requires ``n_input_nodes == 1``. The result has one
        value per batch row.
        """
        xs = torch.unsqueeze(torch.transpose(inputs, 0, 1), 2)
        for i in range(self.n_hidden_layers):
            # Biases are reshaped to (n_subnets, 1, width) to broadcast over the batch axis.
            xs = self.activation_func(torch.matmul(xs, self.all_weights[i])
                              + torch.reshape(self.all_biases[i], [self.n_subnets, 1, -1]))

        outputs = torch.matmul(xs, self.all_weights[-1]) + torch.reshape(self.all_biases[-1], [self.n_subnets, 1, -1])
        # Back to (batch, n_subnets), then add the subnet contributions.
        outputs = torch.squeeze(torch.transpose(outputs, 0, 1), dim=2)
        outputs = outputs.sum(1)
        return outputs

# Seed NumPy and PyTorch so the orthogonal initialisation is reproducible.
random_state = 0
np.random.seed(random_state)
torch.manual_seed(random_state)
net = TensorLayer(n_subnets=5, subnet_arch=[10], n_input_nodes=1, activation_func=torch.nn.ReLU(), device="cpu")
# Snapshot each subnet's initial [hidden kernel, output kernel] as NumPy copies
# so the same starting weights can be reused outside of PyTorch.
coefs = [[net.all_weights[0][i].detach().numpy().copy(), net.all_weights[1][i].detach().numpy().copy()]
         for i in range(net.n_subnets)]
# Call the module itself rather than .forward() so registered hooks are honoured.
net(torch.tensor(train_x[:10, :5], dtype=torch.float32))

tensor([-0.0842, -0.0735, -0.0710, -0.1213,  0.0126, -0.1070, -0.2776, -0.0694,
        -0.2091,  0.1363], grad_fn=<SumBackward1>)

In [4]:
from gaminet.dataloader import FastTensorDataLoader
# Element-wise squared error; it is averaged explicitly inside the loop.
loss_fn = torch.nn.MSELoss(reduction="none")
opt = torch.optim.Adam(list(net.parameters()), lr=0.01)

# The training batch and the monitoring inputs never change between epochs,
# so the tensors are built once instead of on every iteration.
batch_xx = torch.tensor(train_x[:100, :5], dtype=torch.float32)
batch_yy = torch.tensor(train_y[:100], dtype=torch.float32).ravel()
torch_eval_xx = torch.tensor(train_x[:10, :5], dtype=torch.float32)

net.train()
for epoch in range(10):
    opt.zero_grad(set_to_none=True)
    pred = net(batch_xx).ravel()
    loss = torch.mean(loss_fn(pred, batch_yy))
    loss.backward()
    opt.step()
    # Monitoring only: no autograd graph is needed for the printed predictions.
    with torch.no_grad():
        print(net(torch_eval_xx).ravel().numpy().round(3))

[ 0.077  0.1    0.124  0.051  0.149  0.053 -0.112  0.072 -0.029  0.291]
[0.233 0.264 0.312 0.217 0.277 0.205 0.047 0.209 0.145 0.435]
[0.373 0.41  0.477 0.367 0.388 0.342 0.191 0.333 0.301 0.564]
[0.479 0.52  0.603 0.482 0.474 0.448 0.302 0.425 0.421 0.66 ]
[0.539 0.583 0.674 0.549 0.521 0.509 0.367 0.479 0.489 0.713]
[0.555 0.599 0.694 0.568 0.533 0.528 0.386 0.494 0.509 0.725]
[0.539 0.582 0.674 0.552 0.518 0.513 0.372 0.481 0.493 0.708]
[0.502 0.541 0.628 0.512 0.485 0.476 0.336 0.448 0.453 0.669]
[0.453 0.488 0.566 0.457 0.442 0.426 0.286 0.404 0.399 0.619]
[0.398 0.428 0.498 0.396 0.395 0.37  0.23  0.354 0.339 0.564]


In [5]:
import tensorflow as tf
from tensorflow.keras import layers

class NumerNet(tf.keras.layers.Layer):
    """A small dense subnetwork whose kernels start from supplied constants.

    Parameters
    ----------
    subnet_arch : sequence of int
        Width of each hidden Dense layer.
    activation_func : callable
        Activation used by every hidden layer.
    weight_init : sequence of array-like
        Initial kernels: ``weight_init[k]`` seeds hidden layer ``k`` and
        ``weight_init[-1]`` seeds the single-unit output layer. For one
        hidden layer this is ``[hidden_kernel, output_kernel]``.
    subnet_id : int
        Identifier stored on the instance; not otherwise used here.
    """

    def __init__(self, subnet_arch, activation_func, weight_init, subnet_id):
        super().__init__()
        self.layers = []
        self.subnet_arch = subnet_arch
        self.activation_func = activation_func
        self.subnet_id = subnet_id
        for depth, nodes in enumerate(self.subnet_arch):
            # Each hidden layer takes its own kernel; reusing weight_init[0] for
            # every layer would hand deeper layers a kernel of the wrong shape.
            self.layers.append(layers.Dense(nodes, activation=self.activation_func,
                                            kernel_initializer=tf.keras.initializers.Constant(weight_init[depth])))
        # The last entry always belongs to the output layer.
        self.output_layer = layers.Dense(1, activation=tf.identity,
                                         kernel_initializer=tf.keras.initializers.Constant(weight_init[-1]))

    def call(self, inputs):
        """Run ``inputs`` through the hidden layers and the output layer."""
        x = inputs
        for dense_layer in self.layers:
            x = dense_layer(x)
        output = self.output_layer(x)
        return output


class MainEffectBlock(tf.keras.layers.Layer):
    """Sum of ``n_subnets`` single-feature ``NumerNet`` subnetworks.

    Parameters
    ----------
    n_subnets : int
        Number of subnetworks; subnet ``i`` reads input column ``i``.
    subnet_arch : sequence of int
        Hidden-layer widths shared by all subnets.
    activation_func : callable
        Hidden-layer activation shared by all subnets.
    weight_init : sequence, optional
        ``weight_init[i]`` is passed to subnet ``i`` as its initial kernels.
        When omitted, the module-level ``coefs`` is used, which keeps the
        original three-argument call working.
    """

    def __init__(self, n_subnets, subnet_arch, activation_func, weight_init=None):
        super().__init__()

        self.n_subnets = n_subnets
        self.subnet_arch = subnet_arch
        self.activation_func = activation_func
        if weight_init is None:
            # Backward-compatible fallback to the notebook-level snapshot.
            weight_init = coefs
        self.subnets = []
        for i in range(self.n_subnets):
            self.subnets.append(NumerNet(self.subnet_arch, self.activation_func,
                                         weight_init=weight_init[i], subnet_id=i))

    def call(self, inputs):
        """Feed column ``i`` of ``inputs`` to subnet ``i`` and sum the outputs.

        The per-subnet outputs of the most recent call are kept in
        ``self.subnet_outputs``.
        """
        self.subnet_outputs = []
        for i in range(self.n_subnets):
            subnet = self.subnets[i]
            # Gathering with a list index keeps the column axis on the slice.
            subnet_output = subnet(tf.gather(inputs, [i], axis=1))
            self.subnet_outputs.append(subnet_output)
        # The reshape restores a (rows, n_subnets) layout even if squeeze
        # dropped a size-1 batch or subnet axis.
        stacked = tf.squeeze(tf.stack(self.subnet_outputs, 1))
        output = tf.reshape(stacked, [-1, self.n_subnets])
        output = tf.reduce_sum(output, 1)
        return output

# Build the TensorFlow counterpart and print its predictions for the first
# ten training rows (first five features) before any training step.
tfnet = MainEffectBlock(n_subnets=5, subnet_arch=[10], activation_func=tf.nn.relu)
tf_initial_pred = tfnet(train_x[:10, :5])
print(tf_initial_pred.numpy())

2022-04-03 10:42:04.035523: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-04-03 10:42:04.035549: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


[-0.08416221 -0.07353798 -0.07099413 -0.12131885  0.01259092 -0.10699715
 -0.2775642  -0.06943712 -0.20914906  0.13627838]


2022-04-03 10:42:06.359079: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2022-04-03 10:42:06.359102: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2022-04-03 10:42:06.359116: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (iZwz989gvg9q1cbx1wnjqlZ): /proc/driver/nvidia/version does not exist
2022-04-03 10:42:06.359312: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [6]:
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)
loss_fn = tf.keras.losses.MeanSquaredError()

# The batch and the monitoring inputs are fixed, so slice them once up front.
batch_xx = train_x[:100, :5]
batch_yy = train_y[:100].ravel()
tf_eval_xx = train_x[:10, :5]

for epoch in range(10):
    with tf.GradientTape() as tape:
        # Call the layer directly instead of spelling out __call__.
        pred = tfnet(batch_xx)
        total_loss = loss_fn(batch_yy, pred)
    grads = tape.gradient(total_loss, tfnet.trainable_weights)
    optimizer.apply_gradients(zip(grads, tfnet.trainable_weights))
    print(tfnet(tf_eval_xx).numpy().round(3))

[ 0.077  0.1    0.124  0.051  0.149  0.053 -0.112  0.072 -0.029  0.291]
[0.233 0.264 0.312 0.217 0.277 0.205 0.047 0.209 0.145 0.435]
[0.373 0.41  0.477 0.367 0.388 0.342 0.191 0.333 0.301 0.564]
[0.479 0.52  0.603 0.482 0.474 0.448 0.302 0.425 0.421 0.66 ]
[0.539 0.583 0.674 0.549 0.521 0.509 0.367 0.479 0.489 0.713]
[0.555 0.599 0.694 0.568 0.533 0.528 0.386 0.494 0.509 0.725]
[0.539 0.582 0.674 0.552 0.518 0.513 0.372 0.481 0.493 0.708]
[0.502 0.541 0.628 0.512 0.485 0.476 0.336 0.448 0.453 0.669]
[0.453 0.488 0.566 0.457 0.442 0.426 0.286 0.404 0.399 0.619]
[0.398 0.428 0.498 0.396 0.395 0.37  0.23  0.354 0.339 0.564]
