In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import OneHotEncoder, LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_boston
from sklearn.metrics import r2_score, accuracy_score
import torch 
import torch.nn as nn
import torch.nn.functional as F

In [3]:
X, y = load_boston(return_X_y=True)
X.shape, y.shape

((506, 13), (506,))

In [4]:
batch_size=64

In [5]:
scaler = MinMaxScaler()


X = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=2137)

X_train = torch.from_numpy(X_train.astype('float32'))
X_test = torch.from_numpy(X_test.astype('float32'))
y_train = torch.from_numpy(y_train.reshape(-1, 1).astype('float32'))
y_test = torch.from_numpy(y_test.reshape(-1, 1).astype('float32'))

dataset_train = torch.utils.data.TensorDataset(X_train, y_train)
dataset_train = torch.utils.data.DataLoader(dataset_train, batch_size=batch_size, shuffle=True)

In [6]:
inputs, targets = next(iter(dataset_train))

In [7]:
inputs.shape, targets.shape

(torch.Size([64, 13]), torch.Size([64, 1]))

In [8]:
class NeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer1 = nn.Linear(13, 32)
        self.layer2 = nn.Linear(32, 16)
        self.layer3 = nn.Linear(16, 12)
        self.layer4 = nn.Linear(12, 1)
        self.dropout1 = nn.Dropout(0.1)
        self.dropout2 = nn.Dropout(0.1)
        self.dropout3 = nn.Dropout(0.1)

    def forward(self, x):
        x = F.relu(self.layer1(x))
        x = self.dropout1(x)
        x = F.relu(self.layer2(x))
        x = self.dropout2(x)
        x = F.relu(self.layer3(x))
        x = self.dropout3(x)
        x = self.layer4(x)
        return x

nn_model = NeuralNetwork()
nn_model

NeuralNetwork(
  (layer1): Linear(in_features=13, out_features=32, bias=True)
  (layer2): Linear(in_features=32, out_features=16, bias=True)
  (layer3): Linear(in_features=16, out_features=12, bias=True)
  (layer4): Linear(in_features=12, out_features=1, bias=True)
  (dropout1): Dropout(p=0.1, inplace=False)
  (dropout2): Dropout(p=0.1, inplace=False)
  (dropout3): Dropout(p=0.1, inplace=False)
)

In [9]:
loss_obj = torch.nn.MSELoss()
optimizer = torch.optim.Adam(nn_model.parameters())

In [10]:
for i in range(100):
    optimizer.zero_grad()
    for X, y in dataset_train:
        y_pred = nn_model(X)
        loss = loss_obj(y_pred, y)
        loss.backward()
        print(nn_model.layer1.weight.grad)
        optimizer.step()
    print(loss)

7e+00,
          6.1139e+00,  4.7226e+00,  3.6640e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00,  0.0000e+00],
        [-1.8867e-01,  1.4512e+00,  1.1877e+00, -2.8048e-01, -5.5367e-01,
         -1.9915e+00,  1.5018e+00,  5.3638e+00, -3.4290e+00, -9.0692e-02,
          6.7473e+00,  4.5321e+00,  3.5990e+00],
        [ 7.0578e-01, -2.8597e+00, -2.8985e-01,  2.4704e+00, -3.6860e-01,
         -3.8502e+00, -4.5308e+00, -1.1489e+01,  8.8238e+00,  3.2295e+00,
         -1.2378e+01, -1.6508e+01, -4.9808e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00,  0.0000e+00],
        [ 2.0516e-01,  1.5229e+00,  3.4077e+00, -4.9880e-01,  1.7768e+00,
          3.6457e-01,  5.1268e+00,  5.2756e+00, -8.2464e-01,  2.3081e+00,
        

In [11]:
y_pred = nn_model(X_test)
y_test_numpy = y_test.detach().numpy()
y_pred_numpy = y_pred.detach().numpy()
print(y_test_numpy[:10])
print(y_pred_numpy[:10])
r2_score(y_test_numpy, y_pred_numpy)

[[35.4]
 [50. ]
 [23.9]
 [24.4]
 [27.9]
 [13.8]
 [13.3]
 [30.1]
 [14.9]
 [21.6]]
[[37.674427]
 [31.473034]
 [16.63836 ]
 [19.277927]
 [37.953556]
 [15.370569]
 [19.088264]
 [30.224264]
 [13.163496]
 [29.710129]]


0.5129152689872407

---

In [12]:
import numpy as np
import pandas as pd
import torch
from torch.utils.data import DataLoader
import torch.nn.functional as F
from loguru import logger

from nam.config import defaults
from nam.types import Config
from nam.utils.args import parse_args
from nam.data import NAMDataset
from nam.models import DNN, FeatureNN, NAM, get_num_units
from nam.engine import Engine

from main import get_config

import pytorch_lightning as pl

In [13]:
config = get_config()
config.shallow = True
config.dropout = 0.0
print(config)
pl.seed_everything(config.seed)

Config(activation='exu', batch_size=1024, cross_val=False, data_split=1, dataset_name='Teleco', debug=False, decay_rate=0.995, device='cpu', dropout=0.0, early_stopping_epochs=60, feature_dropout=0.0, fold_num=1, l2_regularization=0.0, learning_rate=0.01, lr=0.01, max_checkpoints_to_keep=1, n_folds=5, n_models=1, num_basis_functions=1000, num_splits=3, num_units=64, num_workers=16, output_dir='output', output_regularization=0.0, patience=10, regression=False, save_checkpoint_every_n_epochs=10, seed=1, shallow=True, shuffle=True, training_epochs=10, units='ReLU', units_multiplier=2, use_dnn=False)


1

## FeatureNN

> Testing the `FeatureNN` network as a regular NN with input_shape 13 for the whole features and output 1
> This is for testing `ExU` Unit and how it behave

In [14]:
fnn = FeatureNN(
      config=config,
      name=f'FeatureNN_{0}',
      input_shape=13,
      num_units=128,
)

In [15]:
fnn

FeatureNN(
  (model): Sequential(
    (0): Linear(in_features=13, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=1, bias=True)
    (3): ReLU()
  )
)

In [16]:
loss_obj = torch.nn.MSELoss()
optimizer = torch.optim.Adam(fnn.parameters())

In [17]:
for i in range(100):
    optimizer.zero_grad()
    for X, y in dataset_train:
        y_pred = fnn(X)
        loss = loss_obj(y_pred, y)
        loss.backward()
        print(fnn.layers[0].weight.grad)
        optimizer.step()
    print(loss)

      [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])
tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])
tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])
tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])
tensor([[0., 0., 0.,  ...,

In [18]:
y_pred = fnn(X_test)
y_test_numpy = y_test.detach().numpy()
y_pred_numpy = y_pred.detach().numpy()
print(y_test_numpy[:10])
print(y_pred_numpy[:10])
r2_score(y_test_numpy, y_pred_numpy)

[[35.4]
 [50. ]
 [23.9]
 [24.4]
 [27.9]
 [13.8]
 [13.3]
 [30.1]
 [14.9]
 [21.6]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]]


-4.407978322652704

## NAM

In [26]:
model = NAM(
      config=config,
      name="NAMModel",
      num_inputs=X.shape[1],
      num_units=get_num_units(config, dataset_train)
)

In [21]:
model

NAM(
  (feature_nns): Sequential(
    (FeatureNN_0): FeatureNN(
      (model): Sequential(
        (0): Linear(in_features=1, out_features=128, bias=True)
        (1): ReLU()
        (2): Linear(in_features=128, out_features=1, bias=True)
        (3): ReLU()
      )
    )
    (FeatureNN_1): FeatureNN(
      (model): Sequential(
        (0): Linear(in_features=1, out_features=30, bias=True)
        (1): ReLU()
        (2): Linear(in_features=30, out_features=1, bias=True)
        (3): ReLU()
      )
    )
    (FeatureNN_2): FeatureNN(
      (model): Sequential(
        (0): Linear(in_features=1, out_features=66, bias=True)
        (1): ReLU()
        (2): Linear(in_features=66, out_features=1, bias=True)
        (3): ReLU()
      )
    )
    (FeatureNN_3): FeatureNN(
      (model): Sequential(
        (0): Linear(in_features=1, out_features=4, bias=True)
        (1): ReLU()
        (2): Linear(in_features=4, out_features=1, bias=True)
        (3): ReLU()
      )
    )
    (FeatureNN_4):

In [27]:
loss_obj = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters())

In [28]:
for i in range(100):
    optimizer.zero_grad()
    for X, y in dataset_train:
        y_pred = model(X)
        loss = loss_obj(y_pred, y)
        loss.backward()
        optimizer.step()
    print(loss)

tensor(450.5529, grad_fn=<MseLossBackward>)
tensor(364.2879, grad_fn=<MseLossBackward>)
tensor(264.9506, grad_fn=<MseLossBackward>)
tensor(292.1973, grad_fn=<MseLossBackward>)
tensor(282.2094, grad_fn=<MseLossBackward>)
tensor(118.5042, grad_fn=<MseLossBackward>)
tensor(138.8713, grad_fn=<MseLossBackward>)
tensor(147.2352, grad_fn=<MseLossBackward>)
tensor(135.7615, grad_fn=<MseLossBackward>)
tensor(111.2672, grad_fn=<MseLossBackward>)
tensor(136.1643, grad_fn=<MseLossBackward>)
tensor(60.4116, grad_fn=<MseLossBackward>)
tensor(55.7765, grad_fn=<MseLossBackward>)
tensor(152.2850, grad_fn=<MseLossBackward>)
tensor(45.7922, grad_fn=<MseLossBackward>)
tensor(69.9979, grad_fn=<MseLossBackward>)
tensor(80.1373, grad_fn=<MseLossBackward>)
tensor(137.9770, grad_fn=<MseLossBackward>)
tensor(80.2280, grad_fn=<MseLossBackward>)
tensor(122.4508, grad_fn=<MseLossBackward>)
tensor(41.2468, grad_fn=<MseLossBackward>)
tensor(217.5053, grad_fn=<MseLossBackward>)
tensor(41.2669, grad_fn=<MseLossBackwar

In [29]:
y_pred = model(X_test)
y_test_numpy = y_test.detach().numpy()
y_pred_numpy = y_pred.detach().numpy()
r2_score(y_test_numpy, y_pred_numpy)

0.6219556961732786