In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import OneHotEncoder, LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_boston
from sklearn.metrics import r2_score, accuracy_score
import torch 
import torch.nn as nn
import torch.nn.functional as F

In [3]:
X, y = load_boston(return_X_y=True)
X.shape, y.shape

((506, 13), (506,))

In [4]:
batch_size=64

In [5]:
scaler = MinMaxScaler()


X = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=2137)

X_train = torch.from_numpy(X_train.astype('float32'))
X_test = torch.from_numpy(X_test.astype('float32'))
y_train = torch.from_numpy(y_train.reshape(-1, 1).astype('float32'))
y_test = torch.from_numpy(y_test.reshape(-1, 1).astype('float32'))

dataset_train = torch.utils.data.TensorDataset(X_train, y_train)
dataset_train = torch.utils.data.DataLoader(dataset_train, batch_size=batch_size, shuffle=True)

In [6]:
inputs, targets = next(iter(dataset_train))

In [7]:
inputs.shape, targets.shape

(torch.Size([64, 13]), torch.Size([64, 1]))

In [8]:
class NeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer1 = nn.Linear(13, 32)
        self.layer2 = nn.Linear(32, 16)
        self.layer3 = nn.Linear(16, 12)
        self.layer4 = nn.Linear(12, 1)
        self.dropout1 = nn.Dropout(0.1)
        self.dropout2 = nn.Dropout(0.1)
        self.dropout3 = nn.Dropout(0.1)

    def forward(self, x):
        x = F.relu(self.layer1(x))
        x = self.dropout1(x)
        x = F.relu(self.layer2(x))
        x = self.dropout2(x)
        x = F.relu(self.layer3(x))
        x = self.dropout3(x)
        x = self.layer4(x)
        return x

model = NeuralNetwork()
model

NeuralNetwork(
  (layer1): Linear(in_features=13, out_features=32, bias=True)
  (layer2): Linear(in_features=32, out_features=16, bias=True)
  (layer3): Linear(in_features=16, out_features=12, bias=True)
  (layer4): Linear(in_features=12, out_features=1, bias=True)
  (dropout1): Dropout(p=0.1, inplace=False)
  (dropout2): Dropout(p=0.1, inplace=False)
  (dropout3): Dropout(p=0.1, inplace=False)
)

In [9]:
loss_obj = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters())

In [10]:
for i in range(100):
    optimizer.zero_grad()
    for X, y in dataset_train:
        y_pred = model(X)
        loss = loss_obj(y_pred, y)
        loss.backward()
        optimizer.step()
    print(loss)

tensor(543.9302, grad_fn=<MseLossBackward>)
tensor(488.8261, grad_fn=<MseLossBackward>)
tensor(770.0425, grad_fn=<MseLossBackward>)
tensor(658.2023, grad_fn=<MseLossBackward>)
tensor(515.4057, grad_fn=<MseLossBackward>)
tensor(578.2120, grad_fn=<MseLossBackward>)
tensor(591.4470, grad_fn=<MseLossBackward>)
tensor(480.4409, grad_fn=<MseLossBackward>)
tensor(764.2185, grad_fn=<MseLossBackward>)
tensor(441.9752, grad_fn=<MseLossBackward>)
tensor(445.0178, grad_fn=<MseLossBackward>)
tensor(385.0773, grad_fn=<MseLossBackward>)
tensor(469.6506, grad_fn=<MseLossBackward>)
tensor(446.7991, grad_fn=<MseLossBackward>)
tensor(491.9495, grad_fn=<MseLossBackward>)
tensor(300.4225, grad_fn=<MseLossBackward>)
tensor(287.5339, grad_fn=<MseLossBackward>)
tensor(329.1008, grad_fn=<MseLossBackward>)
tensor(275.3817, grad_fn=<MseLossBackward>)
tensor(81.3637, grad_fn=<MseLossBackward>)
tensor(133.9323, grad_fn=<MseLossBackward>)
tensor(155.4055, grad_fn=<MseLossBackward>)
tensor(142.8362, grad_fn=<MseLoss

In [11]:
y_pred = model(X_test)
r2_score(y_test.detach().numpy(), y_pred.detach().numpy())

0.506484471392474

---

In [12]:
import numpy as np
import pandas as pd
import torch
from torch.utils.data import DataLoader
import torch.nn.functional as F
from loguru import logger

from nam.config import defaults
from nam.types import Config
from nam.utils.args import parse_args
from nam.data import NAMDataset
from nam.models import DNN, FeatureNN, NAM, get_num_units
from nam.engine import Engine

from main import get_config

import pytorch_lightning as pl

In [13]:
config = get_config()
pl.seed_everything(config.seed)

1

## FeatureNN

> Testing the `FeatureNN` network as a regular NN with input_shape 13 for the whole features and output 1
> This is for testing `ExU` Unit and how it behave

In [14]:
fnn = FeatureNN(
      config=config,
      name=f'FeatureNN_{0}',
      input_shape=13,
      num_units=128,
)

  return _no_grad_trunc_normal_(tensor, mean, std, a, b)


In [15]:
fnn

FeatureNN(
  (model): Sequential(
    (0): ExU(in_features=13, out_features=128)
    (1): ExU(in_features=128, out_features=64)
    (2): ExU(in_features=64, out_features=32)
    (3): ExU(in_features=32, out_features=1)
  )
)

In [16]:
loss_obj = torch.nn.MSELoss()
optimizer = torch.optim.Adam(fnn.parameters())

In [17]:
for i in range(100):
    optimizer.zero_grad()
    for X, y in dataset_train:
        y_pred = fnn(X)
        loss = loss_obj(y_pred, y)
        loss.backward()
        optimizer.step()
    print(loss)

tensor(1.1852e+19, grad_fn=<MseLossBackward>)
tensor(2.1022e+19, grad_fn=<MseLossBackward>)
tensor(1.9965e+19, grad_fn=<MseLossBackward>)
tensor(1.5021e+19, grad_fn=<MseLossBackward>)
tensor(1.4037e+19, grad_fn=<MseLossBackward>)
tensor(1.0525e+19, grad_fn=<MseLossBackward>)
tensor(1.1914e+19, grad_fn=<MseLossBackward>)
tensor(1.0202e+19, grad_fn=<MseLossBackward>)
tensor(3.8945e+18, grad_fn=<MseLossBackward>)
tensor(9.8691e+18, grad_fn=<MseLossBackward>)
tensor(1.2846e+19, grad_fn=<MseLossBackward>)
tensor(8.2722e+18, grad_fn=<MseLossBackward>)
tensor(6.0455e+18, grad_fn=<MseLossBackward>)
tensor(6.3798e+18, grad_fn=<MseLossBackward>)
tensor(4.7199e+18, grad_fn=<MseLossBackward>)
tensor(5.5048e+18, grad_fn=<MseLossBackward>)
tensor(6.4722e+18, grad_fn=<MseLossBackward>)
tensor(4.7808e+18, grad_fn=<MseLossBackward>)
tensor(5.4019e+18, grad_fn=<MseLossBackward>)
tensor(5.2708e+18, grad_fn=<MseLossBackward>)
tensor(2.2664e+18, grad_fn=<MseLossBackward>)
tensor(6.1229e+18, grad_fn=<MseLos

In [18]:
y_pred = fnn(X_test)
r2_score(y_test.detach().numpy(), y_pred.detach().numpy())

-2781243590242559.0

## NAM

In [19]:
model = NAM(
      config=config,
      name="NAMModel",
      num_inputs=X.shape[1],
      num_units=get_num_units(config, dataset_train)
)

  return _no_grad_trunc_normal_(tensor, mean, std, a, b)


In [20]:
model

NAM(
  (feature_nns): Sequential(
    (FeatureNN_0): FeatureNN(
      (model): Sequential(
        (0): ExU(in_features=1, out_features=128)
        (1): ExU(in_features=128, out_features=64)
        (2): ExU(in_features=64, out_features=32)
        (3): ExU(in_features=32, out_features=1)
      )
    )
    (FeatureNN_1): FeatureNN(
      (model): Sequential(
        (0): ExU(in_features=1, out_features=22)
        (1): ExU(in_features=22, out_features=64)
        (2): ExU(in_features=64, out_features=32)
        (3): ExU(in_features=32, out_features=1)
      )
    )
    (FeatureNN_2): FeatureNN(
      (model): Sequential(
        (0): ExU(in_features=1, out_features=60)
        (1): ExU(in_features=60, out_features=64)
        (2): ExU(in_features=64, out_features=32)
        (3): ExU(in_features=32, out_features=1)
      )
    )
    (FeatureNN_3): FeatureNN(
      (model): Sequential(
        (0): ExU(in_features=1, out_features=4)
        (1): ExU(in_features=4, out_features=64)
   

In [21]:
loss_obj = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters())

In [22]:
for i in range(100):
    optimizer.zero_grad()
    for X, y in dataset_train:
        y_pred = model(X)
        loss = loss_obj(y_pred, y)
        loss.backward()
        optimizer.step()
    print(loss)

tensor(4.5366e+18, grad_fn=<MseLossBackward>)
tensor(3.6781e+18, grad_fn=<MseLossBackward>)
tensor(3.3963e+18, grad_fn=<MseLossBackward>)
tensor(3.1029e+18, grad_fn=<MseLossBackward>)
tensor(3.1192e+18, grad_fn=<MseLossBackward>)
tensor(3.0629e+18, grad_fn=<MseLossBackward>)
tensor(2.4752e+18, grad_fn=<MseLossBackward>)
tensor(2.3378e+18, grad_fn=<MseLossBackward>)
tensor(2.8230e+18, grad_fn=<MseLossBackward>)
tensor(1.9054e+18, grad_fn=<MseLossBackward>)
tensor(1.6843e+18, grad_fn=<MseLossBackward>)
tensor(1.7714e+18, grad_fn=<MseLossBackward>)
tensor(1.0693e+18, grad_fn=<MseLossBackward>)
tensor(1.4068e+18, grad_fn=<MseLossBackward>)
tensor(1.2338e+18, grad_fn=<MseLossBackward>)
tensor(2.2380e+18, grad_fn=<MseLossBackward>)
tensor(1.7876e+18, grad_fn=<MseLossBackward>)
tensor(1.0749e+18, grad_fn=<MseLossBackward>)
tensor(8.8272e+17, grad_fn=<MseLossBackward>)
tensor(1.5735e+18, grad_fn=<MseLossBackward>)
tensor(1.0268e+18, grad_fn=<MseLossBackward>)
tensor(1.1852e+18, grad_fn=<MseLos

In [23]:
y_pred = model(X_test)
r2_score(y_test.detach().numpy(), y_pred.detach().numpy())

-1684725260964761.8