## Imports

In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import pytorch_lightning as pl

In [3]:
from nam.data import *
from nam.config import defaults
from nam.models import NAM, DNN, get_num_units
from nam.engine import Engine

## Configuration 

In [4]:
# Start from the project's default hyper-parameters. Later cells read
# `config.seed`, `config.output_dir` and `config.training_epochs` from this object.
config = defaults()
print(config)

Config(activation='exu', batch_size=1024, cross_val=False, data_split=1, debug=False, decay_rate=0.995, device='cpu', dropout=0.5, feature_dropout=0.0, fold_num=1, hidden_sizes=[64, 32], l2_regularization=0.0, lr=0.01, n_folds=5, n_models=1, num_basis_functions=1000, num_splits=3, num_units=64, num_workers=16, optimizer='adam', output_dir='output', output_regularization=0.0, patience=10, regression=True, seed=1377, shuffle=True, training_epochs=10, units_multiplier=2, use_dnn=False)


In [5]:
pl.seed_everything(config.seed)

Global seed set to 1377


1377

## Dataset & Dataloaders (California Housing Dataset)

In [6]:
# Load the California Housing data through the project loader.
# The Breast Cancer and GALLUP datasets are loaded in their own sections further down.
dataset = load_sklearn_housing_data(config=config) 

In [7]:
len(dataset.data)

20640

In [8]:
dataset.features.shape

torch.Size([20640, 8])

In [9]:
dataset.targets.max().item()

5.000010013580322

In [10]:
dataset.targets.shape

torch.Size([20640])

In [11]:
train_dls = dataset.train_dataloaders()

In [12]:
test_dl = dataset.test_dataloaders()

## NAM Model

In [13]:
# Build the additive model for the housing data.
# `num_inputs` is the length of one sample's feature vector; `num_units` is
# whatever `get_num_units` derives from the full feature matrix (the printed
# module tree below shows a different first-layer width per feature).
model = NAM(
    config=config,
    name="NAMModel_Housing",
    num_inputs=len(dataset[0][0]),
    num_units=get_num_units(config, dataset.features),
)
model  # last expression: display the module tree

  return _no_grad_trunc_normal_(tensor, mean, std, a, b)


NAM(
  (feature_nns): Sequential(
    (FeatureNN_0): FeatureNN(
      (model): ModuleList(
        (0): ExU(in_features=1, out_features=1000)
        (1): LinReLU(in_features=1000, out_features=64)
        (2): LinReLU(in_features=64, out_features=32)
        (3): Linear(in_features=32, out_features=1, bias=True)
      )
    )
    (FeatureNN_1): FeatureNN(
      (model): ModuleList(
        (0): ExU(in_features=1, out_features=104)
        (1): LinReLU(in_features=104, out_features=64)
        (2): LinReLU(in_features=64, out_features=32)
        (3): Linear(in_features=32, out_features=1, bias=True)
      )
    )
    (FeatureNN_2): FeatureNN(
      (model): ModuleList(
        (0): ExU(in_features=1, out_features=1000)
        (1): LinReLU(in_features=1000, out_features=64)
        (2): LinReLU(in_features=64, out_features=32)
        (3): Linear(in_features=32, out_features=1, bias=True)
      )
    )
    (FeatureNN_3): FeatureNN(
      (model): ModuleList(
        (0): ExU(in_featur

## Training

In [14]:
# Wrap the NAM in the project's Engine; this wrapper (not the bare model) is
# what gets passed to `trainer.fit` / `trainer.test` in the cells below.
engine = Engine(config, model)

In [15]:
# Keep the three checkpoints with the lowest monitored `val_loss`, written under
# <output_dir>/<model name>/checkpoints with epoch and val_loss in the file name.
# NOTE(review): this single callback instance is handed to every fold's Trainer in
# the training loop, so its top-k bookkeeping presumably spans all folds - confirm.
checkpoint_callback = pl.callbacks.ModelCheckpoint(
    monitor="val_loss",
    mode="min",
    save_top_k=3,
    dirpath=f"{config.output_dir}/{model.name}/checkpoints",
    filename=model.name + "-{epoch:02d}-{val_loss:.4f}",
)

In [16]:
# Train once per (train, val) dataloader pair yielded by `train_dls`.
#
# A plain `for` loop consumes the fold iterator and stops when it is exhausted.
# The previous `while True / try / next() / except StopIteration` form also wrapped
# `trainer.fit`, so a StopIteration escaping from inside training would have been
# swallowed and silently ended the run early.
#
# NOTE(review): the same `engine` object is fitted in every fold, so each fold
# continues from the weights left by the previous one - confirm this is intended
# rather than re-initialising the model per fold.
# NOTE(review): `train_dataloader=` is the keyword accepted by the Lightning version
# this notebook was run with; newer releases are believed to rename it - confirm
# before upgrading.
for train_dl, val_dl in train_dls:
    # A fresh Trainer per fold; `trainer` from the last fold is reused by the test cell.
    trainer = pl.Trainer(
        default_root_dir=f"{config.output_dir}/{model.name}",
        max_epochs=config.training_epochs,
        callbacks=[checkpoint_callback],
    )
    trainer.fit(model=engine, train_dataloader=train_dl, val_dataloaders=val_dl)

2021-03-29 03:46:50.787 | INFO     | nam.data.folded:train_dataloaders:113 - Fold[1]: train: 14448, val: 2064
GPU available: False, used: False
TPU available: None, using: 0 TPU cores

  | Name  | Type | Params
-------------------------------
0 | model | NAM  | 486 K 
-------------------------------
486 K     Trainable params
0         Non-trainable params
486 K     Total params
1.944     Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

2021-03-29 03:47:21.276 | INFO     | nam.data.folded:train_dataloaders:113 - Fold[2]: train: 14448, val: 2064
GPU available: False, used: False
TPU available: None, using: 0 TPU cores

  | Name  | Type | Params
-------------------------------
0 | model | NAM  | 486 K 
-------------------------------
486 K     Trainable params
0         Non-trainable params
486 K     Total params
1.944     Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

2021-03-29 03:47:52.223 | INFO     | nam.data.folded:train_dataloaders:113 - Fold[3]: train: 14448, val: 2064
GPU available: False, used: False
TPU available: None, using: 0 TPU cores

  | Name  | Type | Params
-------------------------------
0 | model | NAM  | 486 K 
-------------------------------
486 K     Trainable params
0         Non-trainable params
486 K     Total params
1.944     Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

2021-03-29 03:48:23.674 | INFO     | nam.data.folded:train_dataloaders:113 - Fold[4]: train: 14448, val: 2064
GPU available: False, used: False
TPU available: None, using: 0 TPU cores

  | Name  | Type | Params
-------------------------------
0 | model | NAM  | 486 K 
-------------------------------
486 K     Trainable params
0         Non-trainable params
486 K     Total params
1.944     Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

2021-03-29 03:48:55.016 | INFO     | nam.data.folded:train_dataloaders:113 - Fold[5]: train: 14448, val: 2064
GPU available: False, used: False
TPU available: None, using: 0 TPU cores

  | Name  | Type | Params
-------------------------------
0 | model | NAM  | 486 K 
-------------------------------
486 K     Trainable params
0         Non-trainable params
486 K     Total params
1.944     Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

## Testing

In [17]:
# Evaluate on the loader returned by `dataset.test_dataloaders()`.
# `trainer` is the Trainer left over from the last fold of the training loop, and
# `engine` is passed explicitly, so this presumably scores its current weights
# rather than the best saved checkpoint - TODO confirm.
trainer.test(model=engine, test_dataloaders=test_dl)

Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_loss': 0.6098895072937012, 'test_loss_epoch': 1.2295547723770142}
--------------------------------------------------------------------------------


[{'test_loss': 0.6098895072937012, 'test_loss_epoch': 1.2295547723770142}]

---
---
---

## Dataset & Dataloaders (Breast Cancer Dataset)

In [18]:
# Load the Breast Cancer data through the project loader; this rebinds `dataset`,
# replacing the housing dataset used in the previous section.
# NOTE(review): `config` is unchanged from the top of the notebook, where it printed
# regression=True, while the targets here top out at 1.0 (presumably 0/1 labels) -
# confirm whether this section should run with a classification config instead.
dataset = load_breast_data(config=config)

In [19]:
len(dataset.data)

569

In [20]:
dataset.features.shape

torch.Size([569, 30])

In [21]:
dataset.targets.max().item()

1.0

In [22]:
dataset.targets.shape

torch.Size([569])

In [23]:
train_dls = dataset.train_dataloaders()

In [24]:
test_dl = dataset.test_dataloaders()

## NAM Model

In [25]:
# Build the additive model for the breast-cancer data.
# `num_inputs` is the length of one sample's feature vector; `num_units` is
# whatever `get_num_units` derives from the full feature matrix (the printed
# module tree below shows a different first-layer width per feature).
model = NAM(
    config=config,
    name="NAMModel_Breast",
    num_inputs=len(dataset[0][0]),
    num_units=get_num_units(config, dataset.features),
)
model  # last expression: display the module tree

  return _no_grad_trunc_normal_(tensor, mean, std, a, b)


NAM(
  (feature_nns): Sequential(
    (FeatureNN_0): FeatureNN(
      (model): ModuleList(
        (0): ExU(in_features=1, out_features=912)
        (1): LinReLU(in_features=912, out_features=64)
        (2): LinReLU(in_features=64, out_features=32)
        (3): Linear(in_features=32, out_features=1, bias=True)
      )
    )
    (FeatureNN_1): FeatureNN(
      (model): ModuleList(
        (0): ExU(in_features=1, out_features=958)
        (1): LinReLU(in_features=958, out_features=64)
        (2): LinReLU(in_features=64, out_features=32)
        (3): Linear(in_features=32, out_features=1, bias=True)
      )
    )
    (FeatureNN_2): FeatureNN(
      (model): ModuleList(
        (0): ExU(in_features=1, out_features=1000)
        (1): LinReLU(in_features=1000, out_features=64)
        (2): LinReLU(in_features=64, out_features=32)
        (3): Linear(in_features=32, out_features=1, bias=True)
      )
    )
    (FeatureNN_3): FeatureNN(
      (model): ModuleList(
        (0): ExU(in_features

## Training

In [26]:
# Wrap the NAM in the project's Engine; this wrapper (not the bare model) is
# what gets passed to `trainer.fit` / `trainer.test` in the cells below.
engine = Engine(config, model)

In [27]:
# Keep the three checkpoints with the lowest monitored `val_loss`, written under
# <output_dir>/<model name>/checkpoints with epoch and val_loss in the file name.
# NOTE(review): this single callback instance is handed to every fold's Trainer in
# the training loop, so its top-k bookkeeping presumably spans all folds - confirm.
checkpoint_callback = pl.callbacks.ModelCheckpoint(
    monitor="val_loss",
    mode="min",
    save_top_k=3,
    dirpath=f"{config.output_dir}/{model.name}/checkpoints",
    filename=model.name + "-{epoch:02d}-{val_loss:.4f}",
)

In [28]:
# Train once per (train, val) dataloader pair yielded by `train_dls`.
#
# A plain `for` loop consumes the fold iterator and stops when it is exhausted.
# The previous `while True / try / next() / except StopIteration` form also wrapped
# `trainer.fit`, so a StopIteration escaping from inside training would have been
# swallowed and silently ended the run early.
#
# NOTE(review): the same `engine` object is fitted in every fold, so each fold
# continues from the weights left by the previous one - confirm this is intended
# rather than re-initialising the model per fold.
# NOTE(review): `train_dataloader=` is the keyword accepted by the Lightning version
# this notebook was run with; newer releases are believed to rename it - confirm
# before upgrading.
for train_dl, val_dl in train_dls:
    # A fresh Trainer per fold; `trainer` from the last fold is reused by the test cell.
    trainer = pl.Trainer(
        default_root_dir=f"{config.output_dir}/{model.name}",
        max_epochs=config.training_epochs,
        callbacks=[checkpoint_callback],
    )
    trainer.fit(model=engine, train_dataloader=train_dl, val_dataloaders=val_dl)

2021-03-29 03:49:28.093 | INFO     | nam.data.folded:train_dataloaders:113 - Fold[1]: train: 398, val: 57
GPU available: False, used: False
TPU available: None, using: 0 TPU cores

  | Name  | Type | Params
-------------------------------
0 | model | NAM  | 2.0 M 
-------------------------------
2.0 M     Trainable params
0         Non-trainable params
2.0 M     Total params
8.018     Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

2021-03-29 03:49:35.547 | INFO     | nam.data.folded:train_dataloaders:113 - Fold[2]: train: 398, val: 57
GPU available: False, used: False
TPU available: None, using: 0 TPU cores

  | Name  | Type | Params
-------------------------------
0 | model | NAM  | 2.0 M 
-------------------------------
2.0 M     Trainable params
0         Non-trainable params
2.0 M     Total params
8.018     Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

2021-03-29 03:49:42.161 | INFO     | nam.data.folded:train_dataloaders:113 - Fold[3]: train: 398, val: 57
GPU available: False, used: False
TPU available: None, using: 0 TPU cores

  | Name  | Type | Params
-------------------------------
0 | model | NAM  | 2.0 M 
-------------------------------
2.0 M     Trainable params
0         Non-trainable params
2.0 M     Total params
8.018     Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

2021-03-29 03:49:49.607 | INFO     | nam.data.folded:train_dataloaders:113 - Fold[4]: train: 398, val: 57
GPU available: False, used: False
TPU available: None, using: 0 TPU cores

  | Name  | Type | Params
-------------------------------
0 | model | NAM  | 2.0 M 
-------------------------------
2.0 M     Trainable params
0         Non-trainable params
2.0 M     Total params
8.018     Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

2021-03-29 03:49:56.157 | INFO     | nam.data.folded:train_dataloaders:113 - Fold[5]: train: 398, val: 57
GPU available: False, used: False
TPU available: None, using: 0 TPU cores

  | Name  | Type | Params
-------------------------------
0 | model | NAM  | 2.0 M 
-------------------------------
2.0 M     Trainable params
0         Non-trainable params
2.0 M     Total params
8.018     Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

## Testing

In [29]:
# Evaluate on the loader returned by `dataset.test_dataloaders()`.
# `trainer` is the Trainer left over from the last fold of the training loop, and
# `engine` is passed explicitly, so this presumably scores its current weights
# rather than the best saved checkpoint - TODO confirm.
trainer.test(model=engine, test_dataloaders=test_dl)

Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_loss': 0.2780880630016327, 'test_loss_epoch': 0.2780880630016327}
--------------------------------------------------------------------------------


[{'test_loss': 0.2780880630016327, 'test_loss_epoch': 0.2780880630016327}]

---
---
---

## Dataset & Dataloaders (GALLUP Dataset)

In [30]:
# Load the GALLUP data through the project loader, using the four listed feature
# columns; this rebinds `dataset`, replacing the one used in the previous section.
dataset = load_gallup_data(
    config=config,
    features_columns= ["income_2", "WP1219", "WP1220", "weo_gdpc_con_ppp"]
)

In [31]:
len(dataset.data)

1340809

In [32]:
dataset.features.shape

torch.Size([1340809, 4])

In [33]:
dataset.targets.max().item()

10.0

In [34]:
dataset.targets.shape

torch.Size([1340809])

In [35]:
train_dls = dataset.train_dataloaders()

In [36]:
test_dl = dataset.test_dataloaders()

## NAM Model

In [37]:
# Build the additive model for the GALLUP data.
# `num_inputs` is the length of one sample's feature vector; `num_units` is
# whatever `get_num_units` derives from the full feature matrix (the printed
# module tree below shows a different first-layer width per feature).
model = NAM(
    config=config,
    name="NAMModel_GALLUP",
    num_inputs=len(dataset[0][0]),
    num_units=get_num_units(config, dataset.features),
)
model  # last expression: display the module tree

  return _no_grad_trunc_normal_(tensor, mean, std, a, b)


NAM(
  (feature_nns): Sequential(
    (FeatureNN_0): FeatureNN(
      (model): ModuleList(
        (0): ExU(in_features=1, out_features=1000)
        (1): LinReLU(in_features=1000, out_features=64)
        (2): LinReLU(in_features=64, out_features=32)
        (3): Linear(in_features=32, out_features=1, bias=True)
      )
    )
    (FeatureNN_1): FeatureNN(
      (model): ModuleList(
        (0): ExU(in_features=1, out_features=4)
        (1): LinReLU(in_features=4, out_features=64)
        (2): LinReLU(in_features=64, out_features=32)
        (3): Linear(in_features=32, out_features=1, bias=True)
      )
    )
    (FeatureNN_2): FeatureNN(
      (model): ModuleList(
        (0): ExU(in_features=1, out_features=176)
        (1): LinReLU(in_features=176, out_features=64)
        (2): LinReLU(in_features=64, out_features=32)
        (3): Linear(in_features=32, out_features=1, bias=True)
      )
    )
    (FeatureNN_3): FeatureNN(
      (model): ModuleList(
        (0): ExU(in_features=1, 

## Training

In [38]:
# Wrap the NAM in the project's Engine; this wrapper (not the bare model) is
# what gets passed to `trainer.fit` / `trainer.test` in the cells below.
engine = Engine(config, model)

In [39]:
# Keep the three checkpoints with the lowest monitored `val_loss`, written under
# <output_dir>/<model name>/checkpoints with epoch and val_loss in the file name.
# NOTE(review): this single callback instance is handed to every fold's Trainer in
# the training loop, so its top-k bookkeeping presumably spans all folds - confirm.
checkpoint_callback = pl.callbacks.ModelCheckpoint(
    monitor="val_loss",
    mode="min",
    save_top_k=3,
    dirpath=f"{config.output_dir}/{model.name}/checkpoints",
    filename=model.name + "-{epoch:02d}-{val_loss:.4f}",
)

In [None]:
# Train once per (train, val) dataloader pair yielded by `train_dls`.
#
# A plain `for` loop consumes the fold iterator and stops when it is exhausted.
# The previous `while True / try / next() / except StopIteration` form also wrapped
# `trainer.fit`, so a StopIteration escaping from inside training would have been
# swallowed and silently ended the run early.
#
# NOTE(review): the same `engine` object is fitted in every fold, so each fold
# continues from the weights left by the previous one - confirm this is intended
# rather than re-initialising the model per fold.
# NOTE(review): `train_dataloader=` is the keyword accepted by the Lightning version
# this notebook was run with; newer releases are believed to rename it - confirm
# before upgrading.
for train_dl, val_dl in train_dls:
    # A fresh Trainer per fold; `trainer` from the last fold is reused by the test cell.
    trainer = pl.Trainer(
        default_root_dir=f"{config.output_dir}/{model.name}",
        max_epochs=config.training_epochs,
        callbacks=[checkpoint_callback],
    )
    trainer.fit(model=engine, train_dataloader=train_dl, val_dataloaders=val_dl)

2021-03-29 03:50:05.596 | INFO     | nam.data.folded:train_dataloaders:113 - Fold[1]: train: 938566, val: 134081
GPU available: False, used: False
TPU available: None, using: 0 TPU cores

  | Name  | Type | Params
-------------------------------
0 | model | NAM  | 152 K 
-------------------------------
152 K     Trainable params
0         Non-trainable params
152 K     Total params
0.610     Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

2021-03-29 03:58:56.088 | INFO     | nam.data.folded:train_dataloaders:113 - Fold[2]: train: 938566, val: 134081
GPU available: False, used: False
TPU available: None, using: 0 TPU cores

  | Name  | Type | Params
-------------------------------
0 | model | NAM  | 152 K 
-------------------------------
152 K     Trainable params
0         Non-trainable params
152 K     Total params
0.610     Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

## Testing

In [None]:
# Evaluate on the loader returned by `dataset.test_dataloaders()`.
# `trainer` is the Trainer left over from the last fold of the training loop, and
# `engine` is passed explicitly, so this presumably scores its current weights
# rather than the best saved checkpoint - TODO confirm.
trainer.test(model=engine, test_dataloaders=test_dl)

---
---
---