In [1]:
# Load (or reload) the autoreload extension and switch it to mode 2 so that
# edits to imported project modules are picked up without a kernel restart.
%reload_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
import time
import json
import os
import random
from typing import Dict, Iterator, List, Tuple

import numpy as np
import pandas as pd
import torch
from torch.utils.data import DataLoader
from loguru import logger
from sklearn.datasets import load_breast_cancer

from nam.config.default import defaults
from nam.types import Config
from nam.utils.args import parse_args
from nam.data.base import NAMDataset

In [3]:
# Load the project's default configuration object.
config = defaults()

# Seed every RNG imported by this notebook from the config's own seed, so that
# Restart & Run All reproduces the shuffled batches displayed further down.
# NOTE(review): whether `defaults()` or NAMDataset already seed anything is not
# visible from this notebook; seeding here is harmless either way.
random.seed(config.seed)
np.random.seed(config.seed)
torch.manual_seed(config.seed)

# Bare last expression so the cell renders the full config for reference.
config

namespace(device='cpu',
          logdir='logs',
          lr=0.01,
          batch_size=1024,
          l2_regularization=0.0,
          output_regularization=0.0,
          decay_rate=0.995,
          dropout=0.5,
          feature_dropout=0.0,
          data_split=1,
          seed=1377,
          num_basis_functions=1000,
          units_multiplier=2,
          cross_val=False,
          max_checkpoints_to_keep=1,
          save_checkpoint_every_n_epochs=10,
          n_models=1,
          num_splits=3,
          fold_num=1,
          activation='exu',
          regression=False,
          debug=False,
          shallow=False,
          use_dnn=False,
          early_stopping_epochs=60,
          n_folds=5)

## GALLUP Data

In [4]:
# Column selection for the Gallup CSV, passed to NAMDataset in the next cell.
# Input feature columns (their order fixes the column order of the feature tensor
# shown in the sample batch below).
features_columns = [
    "income_2",
    "WP1219",
    "WP1220",
    "weo_gdpc_con_ppp",
]
# Target column, kept as a one-element list.
targets_column = ["WP16"]
# Weights column, kept as a one-element list.
weights_column = ["wgt"]

In [5]:
# Build the Gallup dataset from its CSV, using the feature / target / weights
# column lists defined above together with the shared config.
dataset = NAMDataset(
    config=config,
    csv_file='data/GALLUP.csv',
    features_columns=features_columns,
    targets_column=targets_column,
    weights_column=weights_column,
)
# Bare expression: show the object's repr as the cell output.
dataset

<nam.data.base.NAMDataset at 0x7f82a16dfa50>

In [6]:
# Small shuffled loader used only to eyeball a sample batch.
# The shuffle is driven by a dedicated generator seeded from config.seed, so
# re-running this cell (or the whole notebook) yields the same first batch
# instead of a different random one each time.
dl = DataLoader(
    dataset,
    batch_size=32,
    shuffle=True,
    generator=torch.Generator().manual_seed(config.seed),
)

In [7]:
# Pull a single batch from a fresh iterator over the loader for inspection.
batch = next(iter(dl))

In [8]:
# Display the sampled batch.
# NOTE(review): the saved output contains a `nan` in the last feature column of
# one row — presumably `weo_gdpc_con_ppp` has missing values in the CSV; confirm
# how NAMDataset is expected to handle them before training on this data.
batch

[tensor([[1.2130e+03, 1.0000e+00, 6.0000e+01, 1.3734e+04],
         [7.1340e+03, 2.0000e+00, 2.5000e+01, 1.6296e+03],
         [1.6436e+03, 2.0000e+00, 2.9000e+01, 2.6061e+03],
         [1.2569e+04, 1.0000e+00, 4.0000e+01, 1.5691e+04],
         [7.7961e+02, 1.0000e+00, 2.2000e+01, 1.4847e+03],
         [5.4786e+04, 2.0000e+00, 3.0000e+01, 2.6930e+04],
         [1.3918e+03, 1.0000e+00, 2.8000e+01, 1.1867e+04],
         [1.0856e+04, 1.0000e+00, 4.2000e+01, 8.8024e+03],
         [3.2712e+04, 1.0000e+00, 2.1000e+01, 3.7308e+04],
         [3.9081e+04, 2.0000e+00, 6.9000e+01, 3.6459e+04],
         [7.0829e+03, 1.0000e+00, 3.2000e+01, 1.8460e+03],
         [1.7803e+04, 2.0000e+00, 3.7000e+01, 1.2636e+04],
         [3.1427e+03, 1.0000e+00, 2.5000e+01,        nan],
         [6.1213e+05, 1.0000e+00, 4.1000e+01, 3.8694e+04],
         [1.0300e+04, 1.0000e+00, 2.5000e+01, 1.2651e+04],
         [2.3491e+04, 1.0000e+00, 6.9000e+01, 4.0378e+04],
         [2.0040e+04, 2.0000e+00, 4.4000e+01, 2.9519e+04

## Housing Data

In [9]:
# Column selection for the housing CSV, passed to NAMDataset in the next cell.
# Input feature columns, one per line for readability.
features_columns = [
    "longitude",
    "latitude",
    "housing_median_age",
    "total_rooms",
    "total_bedrooms",
    "population",
    "households",
    "median_income",
]
# Target column, kept as a one-element list.
targets_column = ["median_house_value"]

In [10]:
# Build the housing dataset from its CSV with the feature / target column lists
# defined above. No weights_column is passed for this dataset.
dataset = NAMDataset(
    config=config,
    csv_file='data/housing.csv',
    features_columns=features_columns,
    targets_column=targets_column,
)
# Bare expression: show the object's repr as the cell output.
dataset

<nam.data.base.NAMDataset at 0x7f82a1a94450>

In [11]:
# Small shuffled loader used only to eyeball a sample housing batch.
# The shuffle is driven by a dedicated generator seeded from config.seed, so
# re-running this cell (or the whole notebook) yields the same first batch
# instead of a different random one each time.
dl = DataLoader(
    dataset,
    batch_size=32,
    shuffle=True,
    generator=torch.Generator().manual_seed(config.seed),
)

In [12]:
# Pull a single batch from a fresh iterator over the housing loader.
batch = next(iter(dl))

In [13]:
# Display the sampled housing batch.
batch

[tensor([[-1.2184e+02,  3.7380e+01,  3.3000e+01,  8.3500e+02,  1.8100e+02,
           7.8100e+02,  1.6900e+02,  5.1082e+00],
         [-1.1725e+02,  3.2960e+01,  1.8000e+01,  4.7730e+03,  7.4300e+02,
           1.9700e+03,  7.1600e+02,  6.6199e+00],
         [-1.1833e+02,  3.4090e+01,  3.0000e+01,  1.6790e+03,  6.8200e+02,
           1.4450e+03,  5.7900e+02,  2.1403e+00],
         [-1.2196e+02,  3.7290e+01,  2.4000e+01,  1.2400e+03,  2.6300e+02,
           6.9000e+02,  2.7600e+02,  5.0000e+00],
         [-1.1818e+02,  3.4100e+01,  8.0000e+00,  1.1160e+03,  2.6700e+02,
           4.3500e+02,  2.3500e+02,  4.9231e+00],
         [-1.2225e+02,  3.7800e+01,  4.1000e+01,  1.4710e+03,  4.6900e+02,
           1.0620e+03,  4.1300e+02,  1.6121e+00],
         [-1.2321e+02,  3.9150e+01,  3.1000e+01,  2.6850e+03,  6.7500e+02,
           1.3670e+03,  6.2600e+02,  1.6571e+00],
         [-1.2247e+02,  3.7720e+01,  4.7000e+01,  1.1760e+03,  2.8600e+02,
           5.6400e+02,  2.5800e+02,  3.2059e+00],
