In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline


In [3]:

from sklearn.preprocessing import OneHotEncoder, LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_boston
from sklearn.metrics import r2_score, accuracy_score
import torch 
import torch.nn.functional as F
import torch.nn as nn 
import numpy as np
import pandas as pd
import torch
from torch.utils.data import DataLoader
import torch.nn.functional as F
from loguru import logger
import matplotlib.pyplot as plt

from nam.config import defaults
from nam.types import Config
from nam.utils.args import parse_args
from nam.data import NAMDataset
from nam.models import DNN, FeatureNN, NAM, get_num_units
from nam.engine import Engine
from nam.utils import graphing

from main import get_config


In [4]:
config = get_config()

features_columns = ["income_2", "WP1219", "WP1220", "weo_gdpc_con_ppp"]
targets_column = ["WP16"]
weights_column = ["wgt"]

data = pd.read_csv('data/GALLUP.csv')
missing = data.isnull().sum()
print(missing)
data = data.fillna(method='ffill')

WP16                    0
wgt                     0
country                 0
income_2                0
WP1219                  1
WP1220                185
year                    0
weo_gdpc_con_ppp    24122
dtype: int64


In [7]:
X = np.array(data[features_columns])
y = np.array(data[targets_column])

scaler = MinMaxScaler()

X = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=2137)

X_train = torch.from_numpy(X_train.astype('float32'))
X_test = torch.from_numpy(X_test.astype('float32'))
y_train = torch.from_numpy(y_train.reshape(-1, 1).astype('float32'))
y_test = torch.from_numpy(y_test.reshape(-1, 1).astype('float32'))

dataset_train = torch.utils.data.TensorDataset(X_train, y_train)
batch_size = 128
dataset_train = torch.utils.data.DataLoader(dataset_train, batch_size=batch_size, shuffle=True)

In [13]:
class NeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer1 = nn.Linear(4, 32)
        self.layer2 = nn.Linear(32, 16)
        self.layer3 = nn.Linear(16, 12)
        self.layer4 = nn.Linear(12, 1)
        self.dropout1 = nn.Dropout(0.1)
        self.dropout2 = nn.Dropout(0.1)
        self.dropout3 = nn.Dropout(0.1)

    def forward(self, x):
        x = F.relu(self.layer1(x))
        x = self.dropout1(x)
        x = F.relu(self.layer2(x))
        x = self.dropout2(x)
        x = F.relu(self.layer3(x))
        x = self.dropout3(x)
        x = self.layer4(x)
        return x

nn_model = NeuralNetwork()
nn_model


NeuralNetwork(
  (layer1): Linear(in_features=4, out_features=32, bias=True)
  (layer2): Linear(in_features=32, out_features=16, bias=True)
  (layer3): Linear(in_features=16, out_features=12, bias=True)
  (layer4): Linear(in_features=12, out_features=1, bias=True)
  (dropout1): Dropout(p=0.1, inplace=False)
  (dropout2): Dropout(p=0.1, inplace=False)
  (dropout3): Dropout(p=0.1, inplace=False)
)

In [14]:
loss_obj = torch.nn.MSELoss()
optimizer = torch.optim.Adam(nn_model.parameters())

In [15]:
for epoch in range(100):
    optimizer.zero_grad()
    i = 0
    for X, y in dataset_train:
        y_pred = nn_model(X)
        loss = loss_obj(y_pred, y)
        loss.backward()
        optimizer.step()
        i+=1
        if not i % 1000:
            print(loss)

tensor(8.6204, grad_fn=<MseLossBackward>)
tensor(8.3854, grad_fn=<MseLossBackward>)
tensor(9.6203, grad_fn=<MseLossBackward>)
tensor(10.5477, grad_fn=<MseLossBackward>)
tensor(11.7218, grad_fn=<MseLossBackward>)
tensor(6.1141, grad_fn=<MseLossBackward>)
tensor(6.0120, grad_fn=<MseLossBackward>)
tensor(7.7470, grad_fn=<MseLossBackward>)
tensor(7.3754, grad_fn=<MseLossBackward>)
tensor(6.4906, grad_fn=<MseLossBackward>)
tensor(4.9904, grad_fn=<MseLossBackward>)
tensor(6.1464, grad_fn=<MseLossBackward>)
tensor(4.9804, grad_fn=<MseLossBackward>)
tensor(5.0341, grad_fn=<MseLossBackward>)
tensor(5.7309, grad_fn=<MseLossBackward>)
tensor(4.4846, grad_fn=<MseLossBackward>)
tensor(4.7525, grad_fn=<MseLossBackward>)
tensor(5.4566, grad_fn=<MseLossBackward>)
tensor(5.6220, grad_fn=<MseLossBackward>)
tensor(4.7918, grad_fn=<MseLossBackward>)
tensor(4.4297, grad_fn=<MseLossBackward>)
tensor(5.6841, grad_fn=<MseLossBackward>)
tensor(5.4914, grad_fn=<MseLossBackward>)
tensor(4.8070, grad_fn=<MseLossB

KeyboardInterrupt: 

In [18]:
y_pred = nn_model(X_test).detach().numpy()
y_true = y_test.detach().numpy()
print(y_true[:10])
print(y_pred[:10])
r2_score(y_true, y_pred)

[[5.7460113]
 [5.7460113]
 [5.7460113]
 [5.7460113]
 [5.7460113]
 [5.7460113]
 [5.7460113]
 [5.7460113]
 [5.7460113]
 [5.7460113]]
[[ 8.]
 [ 4.]
 [10.]
 [ 0.]
 [ 3.]
 [ 5.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 6.]]


-0.010169235857269365