# Background

Simple NN using pyTorch:
    * Classification problem (binary),
    * Based on features like: age, sex, education, marital status, workclass etc. needs to predict whether persons'income  
    is higher or lower than 50k.

In [19]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import skorch

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
%matplotlib inline

# DF import

In [20]:
df = pd.read_csv('income.csv')

In [21]:
df.head()

Unnamed: 0,age,sex,education,education-num,marital-status,workclass,occupation,hours-per-week,income,label
0,27,Male,HS-grad,9,Never-married,Private,Craft-repair,40,<=50K,0
1,47,Male,Masters,14,Married,Local-gov,Exec-managerial,50,>50K,1
2,59,Male,HS-grad,9,Divorced,Self-emp,Prof-specialty,20,<=50K,0
3,38,Female,Prof-school,15,Never-married,Federal-gov,Prof-specialty,57,>50K,1
4,64,Female,11th,7,Widowed,Private,Farming-fishing,40,<=50K,0


In [22]:
df['label'].value_counts()

0    21700
1     8300
Name: label, dtype: int64

In [23]:
cat_cols = ['sex', 'education-num', 'marital-status', 'workclass', 'occupation']
num_cols = ['age', 'hours-per-week']
y_col = ['label']

# Standarizing numerical variables

In [24]:
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split

In [25]:
X_train, X_test, y_train, y_test = train_test_split(
    df[cat_cols+num_cols], df[y_col], test_size=.2)

In [26]:
scaler = StandardScaler()
X_train[num_cols] = scaler.fit_transform(X_train[num_cols])
X_test[num_cols] = scaler.transform(X_test[num_cols])

# Encoding categorical variables

In [27]:
encoder = OneHotEncoder(sparse=False, drop='first')

In [28]:
train_encoded = pd.DataFrame(encoder.fit_transform(X_train[cat_cols]), columns=encoder.get_feature_names(cat_cols))
X_train = pd.concat([X_train.reset_index(drop=True), train_encoded], axis=1).drop(columns=cat_cols)

In [29]:
test_encoded = pd.DataFrame(encoder.transform(X_test[cat_cols]), columns=encoder.get_feature_names(cat_cols))
X_test = pd.concat([X_test.reset_index(drop=True), test_encoded], axis=1).drop(columns=cat_cols)

# Changing DF to tensors (using GPU)

In [31]:
X_train_torch = torch.tensor(X_train.values,dtype=torch.float)
X_test_torch = torch.tensor(X_test.values,dtype=torch.float)

y_train_torch = torch.tensor(y_train.values).flatten()
y_test_torch = torch.tensor(y_test.values).flatten()

# Simple neural network

This neural network contains 2 hidden layers - both are fully connected and their activation function is ReLU. 1st hidden layer contains 240 neurons and 2nd - 200.  

Activation function of output layer is log_softmax, because of nature of the problem (classification).

In [32]:
class ClassificationNetwork(nn.Module):
    def __init__(self, in_size=36):
        super().__init__()
        self.fc1 = nn.Linear(in_size, 240)
        self.fc2 = nn.Linear(240, 200)
        self.fc3 = nn.Linear(200, 2)
                
    def forward(self, X):
        X  = F.relu(self.fc1(X))
        X = F.relu(self.fc2(X))
        X = self.fc3(X)
        return F.log_softmax(X, dim =1)

# Training 1st model

First model is trained during 2000 epochs. Its loss function is Cross Entropy and optimizer is Adam (gives far better output than SGD).

In [56]:
checkpoint = skorch.callbacks.Checkpoint()
early_stopping = skorch.callbacks.EarlyStopping(patience=50)

In [58]:
net = skorch.NeuralNetClassifier(
module=ClassificationNetwork,
criterion=nn.CrossEntropyLoss,
optimizer=torch.optim.Adam,
lr=.001,
device='cuda',
callbacks=[checkpoint,
          early_stopping],
max_epochs = 150)

In [51]:
from sklearn.model_selection import cross_val_score

In [59]:
net.fit(X_train_torch, y_train_torch)

  epoch    train_loss    valid_acc    valid_loss    cp     dur
-------  ------------  -----------  ------------  ----  ------
      1        [36m0.3011[0m       [32m0.8812[0m        [35m0.2618[0m     +  0.9777
      2        [36m0.2469[0m       [32m0.8827[0m        [35m0.2583[0m     +  0.8612
      3        [36m0.2416[0m       0.8817        [35m0.2574[0m     +  0.9116
      4        [36m0.2381[0m       0.8815        [35m0.2574[0m     +  0.8713
      5        [36m0.2353[0m       0.8823        0.2577        0.8926
      6        [36m0.2329[0m       0.8810        0.2584        0.8560
      7        [36m0.2310[0m       0.8808        0.2597        0.8769
      8        [36m0.2291[0m       0.8804        0.2612        0.8784
      9        [36m0.2273[0m       0.8802        0.2622        0.8894
     10        [36m0.2257[0m       0.8802        0.2636        0.9989
     11        [36m0.2239[0m       0.8804        0.2654        0.8626
     12        [36m0.2224[

<class 'skorch.classifier.NeuralNetClassifier'>[initialized](
  module_=ClassificationNetwork(
    (fc1): Linear(in_features=36, out_features=240, bias=True)
    (fc2): Linear(in_features=240, out_features=200, bias=True)
    (fc3): Linear(in_features=200, out_features=2, bias=True)
  ),
)

In [64]:
y_pred = net.predict(X_test_torch)

# Evaluation on test data

In [65]:
from sklearn.metrics import confusion_matrix, accuracy_score

In [68]:
y_pred

array([0, 0, 0, ..., 0, 0, 1], dtype=int64)

In [70]:
confusion_matrix(y_test, y_pred)

array([[3783,  495],
       [ 268, 1454]], dtype=int64)

In [72]:
print(f'Test accuracy: {100*accuracy_score(y_test, y_pred):.2f}%')

Test accuracy: 87.28%


Our test accuracy is almost 90% which is very good score. Of course there is a lot of things to improve like:
* using embeddings instead of one hot encoding,
* adding more layers or changing no of neurons.


# Model saved on checkpoint

In [76]:
saved_net = skorch.NeuralNetClassifier(
    module=ClassificationNetwork,
    criterion=nn.CrossEntropyLoss)
saved_net.initialize()
saved_net.load_params(f_params = 'params.pt', f_optimizer='optimizer.pt', f_history='history.json')

In [77]:
y_pred = saved_net.predict(X_test_torch)

In [78]:
confusion_matrix(y_test, y_pred)

array([[3773,  505],
       [ 195, 1527]], dtype=int64)

In [79]:
print(f'Test accuracy: {100*accuracy_score(y_test, y_pred):.2f}%')

Test accuracy: 88.33%


Saved model is a bit better than one fitted in the beggining.