In [1]:
# Implementation for Hedge Backpropagation Feed Forward Network for Classification
# Original code in Theano https://github.com/LIBOL/ODL
# Paper https://www.ijcai.org/proceedings/2018/369
#
# By Carlos Muniz Cuza and Jonas Brusokas

In [1]:
from sail.models.torch.onn_hbp import ONNHBPClassifier, _ONNHBPModel
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_classification
from sklearn.datasets import load_iris
from sklearn.utils import gen_batches
import numpy as np
import torch

### 1. Create dataset for classification

In [2]:
# Size of the synthetic multi-class problem used for the smoke tests below.
n_data_points, n_features, n_classes = 40, 15, 5

# Note: the number of informative features is set equal to the number of
# classes; the seed is fixed so the generated data is repeatable.
X, y = make_classification(
    n_samples=n_data_points,
    n_features=n_features,
    n_informative=n_classes,
    n_classes=n_classes,
    n_clusters_per_class=1,
    random_state=0,
)

### 2. Import and check that the model works.

In [3]:
# Architecture of the network used for the smoke tests.
ffn_hidden_units, n_hidden_layers = 16, 2

# Instantiate the bare torch module (no skorch wrapper).
model = _ONNHBPModel(
    input_units=n_features,
    output_units=n_classes,
    hidden_units=ffn_hidden_units,
    n_hidden_layers=n_hidden_layers,
)

# Run a single forward pass on the synthetic data and show the result.
output = model(X)
print(output)

tensor([[0.2116, 0.2062, 0.1967, 0.1779, 0.2076],
        [0.2117, 0.1961, 0.1889, 0.1718, 0.2315],
        [0.2109, 0.2256, 0.1873, 0.1811, 0.1951],
        [0.3190, 0.1672, 0.1347, 0.2030, 0.1762],
        [0.1945, 0.2129, 0.1792, 0.1830, 0.2304],
        [0.3141, 0.2072, 0.1443, 0.1880, 0.1464],
        [0.2111, 0.2063, 0.1856, 0.1732, 0.2239],
        [0.2336, 0.2156, 0.1737, 0.1904, 0.1868],
        [0.2515, 0.2115, 0.1750, 0.1798, 0.1822],
        [0.1954, 0.2239, 0.1939, 0.1767, 0.2100],
        [0.2230, 0.2158, 0.1808, 0.1869, 0.1934],
        [0.2809, 0.1736, 0.1598, 0.1945, 0.1912],
        [0.2528, 0.2040, 0.1729, 0.1836, 0.1867],
        [0.2434, 0.2121, 0.1826, 0.1662, 0.1957],
        [0.2164, 0.2112, 0.1817, 0.1829, 0.2077],
        [0.3155, 0.1824, 0.1458, 0.1908, 0.1655],
        [0.2168, 0.2082, 0.1922, 0.1770, 0.2059],
        [0.1856, 0.2151, 0.2069, 0.1703, 0.2221],
        [0.1916, 0.2130, 0.2068, 0.1745, 0.2141],
        [0.2249, 0.2075, 0.1997, 0.1767, 0.1912],


### 3. Check skorch

In [4]:
# Same architecture as above, this time through the skorch-style classifier.
model_skorch = ONNHBPClassifier(
    input_units=n_features,
    output_units=n_classes,
    hidden_units=ffn_hidden_units,
    n_hidden_layers=n_hidden_layers,
)

# One incremental training call; the returned object is printed to inspect
# the initialized estimator.
partial_fit = model_skorch.partial_fit(X, y)
print(partial_fit)

# Predicted class labels for the same samples.
predict = model_skorch.predict(X)
print(predict)

  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1        [36m1.6549[0m       [32m0.2500[0m        [35m1.6407[0m  0.0282
<class 'sail.models.torch.onn_hbp.ONNHBPClassifier'>[initialized](
  module_=_ONNHBPModel(
    (hidden_layers): ModuleList(
      (0): Linear(in_features=15, out_features=16, bias=True)
      (1): Linear(in_features=16, out_features=16, bias=True)
    )
    (output_layers): ModuleList(
      (0-1): 2 x Linear(in_features=16, out_features=5, bias=True)
    )
    (do): Dropout(p=0.2, inplace=False)
    (actfn): ReLU()
  ),
)
[2 2 3 3 2 3 2 2 3 2 3 4 4 3 2 2 2 3 3 2 3 3 2 2 2 3 3 2 2 3 2 3 3 3 3 3 2
 2 2 2]


### 2. Load the Iris dataset

In [6]:
iris = load_iris()
X, y = iris['data'], iris['target']
names, feature_names = iris['target_names'], iris['feature_names']

# Standardize every feature to zero mean and unit variance, which is
# important for the convergence of the neural network.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Hold out 20% of the samples for testing; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled,
    y,
    test_size=0.2,
    random_state=2,
)

### 3. Train and test ONN on Iris dataset

In [7]:
n_features = X_train.shape[1]
# Size the output layer from the labels the model is trained on. Deriving it
# from the test labels would make the architecture depend on which classes
# happen to land in the held-out split.
n_classes = np.unique(y_train).shape[0]
ffn_hidden_units = 50
n_hidden_layers = 3

model_skorch = ONNHBPClassifier(
    input_units=n_features,
    output_units=n_classes,
    hidden_units=ffn_hidden_units,
    n_hidden_layers=n_hidden_layers,
)

# Number of full passes over the training set; each partial_fit call below
# continues training from the current weights.
n_passes = 5
partial_fit = None
for _ in range(n_passes):
    partial_fit = model_skorch.partial_fit(X_train, y_train)

print('Accuracy on the test data', partial_fit.score(X_test, y_test))

  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1        [36m1.0350[0m       [32m0.6250[0m        [35m1.0384[0m  0.0127
      2        [36m1.0338[0m       0.6250        [35m1.0368[0m  0.0076
      3        [36m1.0323[0m       0.6250        [35m1.0353[0m  0.0088
      4        [36m1.0307[0m       0.6250        [35m1.0337[0m  0.0064
      5        [36m1.0292[0m       0.6250        [35m1.0322[0m  0.0062
Accuracy on the test data 0.6666666666666666


### 4. Mini-batch training.

In [8]:
# Number of training rows handed to each partial_fit call.
batch_size = 20

# Start from a freshly initialized classifier with the Iris architecture.
model_skorch = ONNHBPClassifier(
    input_units=n_features,
    output_units=n_classes,
    hidden_units=ffn_hidden_units,
    n_hidden_layers=n_hidden_layers,
)

# A single pass over the training set, one consecutive slice at a time.
for batch in gen_batches(X_train.shape[0], batch_size):
    x_batch, y_batch = X_train[batch], y_train[batch]
    partial_fit = model_skorch.partial_fit(x_batch, y_batch)

predict = model_skorch.predict(X_test)

# Accuracy on the held-out split after the single pass.
print(partial_fit.score(X_test, y_test))

  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1        [36m1.0705[0m       [32m0.5000[0m        [35m1.0725[0m  0.0062
      2        [36m1.0681[0m       [32m1.0000[0m        [35m1.0519[0m  0.0054
      3        1.0705       0.5000        1.0779  0.0070
      4        [36m1.0613[0m       0.7500        1.0586  0.0088
      5        1.0803       0.2500        1.0811  0.0356
      6        [36m1.0574[0m       0.7500        [35m1.0480[0m  0.0135
0.8




### 5. Improving the results.
Note that the results of the mini-batch training above are still poor. This is because we made only a single pass (one epoch) over the training data. An easy way to improve this is to run `partial_fit` over all the mini-batches several times, i.e., for multiple epochs.

In [9]:
# Continue training the classifier from the previous cell for several epochs.
n_epochs = 10
n_train = X_train.shape[0]
# Seeded generator so the shuffling order is the same on every run.
rng = np.random.default_rng(0)

for _ in range(n_epochs):
    # Shuffle once per epoch, before batching, so that every sample is
    # visited exactly once per epoch. The shuffled copies are kept in
    # separate variables so X_train / y_train are never overwritten and the
    # cell can be re-run safely.
    permutation = rng.permutation(n_train)
    X_epoch = X_train[permutation]
    y_epoch = y_train[permutation]

    for batch in gen_batches(n_train, batch_size):
        x_batch = X_epoch[batch]
        y_batch = y_epoch[batch]
        partial_fit = model_skorch.partial_fit(x_batch, y_batch)

# Compare this accuracy with the single-pass result of the previous cell.
print('Accuracy after {} epochs'.format(n_epochs), partial_fit.score(X_test, y_test))

      7        1.0629       0.7500        1.0639  0.0071
      8        [36m1.0551[0m       0.7500        1.0548  0.0059
      9        [36m1.0480[0m       0.7500        1.0540  0.0059
     10        1.0694       0.7500        1.0592  0.0069
     11        1.0621       0.5000        1.0609  0.0064
     12        1.0643       0.7500        1.0501  0.0076
     13        [36m1.0413[0m       0.7500        [35m1.0460[0m  0.0076
     14        1.0578       0.5000        1.0539  0.0130
     15        1.0501       0.7500        [35m1.0376[0m  0.0062
     16        1.0568       0.5000        1.0707  0.0081
     17        [36m1.0340[0m       0.7500        1.0491  0.0080
     18        1.0454       0.7500        1.0589  0.0106
     19        1.0531       0.7500        1.0624  0.0119
     20        1.0519       0.7500        [35m1.0343[0m  0.0119
     21        1.0426       0.7500        1.0396  0.0120




     22        [36m1.0242[0m       1.0000        [35m1.0154[0m  0.0272
     23        1.0521       0.7500        1.0499  0.0339
     24        1.0368       0.7500        1.0312  0.0172
     25        1.0511       0.5000        1.0520  0.0107
     26        1.0354       1.0000        1.0213  0.0059
     27        1.0339       0.5000        1.0395  0.0123
     28        1.0407       0.7500        1.0207  0.0097
     29        1.0277       0.7500        1.0337  0.0099
     30        1.0302       1.0000        [35m0.9946[0m  0.0108
     31        [36m1.0239[0m       0.5000        1.0545  0.0111
     32        1.0464       0.7500        1.0310  0.0084
     33        1.0308       0.7500        1.0388  0.0080
     34        1.0401       0.5000        1.0400  0.0091
     35        1.0258       0.7500        1.0410  0.0077
     36        [36m1.0226[0m       0.5000        1.0405  0.0070




     37        [36m1.0167[0m       0.7500        1.0123  0.0079
     38        [36m1.0081[0m       1.0000        0.9985  0.0096
     39        1.0296       0.7500        1.0227  0.0074
     40        [36m1.0056[0m       0.7500        1.0049  0.0130
     41        1.0208       0.7500        1.0267  0.0079
     42        1.0310       0.5000        1.0374  0.0080
     43        [36m1.0003[0m       1.0000        [35m0.9912[0m  0.0062
     44        1.0283       0.7500        1.0318  0.0078
     45        1.0146       0.5000        1.0337  0.0124
     46        [36m0.9947[0m       0.7500        1.0273  0.0066
     47        0.9968       0.7500        0.9982  0.0075
     48        0.9969       1.0000        [35m0.9796[0m  0.0084
     49        1.0097       0.5000        1.0246  0.0086
     50        [36m0.9916[0m       0.5000        1.0332  0.0144
     51        1.0040       0.7500        0.9982  0.0069




     52        1.0228       0.5000        0.9908  0.0098
     53        1.0004       0.7500        0.9898  0.0107
     54        1.0264       0.7500        0.9890  0.0135
     55        0.9938       1.0000        [35m0.9557[0m  0.0140
     56        1.0082       0.7500        1.0022  0.0112
     57        [36m0.9900[0m       0.5000        1.0302  0.0114
     58        1.0154       0.7500        1.0071  0.0094
     59        1.0038       0.7500        0.9759  0.0088
     60        [36m0.9898[0m       0.7500        0.9693  0.0086
     61        0.9999       0.7500        0.9992  0.0089




     62        1.0085       0.7500        1.0005  0.0089
     63        0.9931       1.0000        0.9719  0.0075
     64        0.9947       0.7500        0.9863  0.0090
     65        0.9955       0.7500        0.9900  0.0310
     66        0.9900       1.0000        0.9757  0.0114
Accuracy after 10 epochs 0.8333333333333334




## Training and testing simultaneously, one example at a time

In [10]:
# Fix the seeds so the data and the printed accuracies can be reproduced.
# torch.manual_seed seeds torch's global RNG; presumably this also fixes the
# network's initial weights -- TODO confirm against the model implementation.
torch.manual_seed(42)

X, Y = make_classification(n_samples=5000, n_features=10, n_informative=4, n_redundant=0, n_classes=10,
                           n_clusters_per_class=1, class_sep=3, random_state=42)

X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=42, shuffle=True)

# train_split=None and verbose=0 are passed through to the classifier; with
# them, the per-call training table seen in the earlier cells is not printed.
onn_network = ONNHBPClassifier(input_units=10,
                                output_units=10,
                                hidden_units=40,
                                n_hidden_layers=5,
                                train_split=None,
                                verbose=0
                                )

n_training_samples = len(X_train)
for i in range(n_training_samples):
    # Feed exactly one sample per update; the i:i + 1 slices keep the leading
    # batch dimension, i.e. shapes (1, n_features) and (1,).
    partial_fit = onn_network.partial_fit(X_train[i:i + 1], y_train[i:i + 1])

    # Evaluate on the whole held-out set every 1000 samples.
    if i % 1000 == 0:
        print("Online Accuracy at time {}/{}: {}".format(i, n_training_samples, partial_fit.score(X_test, y_test)))

print('Training and testing finished.\nFinal accuracy after {} samples: {}'.format(n_training_samples, partial_fit.score(X_test, y_test)))

Online Accuracy at time 0/3500: 0.04533333333333334
Online Accuracy at time 1000/3500: 0.9726666666666667
Online Accuracy at time 2000/3500: 0.9686666666666667
Online Accuracy at time 3000/3500: 0.9786666666666667
Training and testing finished.
Final accuracy after 3500 samples: 0.9786666666666667
