In [1]:
# Implementation for Hedge Backpropagation Feed Forward Network for Classification
# Original code in Theano https://github.com/LIBOL/ODL
# Paper https://www.ijcai.org/proceedings/2018/369
#
# By Carlos Muniz Cuza and Jonas Brusokas

In [2]:
from sail.models.torch.onn_hbp import ONNHBPClassifier, _ONNHBPModel
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_classification
from sklearn.datasets import load_iris
from sklearn.utils import gen_batches
import numpy as np
import torch

### 1. Create dataset for classification

In [3]:
n_data_points = 40
n_features = 15
n_classes = 5

X, y = make_classification(n_samples=n_data_points,
                           n_features=n_features,
                           n_informative=n_classes,
                           random_state=0,
                           n_classes=n_classes,
                           n_clusters_per_class=1)

### 2. Import and check that the model works.

In [4]:
ffn_hidden_units = 16
n_hidden_layers = 2

model = _ONNHBPModel(input_units=n_features,
                     output_units=n_classes,
                     hidden_units=ffn_hidden_units,
                     n_hidden_layers=n_hidden_layers)

output = model(X)
print(output)

tensor([[0.1940, 0.2239, 0.1944, 0.1947, 0.1929],
        [0.1725, 0.2508, 0.2033, 0.1680, 0.2054],
        [0.2108, 0.1958, 0.2110, 0.1953, 0.1870],
        [0.1981, 0.2225, 0.2033, 0.2029, 0.1732],
        [0.1903, 0.2170, 0.1987, 0.1979, 0.1961],
        [0.1957, 0.2227, 0.1993, 0.2069, 0.1754],
        [0.1904, 0.2240, 0.2016, 0.1810, 0.2030],
        [0.1909, 0.2180, 0.2018, 0.2113, 0.1779],
        [0.2067, 0.1996, 0.1823, 0.2242, 0.1872],
        [0.2018, 0.2359, 0.1918, 0.1944, 0.1761],
        [0.1896, 0.2227, 0.2023, 0.2039, 0.1814],
        [0.1918, 0.2134, 0.2070, 0.2103, 0.1775],
        [0.1829, 0.2118, 0.1988, 0.2063, 0.2002],
        [0.2127, 0.2356, 0.1844, 0.1960, 0.1713],
        [0.1961, 0.2072, 0.1877, 0.2174, 0.1916],
        [0.2092, 0.2065, 0.1902, 0.2229, 0.1712],
        [0.1952, 0.2549, 0.1855, 0.1636, 0.2009],
        [0.1786, 0.2528, 0.1905, 0.1802, 0.1979],
        [0.1888, 0.2166, 0.2166, 0.1583, 0.2198],
        [0.1981, 0.2222, 0.2062, 0.1940, 0.1795],


### 3. Check skorch

In [5]:
model_skorch = ONNHBPClassifier(input_units=n_features,
                                 output_units=n_classes,
                                 hidden_units=ffn_hidden_units,
                                 n_hidden_layers=n_hidden_layers)

partial_fit = model_skorch.partial_fit(X,y)
print(partial_fit)
predict = model_skorch.predict(X)
print(predict)

  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1        [36m1.6178[0m       [32m0.1250[0m        [35m1.6412[0m  0.0037
<class 'sail.models.torch.onn_hbp.ONNHBPClassifier'>[initialized](
  module_=_ONNHBPModel(
    (hidden_layers): ModuleList(
      (0): Linear(in_features=15, out_features=16, bias=True)
      (1): Linear(in_features=16, out_features=16, bias=True)
    )
    (output_layers): ModuleList(
      (0): Linear(in_features=16, out_features=5, bias=True)
      (1): Linear(in_features=16, out_features=5, bias=True)
    )
    (do): Dropout(p=0.2, inplace=False)
    (actfn): ReLU()
  ),
)
[3 4 0 4 3 4 2 4 3 0 4 4 4 3 2 1 2 0 0 4 3 4 3 2 3 4 4 1 0 2 4 0 1 4 0 3 2
 0 0 2]


### 2. Load the Iris dataset

In [6]:
iris = load_iris()
X = iris['data']
y = iris['target']
names = iris['target_names']
feature_names = iris['feature_names']

# Scale data to have mean 0 and variance 1
# which is importance for convergence of the neural network
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the data set into training and testing
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=2)

### 3. Train and test ONN on Iris dataset

In [7]:
n_features = X_train.shape[1]
n_classes = np.unique(y_test).shape[0]
ffn_hidden_units = 50
n_hidden_layers = 3

model_skorch = ONNHBPClassifier(input_units=n_features,
                                 output_units=n_classes,
                                 hidden_units=ffn_hidden_units,
                                 n_hidden_layers=n_hidden_layers)
partial_fit = None
for i in range(0,5):
    partial_fit = model_skorch.partial_fit(X_train, y_train)

print('Accuracy on the test data', partial_fit.score(X_test, y_test))

  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1        [36m1.0788[0m       [32m0.4167[0m        [35m1.0833[0m  0.0070
      2        [36m1.0770[0m       [32m0.4583[0m        [35m1.0812[0m  0.0079
      3        [36m1.0747[0m       [32m0.5000[0m        [35m1.0791[0m  0.0061
      4        [36m1.0724[0m       [32m0.5833[0m        [35m1.0771[0m  0.0070
      5        [36m1.0702[0m       0.5833        [35m1.0750[0m  0.0060
Accuracy on the test data 0.3


### 4. Mini-batch training.

In [8]:
batch_size = 20

model_skorch = ONNHBPClassifier(input_units=n_features,
                                 output_units=n_classes,
                                 hidden_units=ffn_hidden_units,
                                 n_hidden_layers=n_hidden_layers)

for batch in gen_batches(X_train.shape[0], batch_size):
    x_batch = X_train[batch]
    y_batch = y_train[batch]
    partial_fit = model_skorch.partial_fit(x_batch, y_batch)

predict = model_skorch.predict(X_test)

print(partial_fit.score(X_test, y_test))

  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1        [36m1.0855[0m       [32m0.2500[0m        [35m1.1201[0m  0.0040
      2        1.0988       [32m0.5000[0m        [35m1.0544[0m  0.0040
      3        [36m1.0731[0m       0.2500        1.0870  0.0040
      4        1.0864       [32m0.7500[0m        [35m0.9967[0m  0.0040
      5        1.0974       0.0000        1.1621  0.0050
      6        [36m1.0354[0m       0.2500        1.0940  0.0049
0.5




### 5. Improving the results.
Note, the results of doing mini batch learning are very bad. This is because we only do one single epoch. An easy way to improve this is by running partial fit for each mini-batch several times, i.e, multiple epochs.

In [9]:
for _ in range(10): # n_epochs
    for batch in gen_batches(X_train.shape[0], batch_size):
        x_batch = X_train[batch]
        y_batch = y_train[batch]
        partial_fit = model_skorch.partial_fit(x_batch, y_batch)
        # Shuffling the dataset
        permutation = torch.randperm(X_train.shape[0])
        X_train = X_train[permutation]
        y_train = y_train[permutation]

# Note how the results improved considerably
print('Accuracy after 10 epochs', partial_fit.score(X_test, y_test))

      7        1.0800       0.2500        1.1130  0.0050
      8        1.0673       0.5000        1.0301  0.0040
      9        1.0974       0.0000        1.1287  0.0055
     10        1.0492       0.2500        1.1194  0.0053
     11        1.0545       0.2500        1.0659  0.0042
     12        1.0931       0.5000        1.0052  0.0040
     13        1.0839       0.2500        1.0833  0.0050
     14        1.0775       0.2500        1.0886  0.0040
     15        1.0756       0.0000        1.1571  0.0050
     16        1.0483       0.2500        1.0899  0.0050
     17        1.0593       0.5000        1.0103  0.0040
     18        1.1002       0.0000        1.1504  0.0040
     19        1.0442       0.2500        1.0870  0.0040
     20        1.0955       0.2500        1.0729  0.0047
     21        1.0594       0.2500        1.0686  0.0050
     22        1.0765       0.0000        1.1468  0.0040
     23        1.0558       0.2500        1.0805  0.0049
     24        1.0643       0.5



     29        [36m1.0186[0m       0.2500        1.0777  0.0040
     30        1.0302       0.7500        1.0031  0.0040
     31        1.0423       0.5000        1.0139  0.0040
     32        1.0541       0.5000        1.0518  0.0040
     33        1.0671       0.0000        1.1565  0.0049
     34        1.0490       0.5000        1.0086  0.0040
     35        1.0301       0.2500        1.0756  0.0040
     36        1.0328       0.7500        [35m0.9854[0m  0.0038




     37        1.0245       0.2500        1.0627  0.0040
     38        1.0501       0.5000        1.1084  0.0040
     39        1.0812       0.2500        1.0953  0.0039
     40        1.1102       0.2500        1.0965  0.0040
     41        1.0413       0.5000        1.0582  0.0035
     42        [36m1.0053[0m       0.2500        1.0895  0.0040
     43        1.0458       0.5000        [35m0.9594[0m  0.0039
     44        1.0515       0.7500        [35m0.9532[0m  0.0039
     45        1.0536       0.2500        1.1296  0.0050
     46        1.0392       0.5000        1.0469  0.0030
     47        1.0595       0.2500        1.0713  0.0040
     48        1.0463       0.7500        0.9621  0.0039
     49        1.0653       0.0000        1.1104  0.0040
     50        1.0533       0.7500        1.0201  0.0040
     51        1.0287       0.7500        0.9801  0.0040
     52        1.0541       0.2500        1.0905  0.0040
     53        1.0489       0.5000        1.0619  0.0030
    



     61        1.0348       0.5000        1.0196  0.0040
     62        1.0395       0.7500        1.0442  0.0040
     63        1.0102       0.5000        1.0186  0.0039
     64        1.0207       0.7500        1.0561  0.0040
     65        1.0621       0.2500        1.1337  0.0036
     66        0.9850       [32m1.0000[0m        0.9920  0.0040
Accuracy after 10 epochs 0.7666666666666667




## Training and testing simultaneously, one example at the time

In [None]:
X, Y = make_classification(n_samples=50000, n_features=10, n_informative=4, n_redundant=0, n_classes=10,
                           n_clusters_per_class=1, class_sep=3)

X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=42, shuffle=True)

onn_network = ONNHBPClassifier(input_units=10,
                                output_units=10,
                                hidden_units=40,
                                n_hidden_layers=5,
                                train_split=None,
                                verbose=0
                                )

for i in range(len(X_train)):
  partial_fit = onn_network.partial_fit(np.asarray([X_train[i, :]]), np.asarray([y_train[i]]))

  if i % 1000 == 0:
    print("Online Accuracy at time {}: {}".format(i, partial_fit.score(X_test, y_test)))

print('Training and testing finished')

Online Accuracy at time 0: 0.15513333333333335
Online Accuracy at time 1000: 0.9687333333333333
Online Accuracy at time 2000: 0.9754666666666667
Online Accuracy at time 3000: 0.9727333333333333
Online Accuracy at time 4000: 0.9790666666666666
Online Accuracy at time 5000: 0.9747333333333333
Online Accuracy at time 6000: 0.9777333333333333
Online Accuracy at time 7000: 0.9808666666666667
Online Accuracy at time 8000: 0.9779333333333333
Online Accuracy at time 9000: 0.9803333333333333
Online Accuracy at time 10000: 0.981
Online Accuracy at time 11000: 0.981
Online Accuracy at time 12000: 0.9815333333333334
Online Accuracy at time 13000: 0.9826666666666667
Online Accuracy at time 14000: 0.9829333333333333
Online Accuracy at time 15000: 0.9748666666666667
Online Accuracy at time 16000: 0.9817333333333333
Online Accuracy at time 17000: 0.9832
Online Accuracy at time 18000: 0.9814
Online Accuracy at time 19000: 0.9794666666666667
Online Accuracy at time 20000: 0.9820666666666666
Online Accur