In [1]:
# Implementation for Hedge Backpropagation Feed Forward Network for Classification
# Original code in Theano https://github.com/LIBOL/ODL
# Paper https://www.ijcai.org/proceedings/2018/369
#
# By Carlos Muniz Cuza and Jonas Brusokas

In [35]:
from sail.models.torch.onn_hbp import ONNHBPClassifier, _ONNHBPModel
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_classification
from sklearn.datasets import load_iris
from sklearn.utils import gen_batches
import numpy as np
import torch

### 1. Create dataset for classification

In [36]:
# Dimensions of the small synthetic classification problem used for the
# smoke tests below.
n_data_points, n_features, n_classes = 40, 15, 5

# One cluster per class; the number of informative features is set equal to
# the number of classes. The fixed random_state keeps the data reproducible.
X, y = make_classification(
    n_samples=n_data_points,
    n_features=n_features,
    n_informative=n_classes,
    n_classes=n_classes,
    n_clusters_per_class=1,
    random_state=0,
)

### 2. Import and check that the model works.

In [37]:
# Architecture of the network used for the smoke tests.
ffn_hidden_units, n_hidden_layers = 16, 2

# Instantiate the bare torch module (no skorch wrapper) with the dimensions
# of the synthetic data set created above.
model = _ONNHBPModel(
    input_units=n_features,
    output_units=n_classes,
    hidden_units=ffn_hidden_units,
    n_hidden_layers=n_hidden_layers,
)

# Run one forward pass over the whole data set and show the result.
output = model(X)
print(output)

tensor([[0.2028, 0.2119, 0.1731, 0.2008, 0.2113],
        [0.2174, 0.2008, 0.1867, 0.1922, 0.2029],
        [0.2112, 0.2137, 0.1833, 0.1911, 0.2006],
        [0.1812, 0.2151, 0.1379, 0.1811, 0.2846],
        [0.2068, 0.2109, 0.1998, 0.1806, 0.2018],
        [0.1688, 0.2421, 0.1295, 0.1934, 0.2662],
        [0.2279, 0.1915, 0.1990, 0.1921, 0.1896],
        [0.1972, 0.2105, 0.1696, 0.1980, 0.2247],
        [0.1904, 0.2384, 0.1591, 0.1908, 0.2213],
        [0.2499, 0.1884, 0.1747, 0.2038, 0.1832],
        [0.1886, 0.2287, 0.1712, 0.1905, 0.2209],
        [0.2206, 0.2029, 0.1614, 0.1784, 0.2367],
        [0.2196, 0.2187, 0.1439, 0.1663, 0.2516],
        [0.2143, 0.2108, 0.1671, 0.2144, 0.1935],
        [0.2026, 0.2315, 0.1474, 0.1899, 0.2286],
        [0.2032, 0.2352, 0.1026, 0.1640, 0.2950],
        [0.2026, 0.2234, 0.1760, 0.1980, 0.2001],
        [0.2393, 0.1916, 0.1684, 0.1944, 0.2063],
        [0.1923, 0.2010, 0.2037, 0.1823, 0.2207],
        [0.2161, 0.1852, 0.2002, 0.2029, 0.1955],


### 3. Check skorch

In [38]:
# Same architecture as the raw-module check, this time through the
# scikit-learn style classifier wrapper.
skorch_check_params = dict(
    input_units=n_features,
    output_units=n_classes,
    hidden_units=ffn_hidden_units,
    n_hidden_layers=n_hidden_layers,
)
model_skorch = ONNHBPClassifier(**skorch_check_params)

# A single incremental training call on the synthetic data; print what the
# call returns.
partial_fit = model_skorch.partial_fit(X, y)
print(partial_fit)

# Predicted class labels for the same samples.
predict = model_skorch.predict(X)
print(predict)

  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1        [36m1.7685[0m       [32m0.0000[0m        [35m1.7679[0m  0.0030
<class 'sail.models.torch.onn_hbp.ONNHBPClassifier'>[initialized](
  module_=_ONNHBPModel(
    (hidden_layers): ModuleList(
      (0): Linear(in_features=15, out_features=16, bias=True)
      (1): Linear(in_features=16, out_features=16, bias=True)
    )
    (output_layers): ModuleList(
      (0): Linear(in_features=16, out_features=5, bias=True)
      (1): Linear(in_features=16, out_features=5, bias=True)
    )
    (do): Dropout(p=0.2, inplace=False)
    (actfn): ReLU()
  ),
)
[2 4 4 4 1 4 2 4 4 4 4 4 4 4 4 3 2 4 4 4 2 0 4 4 4 4 4 4 2 4 4 4 4 4 4 4 4
 4 2 2]


### 2. Load the Iris dataset

In [39]:
iris = load_iris()
X = iris['data']
y = iris['target']
names = iris['target_names']
feature_names = iris['feature_names']

# Split the raw data into training and testing sets *before* scaling, so
# that no statistics of the held-out samples leak into the preprocessing.
# The split itself is the same as splitting the scaled data would give: it
# depends only on the number of samples and on random_state.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=2)

# Scale the features to zero mean and unit variance, which is important for
# the convergence of the neural network. The scaler is fitted on the
# training split only and then applied unchanged to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept for backward compatibility: the full data set transformed with the
# scaler that was fitted on the training split.
X_scaled = scaler.transform(X)

### 3. Train and test ONN on Iris dataset

In [40]:
n_features = X_train.shape[1]
# Size the output layer from the training labels. The test labels are
# held-out data and must not drive the model configuration; a small test
# split may also miss a class entirely.
n_classes = np.unique(y_train).shape[0]
ffn_hidden_units = 50
n_hidden_layers = 3
n_epochs = 5  # number of partial_fit calls over the full training set

model_skorch = ONNHBPClassifier(input_units=n_features,
                                 output_units=n_classes,
                                 hidden_units=ffn_hidden_units,
                                 n_hidden_layers=n_hidden_layers)

# Train incrementally: every call continues from the current weights.
for _ in range(n_epochs):
    partial_fit = model_skorch.partial_fit(X_train, y_train)

print('Accuracy on the test data', partial_fit.score(X_test, y_test))

  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1        [36m1.1316[0m       [32m0.1667[0m        [35m1.1229[0m  0.0044
      2        [36m1.1293[0m       0.1667        [35m1.1200[0m  0.0055
      3        [36m1.1262[0m       0.1667        [35m1.1171[0m  0.0053
      4        [36m1.1231[0m       0.1667        [35m1.1142[0m  0.0045
      5        [36m1.1201[0m       0.1667        [35m1.1114[0m  0.0047
Accuracy on the test data 0.1


### 4. Mini-batch training.

In [41]:
# Number of samples handed to each partial_fit call.
batch_size = 20

# Start from a freshly initialised classifier with the same architecture.
mini_batch_params = dict(
    input_units=n_features,
    output_units=n_classes,
    hidden_units=ffn_hidden_units,
    n_hidden_layers=n_hidden_layers,
)
model_skorch = ONNHBPClassifier(**mini_batch_params)

# One pass over the training set; gen_batches yields consecutive slices of
# at most batch_size indices.
n_train_samples = X_train.shape[0]
for batch in gen_batches(n_train_samples, batch_size):
    x_batch, y_batch = X_train[batch], y_train[batch]
    partial_fit = model_skorch.partial_fit(x_batch, y_batch)

predict = model_skorch.predict(X_test)

# Accuracy on the held-out test split after the single pass.
print(partial_fit.score(X_test, y_test))

  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1        [36m1.0839[0m       [32m0.7500[0m        [35m1.0656[0m  0.0040
      2        [36m1.0649[0m       0.7500        [35m1.0500[0m  0.0040
      3        1.0705       0.5000        1.0944  0.0041
      4        [36m1.0624[0m       0.7500        1.0834  0.0037
      5        1.0896       0.2500        1.1075  0.0050
      6        [36m1.0624[0m       0.7500        1.0618  0.0045
0.7




### 5. Improving the results.
Note that the results of the mini-batch training above are poor. This is because we made only a single pass (one epoch) over the training data. An easy way to improve them is to call `partial_fit` on the mini-batches repeatedly, i.e., to train for multiple epochs.

In [42]:
n_epochs = 10
n_train_samples = X_train.shape[0]

for _ in range(n_epochs):
    # Shuffle once per epoch, *before* cutting the data into mini-batches.
    # Reshuffling after every mini-batch while gen_batches keeps walking over
    # fixed positions would show some samples several times per epoch and
    # skip others entirely.
    permutation = np.random.permutation(n_train_samples)
    # Work on shuffled copies so that X_train / y_train stay untouched and
    # the cell can be re-run safely.
    X_epoch = X_train[permutation]
    y_epoch = y_train[permutation]

    for batch in gen_batches(n_train_samples, batch_size):
        x_batch = X_epoch[batch]
        y_batch = y_epoch[batch]
        partial_fit = model_skorch.partial_fit(x_batch, y_batch)

# Note how the results improved considerably
print(f'Accuracy after {n_epochs} epochs', partial_fit.score(X_test, y_test))

      7        1.0761       0.7500        1.0581  0.0040
      8        1.0671       0.7500        1.0644  0.0041
      9        1.0706       0.5000        1.0645  0.0043
     10        [36m1.0427[0m       0.7500        1.0555  0.0032
     11        1.0671       0.7500        1.0602  0.0037
     12        1.0587       0.5000        1.0611  0.0046
     13        1.0530       0.5000        1.0627  0.0042
     14        1.0692       0.5000        1.0713  0.0050
     15        1.0647       0.7500        [35m1.0417[0m  0.0042
     16        1.0469       0.7500        [35m1.0326[0m  0.0069
     17        1.0596       0.5000        1.0672  0.0036
     18        1.0662       0.7500        [35m1.0301[0m  0.0050
     19        1.0458       0.5000        1.0885  0.0035
     20        1.0548       0.7500        1.0331  0.0042
     21        1.0516       0.7500        [35m1.0218[0m  0.0042
     22        1.0622       0.7500        1.0266  0.0042
     23        1.0532       0.7500        1



     29        1.0484       0.2500        1.0807  0.0047
     30        [36m1.0376[0m       0.7500        [35m1.0128[0m  0.0044
     31        1.0407       0.7500        1.0133  0.0042
     32        1.0487       0.7500        1.0355  0.0047
     33        1.0502       0.7500        1.0287  0.0049
     34        [36m1.0193[0m       0.5000        1.0495  0.0043
     35        [36m1.0169[0m       0.5000        1.0395  0.0041
     36        1.0504       0.7500        [35m0.9984[0m  0.0044
     37        1.0332       0.7500        1.0347  0.0046




     38        1.0254       0.7500        1.0412  0.0048
     39        1.0400       0.7500        0.9988  0.0041
     40        1.0463       0.5000        1.0451  0.0039
     41        1.0432       0.7500        1.0092  0.0042
     42        1.0205       0.5000        1.0473  0.0041
     43        1.0170       0.7500        1.0506  0.0050
     44        [36m1.0070[0m       0.7500        1.0206  0.0052
     45        1.0265       0.7500        1.0172  0.0036
     46        1.0177       0.7500        1.0043  0.0042
     47        1.0273       0.5000        1.0458  0.0038
     48        1.0331       [32m1.0000[0m        [35m0.9978[0m  0.0039
     49        [36m1.0044[0m       0.7500        1.0033  0.0038
     50        1.0169       0.5000        1.0309  0.0039
     51        1.0281       0.5000        1.0220  0.0040
     52        1.0265       1.0000        1.0044  0.0043
     53        1.0151       0.7500        [35m0.9770[0m  0.0035
     54        [36m0.9938[0m       1.0000



     61        1.0123       0.7500        0.9853  0.0062
     62        1.0108       0.7500        1.0003  0.0036
     63        0.9969       1.0000        0.9955  0.0039
     64        1.0109       0.5000        1.0201  0.0041
     65        0.9995       0.5000        0.9979  0.0037
     66        1.0151       0.7500        0.9780  0.0050
Accuracy after 10 epochs 0.7666666666666667


## Training and testing simultaneously, one example at a time

In [43]:
# Dimensions of the synthetic stream; reused for the network so that the
# data set and the model cannot drift apart.
stream_n_features = 10
stream_n_classes = 10

# random_state makes the generated data set reproducible, matching the
# seeded train/test split below.
X, Y = make_classification(n_samples=5000, n_features=stream_n_features, n_informative=4,
                           n_redundant=0, n_classes=stream_n_classes,
                           n_clusters_per_class=1, class_sep=3, random_state=42)

X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=42, shuffle=True)

# train_split=None and verbose=0 are passed through to the classifier as in
# the original cell; no per-call epoch table shows up in the recorded output.
onn_network = ONNHBPClassifier(input_units=stream_n_features,
                                output_units=stream_n_classes,
                                hidden_units=40,
                                n_hidden_layers=5,
                                train_split=None,
                                verbose=0
                                )

n_training_samples = len(X_train)
for i in range(n_training_samples):
    # Slicing with i:i + 1 keeps the leading batch dimension, so the model
    # receives a batch that holds exactly one sample.
    partial_fit = onn_network.partial_fit(X_train[i:i + 1], y_train[i:i + 1])

    # Report the accuracy on the test split every 1000 samples.
    if i % 1000 == 0:
        print("Online Accuracy at time {}/{}: {}".format(i, n_training_samples, partial_fit.score(X_test, y_test)))

print('Training and testing finished.\nFinal accuracy after {} samples: {}'.format(n_training_samples, partial_fit.score(X_test, y_test)))

Training and testing finished.
Final accuracy after 3500 samples: 0.9633333333333334
Online Accuracy at time 0/3500: 0.05733333333333333
Online Accuracy at time 1000/3500: 0.9746666666666667
Online Accuracy at time 2000/3500: 0.98
Online Accuracy at time 3000/3500: 0.9806666666666667
Training and testing finished.
Final accuracy after 3500 samples: 0.9793333333333333
