In [1]:
# Implementation for Hedge Backpropagation Feed Forward Network for Classification
# Original code in Theano https://github.com/LIBOL/ODL
# Paper https://www.ijcai.org/proceedings/2018/369
#
# By Carlos Muniz Cuza and Jonas Brusokas

In [2]:
from sail.models.torch.onn_hbp import ONNHBPClassifier, _ONNHBPModel
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_classification
from sklearn.datasets import load_iris
from sklearn.utils import gen_batches
import numpy as np
import torch

### 1. Create dataset for classification

In [3]:
# Size of the synthetic smoke-test problem used by the next two sections.
n_data_points = 40
n_features = 15
n_classes = 5

# As many informative features as there are classes, one cluster per class,
# and a fixed seed so the generated samples are the same on every run.
dataset_options = dict(
    n_samples=n_data_points,
    n_features=n_features,
    n_informative=n_classes,
    n_classes=n_classes,
    n_clusters_per_class=1,
    random_state=0,
)
X, y = make_classification(**dataset_options)

### 2. Import and check that the model works.

In [4]:
# Architecture of the smoke-test network (reused by the skorch check below).
ffn_hidden_units = 16
n_hidden_layers = 2

# Build the bare module directly, without the classifier wrapper, sized to
# match the synthetic dataset created above.
module_options = {
    "input_units": n_features,
    "output_units": n_classes,
    "hidden_units": ffn_hidden_units,
    "n_hidden_layers": n_hidden_layers,
}
model = _ONNHBPModel(**module_options)

# Single forward pass over the whole dataset; the printed result only serves
# as a sanity check that the module runs end to end.
output = model(X)
print(output)

tensor([[0.1721, 0.2268, 0.2109, 0.1938, 0.1964],
        [0.1458, 0.2603, 0.2098, 0.1916, 0.1926],
        [0.1701, 0.2274, 0.2360, 0.1990, 0.1674],
        [0.1733, 0.2240, 0.2286, 0.1826, 0.1915],
        [0.1572, 0.2496, 0.2007, 0.1850, 0.2075],
        [0.1709, 0.2222, 0.1919, 0.2251, 0.1898],
        [0.1546, 0.2483, 0.2395, 0.1720, 0.1856],
        [0.1699, 0.2345, 0.2159, 0.1945, 0.1852],
        [0.1695, 0.2266, 0.1980, 0.2127, 0.1933],
        [0.1941, 0.2117, 0.2136, 0.1689, 0.2117],
        [0.1774, 0.2052, 0.1903, 0.2231, 0.2039],
        [0.1739, 0.2297, 0.2302, 0.1814, 0.1848],
        [0.1877, 0.2086, 0.2031, 0.1998, 0.2008],
        [0.1855, 0.2039, 0.2237, 0.2099, 0.1770],
        [0.1718, 0.2304, 0.2106, 0.1862, 0.2010],
        [0.1630, 0.2471, 0.2163, 0.1884, 0.1851],
        [0.1607, 0.2234, 0.2369, 0.1790, 0.2000],
        [0.1849, 0.2164, 0.2054, 0.1943, 0.1990],
        [0.1866, 0.2091, 0.2478, 0.1913, 0.1651],
        [0.1706, 0.2273, 0.2305, 0.1941, 0.1775],


### 3. Check skorch

In [5]:
# Same architecture as the bare module above, this time through the
# classifier wrapper.
classifier_options = {
    "input_units": n_features,
    "output_units": n_classes,
    "hidden_units": ffn_hidden_units,
    "n_hidden_layers": n_hidden_layers,
}
model_skorch = ONNHBPClassifier(**classifier_options)

# One incremental training call on the full synthetic dataset; printing the
# returned object shows the initialized estimator.
partial_fit = model_skorch.partial_fit(X, y)
print(partial_fit)

# Predicted class labels for the same samples.
predict = model_skorch.predict(X)
print(predict)

  epoch    train_loss     dur
-------  ------------  ------
      1        [36m1.6667[0m  0.0189
<class 'sail.models.torch.onn_hbp.ONNHBPClassifier'>[initialized](
  module_=_ONNHBPModel(
    (hidden_layers): ModuleList(
      (0): Linear(in_features=15, out_features=16, bias=True)
      (1): Linear(in_features=16, out_features=16, bias=True)
    )
    (output_layers): ModuleList(
      (0-1): 2 x Linear(in_features=16, out_features=5, bias=True)
    )
    (do): Dropout(p=0.2, inplace=False)
    (actfn): ReLU()
  ),
)
[0 0 0 3 0 0 0 0 3 2 0 3 3 2 3 3 3 3 0 0 2 3 2 0 0 0 0 0 3 0 2 1 3 2 2 3 3
 0 1 1]


### 2. Load the Iris dataset

In [6]:
iris = load_iris()
X = iris['data']
y = iris['target']
names = iris['target_names']
feature_names = iris['feature_names']

# Split the raw data into training and testing sets BEFORE scaling, so that
# no statistics of the held-out test rows leak into the preprocessing step.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=2)

# Scale data to have mean 0 and variance 1, which is important for the
# convergence of the neural network. The scaler is fitted on the training
# rows only and then applied unchanged to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept for backward compatibility with code that expects the full scaled
# matrix; note it is scaled with the training-set statistics.
X_scaled = scaler.transform(X)

### 3. Train and test ONN on Iris dataset

In [7]:
n_features = X_train.shape[1]
# Size the output layer from the labels the model is actually trained on;
# deriving it from y_test would undersize the network whenever the (small)
# test split happens to miss a class.
n_classes = np.unique(y_train).shape[0]
ffn_hidden_units = 50
n_hidden_layers = 3
n_epochs = 5

model_skorch = ONNHBPClassifier(input_units=n_features,
                                 output_units=n_classes,
                                 hidden_units=ffn_hidden_units,
                                 n_hidden_layers=n_hidden_layers)

# Each partial_fit call performs one more pass over the full training set
# (one row of the training log per call).
for _ in range(n_epochs):
    partial_fit = model_skorch.partial_fit(X_train, y_train)

print('Accuracy on the test data', model_skorch.score(X_test, y_test))

  epoch    train_loss     dur
-------  ------------  ------
      1        [36m1.0647[0m  0.0039
      2        [36m1.0629[0m  0.0069
      3        [36m1.0606[0m  0.0062
      4        [36m1.0583[0m  0.0070
      5        [36m1.0560[0m  0.0062
Accuracy on the test data 0.7


### 4. Mini-batch training.

In [8]:
# Number of training rows handed to each partial_fit call.
batch_size = 20

# Start again from a fresh, untrained classifier with the same architecture
# as in the previous section.
model_skorch = ONNHBPClassifier(
    n_hidden_layers=n_hidden_layers,
    hidden_units=ffn_hidden_units,
    output_units=n_classes,
    input_units=n_features,
)

# A single pass over the training set: gen_batches yields consecutive slices
# of at most `batch_size` rows, and the model is updated once per slice.
n_train_rows = X_train.shape[0]
for batch in gen_batches(n_train_rows, batch_size):
    x_batch, y_batch = X_train[batch], y_train[batch]
    partial_fit = model_skorch.partial_fit(x_batch, y_batch)

predict = model_skorch.predict(X_test)

# Accuracy on the held-out test split after that single pass.
print(partial_fit.score(X_test, y_test))

  epoch    train_loss     dur
-------  ------------  ------
      1        [36m1.1313[0m  0.0028
      2        1.1428  0.0067
      3        [36m1.1297[0m  0.0045
      4        1.1298  0.0038
      5        [36m1.1133[0m  0.0088
      6        1.1667  0.0054
0.3333333333333333


### 5. Improving the results.
Note that the results of mini-batch learning above are very poor. This is because we make only a single pass (one epoch) over the training data. An easy way to improve this is to run `partial_fit` over the mini-batches several times, i.e., for multiple epochs.

In [9]:
n_epochs = 10
n_train = X_train.shape[0]
# Seeded generator so the shuffling (and therefore the run) is repeatable.
rng = np.random.default_rng(0)

for _ in range(n_epochs):
    # Shuffle ONCE per epoch. Re-shuffling after every mini-batch would let
    # the same sample appear in several batches of one epoch while other
    # samples are skipped entirely.
    order = rng.permutation(n_train)
    for batch in gen_batches(n_train, batch_size):
        # Index through the permutation instead of reordering X_train /
        # y_train in place, so re-running this cell starts from the same data.
        batch_idx = order[batch]
        x_batch = X_train[batch_idx]
        y_batch = y_train[batch_idx]
        partial_fit = model_skorch.partial_fit(x_batch, y_batch)

# Note how the results improved considerably
print('Accuracy after {} epochs'.format(n_epochs), model_skorch.score(X_test, y_test))

      7        1.1206  0.0044
      8        1.1451  0.0038
      9        1.1512  0.0042
     10        1.1198  0.0088
     11        [36m1.1128[0m  0.0036
     12        1.1842  0.0036
     13        [36m1.0931[0m  0.0041
     14        1.1110  0.0025
     15        1.1176  0.0040
     16        1.1296  0.0022
     17        1.1264  0.0029
     18        1.1322  0.0027
     19        1.1195  0.0026
     20        [36m1.0751[0m  0.0037
     21        1.1132  0.0035
     22        1.1027  0.0043
     23        1.1161  0.0037
     24        1.0928  0.0029
     25        1.1117  0.0033
     26        1.1075  0.0041
     27        1.0975  0.0031
     28        [36m1.0603[0m  0.0025
     29        1.0973  0.0039
     30        1.0699  0.0033
     31        [36m1.0593[0m  0.0038
     32        1.0922  0.0035
     33        1.0774  0.0036
     34        1.0789  0.0033
     35        [36m1.0591[0m  0.0025
     36        1.0739  0.0039
     37        1.1004  0.0023
     38        1

## Training and testing simultaneously, one example at a time

In [10]:
# Fix every source of randomness in this experiment (data generation, the
# train/test split and the network's weight initialisation) so the reported
# accuracies can be reproduced.
seed = 42
torch.manual_seed(seed)

X, Y = make_classification(n_samples=5000, n_features=10, n_informative=4, n_redundant=0, n_classes=10,
                           n_clusters_per_class=1, class_sep=3, random_state=seed)

X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=seed, shuffle=True)

onn_network = ONNHBPClassifier(input_units=10,
                                output_units=10,
                                hidden_units=40,
                                n_hidden_layers=5,
                                train_split=None,
                                verbose=0
                                )

# Online setting: the network sees the training stream one sample at a time
# and is periodically evaluated on the held-out test split.
n_training_samples = len(X_train)
for i in range(n_training_samples):
    # Length-1 slices keep the leading batch dimension expected by partial_fit.
    partial_fit = onn_network.partial_fit(X_train[i:i + 1], y_train[i:i + 1])

    if i % 1000 == 0:
        print("Online Accuracy at time {}/{}: {}".format(i, n_training_samples, onn_network.score(X_test, y_test)))

print('Training and testing finished.\nFinal accuracy after {} samples: {}'.format(n_training_samples, onn_network.score(X_test, y_test)))

Online Accuracy at time 0/3500: 0.07666666666666666
Online Accuracy at time 1000/3500: 0.9693333333333334
Online Accuracy at time 2000/3500: 0.9633333333333334
Online Accuracy at time 3000/3500: 0.9766666666666667
Training and testing finished.
Final accuracy after 3500 samples: 0.97
