# The Forward-Forward Algorithm

Original paper: https://www.cs.toronto.edu/~hinton/FFA13.pdf

In [64]:
# Enable IPython's autoreload extension; mode 2 reloads imported modules before each cell runs,
# so edits to the project's .py files are picked up without restarting the kernel.
# NOTE(review): re-running this cell prints the "already loaded" notice recorded below;
# `%reload_ext autoreload` would avoid it.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [65]:
from pathlib import Path
from tqdm.notebook import tqdm

import torch
from dataset_utils import BreastCancerLoader, TrainingDatasetFF
from models import FFMultiLayerPerceptron, MultiLayerPerceptron
from tools import base_loss, generate_positive_negative_samples_overlay
from torchvision.transforms import Compose, ToTensor, Lambda, Normalize

In [66]:
# Mini-batch sizes handed to the train and test data loaders.
train_batch_size, test_batch_size = 64, 64

# Function used to generate the positive/negative examples.
pos_gen_fn = generate_positive_negative_samples_overlay

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# 1.0 Import Data

In [67]:
# Create the project's breast-cancer data helper and run its download step.
bc_loader = BreastCancerLoader()
bc_loader.download_dataset()

In [68]:
# Train and test loaders, each built with the batch size set in the configuration cell.
train_loader = bc_loader.get_train_loader(train_batch_size)
test_loader = bc_loader.get_test_loader(test_batch_size)

In [69]:
# Preparing the whole training dataset takes about 10 s.
# Each (X, Y) batch from `train_loader` is moved to `device` and passed through
# `pos_gen_fn`; the resulting stream is wrapped in a TrainingDatasetFF and then
# re-batched (with shuffling) at the same batch size as `train_loader`.
ff_sample_stream = (
    pos_gen_fn(
        X.to(device),
        Y.to(device),
        num_classes=2,
        only_positive=False,
        replace=False,
    )
    for X, Y in train_loader
)
ff_dataset = TrainingDatasetFF(ff_sample_stream)
train_loader_ff = torch.utils.data.DataLoader(
    ff_dataset,
    batch_size=train_loader.batch_size,
    shuffle=True,
)


# 2.0 Create Network

In [186]:
## -- Forward-Forward model settings
hidden_dimensions = [32, 5, 5]  # first entry is the input size

# Activation module, optimiser class and the learning rate used with it.
activation = torch.nn.ReLU()
optimizer = torch.optim.Adam
layer_optim_learning_rate = 0.001

# Remaining values forwarded positionally to FFMultiLayerPerceptron.
threshold = 2.0
loss = base_loss
method = "MSE"
replace = False

In [194]:
# Build the Forward-Forward network from the settings above (arguments are positional,
# so their order matters), then move it to the selected device.
mlp_model = FFMultiLayerPerceptron(
    hidden_dimensions,
    activation,
    optimizer,
    layer_optim_learning_rate,
    threshold,
    loss,
    method,
    replace,
)
mlp_model = mlp_model.to(device)

In [195]:
# Count the trainable parameters of the Forward-Forward model.
# The count is computed inline so this cell does not rely on a helper that is
# only defined in a later cell (which fails on a fresh Restart & Run All).
total_params = sum(
    param.numel()
    for layer in mlp_model.layers
    for param in layer.parameters()
    if param.requires_grad  # only parameters that are actually trained
)

print("Number of parameters:", total_params)

Number of parameters: 195


# 3.0 Train Network

In [196]:
## -- Set some variables
n_epochs = 5  # epoch count consumed by the training cells in sections 3.1 and 3.2

# Choose one of the following training procedures (3.1 or 3.2).

In [197]:
# Seed PyTorch's global random number generator (seed 0) before training.
# NOTE(review): `mlp_model` and `train_loader_ff` are created in earlier cells, i.e. before
# this seed is set — presumably their random state is not covered by it; confirm.
torch.manual_seed(0)

<torch._C.Generator at 0x220860c00b0>

## 3.1 Train all layers at the same time

In [198]:
# Joint training: every batch pair from `train_loader_ff` is handed to `train_batch`,
# and the returned losses are printed as one "Layer i: value" entry each.
for epoch in tqdm(range(n_epochs)):
    for batch_pos, batch_neg in train_loader_ff:
        per_layer_losses = mlp_model.train_batch(batch_pos, batch_neg, before=False)
        status = ", ".join(
            'Layer {}: {}'.format(idx, value)
            for idx, value in enumerate(per_layer_losses)
        )
        # '\r' returns the cursor to the line start so the next status overwrites this one.
        print(status, end='\r')

  0%|          | 0/5 [00:00<?, ?it/s]

Layer 0: 2.215082883834839, Layer 1: 2.22563672065734865

## 3.2 Train one layer at a time

In [16]:
# Alternative to section 3.1: delegate training to the model's progressive routine.
# NOTE(review): if the 3.1 cell has already been run, this presumably continues training
# the same `mlp_model` rather than starting from fresh weights — confirm before comparing.
mlp_model.train_batch_progressive(n_epochs, train_loader_ff)

  0%|          | 0/2 [00:00<?, ?it/s]

Epoch: 60/60, Layer 0: 0.15289615094661713

Epoch: 60/60, Layer 1: 0.75259268283843995



# 4.0 Test the Network

In [199]:
# Accuracy of the Forward-Forward model on the training split.
acc = 0  # running count of correctly classified samples (plain Python int)

# Evaluation only: disable autograd so no graph is recorded for these forward passes.
with torch.no_grad():
    for X_train, Y_train in tqdm(train_loader, total=len(train_loader)):
        X_train = X_train.to(device)
        Y_train = Y_train.to(device)

        Y_pred = mlp_model.predict_accumulate_goodness(X_train, pos_gen_fn, n_class=2)
        # .item() converts the 0-dim count tensor to a Python number.
        acc += Y_pred.eq(Y_train).sum().item()

# Compute the fraction once and derive the error from it.
accuracy = acc / len(bc_loader.train_set)
print(f"Accuracy: {accuracy:.4%}")
print(f"Train error: {1 - accuracy:.4%}")

  0%|          | 0/8 [00:00<?, ?it/s]

Accuracy: 96.0440%
Train error: 3.9560%


In [200]:
# Accuracy of the Forward-Forward model on the test split.
acc = 0  # running count of correctly classified samples (plain Python int)

# Evaluation only: disable autograd so no graph is recorded for these forward passes.
with torch.no_grad():
    for X_test, Y_test in tqdm(test_loader, total=len(test_loader)):
        X_test = X_test.to(device)
        Y_test = Y_test.to(device)

        Y_pred = mlp_model.predict_accumulate_goodness(X_test, pos_gen_fn, n_class=2)
        # .item() converts the 0-dim count tensor to a Python number.
        acc += Y_pred.eq(Y_test).sum().item()

# Compute the fraction once and derive the error from it.
accuracy = acc / len(bc_loader.test_set)
print(f"Accuracy: {accuracy:.4%}")
print(f"Test error: {1 - accuracy:.4%}")


  0%|          | 0/2 [00:00<?, ?it/s]

Accuracy: 98.2456%
Test error: 1.7544%


# 5.0 Back Propagation

In [155]:
## -- Set some variables
# Settings for the backprop baseline.
# NOTE(review): `n_epochs`, `hidden_dimensions`, `activation` and `optimizer` rebind names
# that the Forward-Forward cells (sections 2.0 / 3.0) also define; re-run those cells
# before re-creating or re-training the Forward-Forward model.
n_epochs= 5
hidden_dimensions = [30, 5, 5, 2]  # layer sizes passed to MultiLayerPerceptron
activation = torch.nn.ReLU()
optimizer = torch.optim.Adam  # optimiser class; instantiated in the next cell
loss_fn = torch.nn.CrossEntropyLoss()

In [156]:
# Build the backprop baseline network and move it to the selected device.
mlp_backprop_model = MultiLayerPerceptron(hidden_dimensions, activation).to(device)

# `optimizer` is an optimiser *class* when this cell first runs and is rebound to an
# instance below. Resolving the class first keeps the cell re-runnable: on a second
# execution `optimizer` is already an instance, and calling it would raise a TypeError.
optimizer_cls = optimizer if isinstance(optimizer, type) else type(optimizer)
optimizer = optimizer_cls(mlp_backprop_model.parameters(), lr=0.001)


In [157]:
# Standard backprop training of the baseline MLP.
for epoch in tqdm(range(n_epochs)):
    for X_train, Y_train in train_loader:
        X_train = X_train.to(device)
        # CrossEntropyLoss takes integer class indices; convert dtype and device in one
        # call instead of going through a CPU LongTensor first.
        Y_train = Y_train.to(device=device, dtype=torch.long)

        Y_pred = mlp_backprop_model(X_train)

        # Named `batch_loss` so the module-level `loss` (the Forward-Forward loss
        # function set in section 2.0) is not overwritten by a tensor.
        batch_loss = loss_fn(Y_pred, Y_train)

        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        # '\r' keeps the progress on a single line by overwriting the previous print.
        print(f"Loss: {batch_loss.item()}", end='\r')

  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.6776123046875337

In [158]:
# Accuracy of the backprop baseline on the training split.
acc = 0  # running count of correctly classified samples (plain Python int)

# Evaluation only: disable autograd so no graph is recorded for these forward passes.
with torch.no_grad():
    for X_train, Y_train in tqdm(train_loader, total=len(train_loader)):
        X_train = X_train.to(device)
        Y_train = Y_train.to(device)

        # argmax over dim 1 of the raw outputs; softmax is monotonic, so applying it
        # first would not change which class wins.
        Y_pred = mlp_backprop_model(X_train).argmax(dim=1)
        acc += Y_pred.eq(Y_train).sum().item()

# Compute the fraction once and derive the error from it.
accuracy = acc / len(bc_loader.train_set)
print(f"Accuracy: {accuracy:.4%}")
print(f"Train error: {1 - accuracy:.4%}")

  0%|          | 0/8 [00:00<?, ?it/s]

Accuracy: 72.9670%
Train error: 27.0330%


In [159]:
# Accuracy of the backprop baseline on the test split.
acc = 0  # running count of correctly classified samples (plain Python int)

# Evaluation only: disable autograd so no graph is recorded for these forward passes.
with torch.no_grad():
    for X_test, Y_test in tqdm(test_loader, total=len(test_loader)):
        X_test = X_test.to(device)
        Y_test = Y_test.to(device)

        # argmax over dim 1 of the raw outputs; softmax is monotonic, so applying it
        # first would not change which class wins.
        Y_pred = mlp_backprop_model(X_test).argmax(dim=1)
        acc += Y_pred.eq(Y_test).sum().item()

# Compute the fraction once and derive the error from it.
accuracy = acc / len(bc_loader.test_set)
print(f"Accuracy: {accuracy:.4%}")
print(f"Test error: {1 - accuracy:.4%}")

  0%|          | 0/2 [00:00<?, ?it/s]

Accuracy: 74.5614%
Test error: 25.4386%


In [49]:
def count_parameters(model):
    """Return the number of trainable scalar parameters in ``model``.

    Iterates over ``model.parameters()`` and adds up ``numel()`` for every
    parameter whose ``requires_grad`` flag is set; frozen parameters are skipped.
    """
    trainable_total = 0
    for param in model.parameters():
        if param.requires_grad:
            trainable_total += param.numel()
    return trainable_total


In [160]:
# Add up the trainable-parameter counts of every layer in the backprop model.
total_params = sum(count_parameters(layer) for layer in mlp_backprop_model.layers)

print("Number of parameters:", total_params)

Number of parameters: 197
