# The Forward-Forward Algorithm

Original paper: https://www.cs.toronto.edu/~hinton/FFA13.pdf

In [1]:
from pathlib import Path
from tqdm.notebook import tqdm

import torch
from dataset_utils import BreastCancerLoader, TrainingDatasetFF, WineQualityLoader
from models import FFMultiLayerPerceptron, MultiLayerPerceptron
from tools import base_loss, generate_positive_negative_samples_overlay
from torchvision.transforms import Compose, ToTensor, Lambda, Normalize

In [2]:
# Width of the hidden layer; reused by both the Forward-Forward and the backprop model definitions below.
dim_size = 50

In [3]:
# Compute device: GPU when one is available, CPU otherwise.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Mini-batch sizes handed to the train and test loaders.
train_batch_size, test_batch_size = 64, 64

# Function used to generate the positive / negative examples.
pos_gen_fn = generate_positive_negative_samples_overlay

# 1.0 Import Data

In [4]:
# Loader object for the breast-cancer dataset; it provides the train/test loaders and splits used below.
bc_loader = BreastCancerLoader()
# NOTE(review): presumably fetches the data to local storage if it is not there yet — confirm in dataset_utils.
bc_loader.download_dataset()

In [5]:
# Batched loaders for the train and test splits.
train_loader, test_loader = (
    bc_loader.get_train_loader(train_batch_size),
    bc_loader.get_test_loader(test_batch_size),
)

In [6]:
# Preparing the whole Forward-Forward training set takes about 10 s.
# Every (X, Y) batch is moved to `device` and passed through `pos_gen_fn`;
# the resulting generator is consumed by TrainingDatasetFF.
ff_sample_batches = (
    pos_gen_fn(
        X.to(device),
        Y.to(device),
        num_classes=2,
        only_positive=False,
        replace=False,
    )
    for X, Y in train_loader
)
ff_dataset = TrainingDatasetFF(ff_sample_batches)

# Re-batch with the same batch size as the plain training loader, shuffled.
train_loader_ff = torch.utils.data.DataLoader(
    ff_dataset,
    batch_size=train_loader.batch_size,
    shuffle=True,
)


In [7]:
# Sanity check: show the shape of the first element of one FF batch.
first_pair = next(iter(train_loader_ff), None)
if first_pair is not None:
    first_item, _second_item = first_pair
    print(first_item.shape)

torch.Size([64, 32])


# 2.0 Create Network

In [8]:
## -- Hyper-parameters for the Forward-Forward network
# Layer widths; the first entry is the input size.
hidden_dimensions = [32, dim_size]
activation = torch.nn.ReLU()

# Optimizer class (not an instance) and the learning rate passed along with it.
optimizer = torch.optim.Adam
layer_optim_learning_rate = 0.04

# Remaining settings forwarded to FFMultiLayerPerceptron.
loss = base_loss
threshold = 2.0
method = "MSE"
replace = False

In [9]:
# Clear the model handle before the model is (re)built in the next cell.
# NOTE(review): presumably done so a previously trained model can be released — confirm intent.
mlp_model = None

In [10]:
# Seed torch's RNG before the model is constructed.
torch.manual_seed(10)

# Positional arguments, in the exact order the constructor is called with.
ff_model_args = (
    hidden_dimensions,
    activation,
    optimizer,
    layer_optim_learning_rate,
    threshold,
    loss,
    method,
    replace,
)
mlp_model = FFMultiLayerPerceptron(*ff_model_args).to(device)

In [11]:
def count_parameters(model):
    """Return the total element count of the trainable parameters of ``model``.

    Only parameters whose ``requires_grad`` flag is set are counted.
    """
    trainable_total = 0
    for param in model.parameters():
        if param.requires_grad:
            trainable_total += param.numel()
    return trainable_total


In [12]:
# Add up the trainable parameter counts of every layer in the FF model.
total_params = sum(count_parameters(ff_layer) for ff_layer in mlp_model.layers)

print("Number of parameters:", total_params)

Number of parameters: 1650


# 3.0 Train Network

In [13]:
## -- Set some variables
# Number of passes over the training data; used by both the Forward-Forward and the backprop training loops.
n_epochs = 60

# Choose one of the following training procedures (3.1 or 3.2).

## 3.1 Train all layers at the same time

In [14]:
# Train all layers on every batch, for `n_epochs` passes over the FF loader.
for epoch in tqdm(range(n_epochs)):
    # NOTE(review): each batch is assumed to be a (positive, negative) sample pair — confirm against TrainingDatasetFF.
    for X_pos, X_neg in train_loader_ff:
        layer_losses = mlp_model.train_batch(X_pos, X_neg, before=False)
        # '\r' keeps a single status line that is overwritten with the latest per-layer losses.
        status = ", ".join(
            'Layer {}: {}'.format(idx, layer_loss)
            for idx, layer_loss in enumerate(layer_losses)
        )
        print(status, end='\r')

  0%|          | 0/60 [00:00<?, ?it/s]

Layer 0: 0.17133615911006927

## 3.2 Train one layer at a time

In [15]:
# Alternative to section 3.1: uncomment to train the layers one at a time instead.
#mlp_model.train_batch_progressive(n_epochs, train_loader_ff)

# 4.0 Test the Network

In [17]:
# Accuracy of the Forward-Forward model on the training split.
acc = 0
n_train = float(len(bc_loader.train_set))

for X_train, Y_train in tqdm(train_loader, total=len(train_loader)):
    X_train, Y_train = X_train.to(device), Y_train.to(device)

    predicted = mlp_model.predict_accumulate_goodness(X_train, pos_gen_fn, n_class=2)
    # Count the predictions that match the labels in this batch.
    acc += predicted.eq(Y_train).sum()

train_accuracy = acc / n_train
print(f"Accuracy: {train_accuracy:.4%}")
print(f"Train error: {1 - train_accuracy:.4%}")

  0%|          | 0/8 [00:00<?, ?it/s]

Accuracy: 98.9011%
Train error: 1.0989%


In [18]:
# Accuracy of the Forward-Forward model on the held-out test split.
acc = 0
n_test = float(len(bc_loader.test_set))

for X_test, Y_test in tqdm(test_loader, total=len(test_loader)):
    X_test, Y_test = X_test.to(device), Y_test.to(device)

    predicted = mlp_model.predict_accumulate_goodness(X_test, pos_gen_fn, n_class=2)
    # Count the predictions that match the labels in this batch.
    acc += predicted.eq(Y_test).sum()

test_accuracy = acc / n_test
print(f"Accuracy: {test_accuracy:.4%}")
print(f"Test error: {1 - test_accuracy:.4%}")


  0%|          | 0/2 [00:00<?, ?it/s]

Accuracy: 98.2456%
Test error: 1.7544%


# 5.0 Back Propagation

In [19]:
## -- Settings for the backpropagation baseline
# Layer widths: input size first, then the hidden width, then the number of output logits.
hidden_dimensions = [30, dim_size, 2]
activation = torch.nn.ReLU()

# Training objective, and the optimizer class that is instantiated once the model exists.
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam

In [20]:
# Seed torch's RNG before the model is constructed.
torch.manual_seed(10)

mlp_backprop_model = MultiLayerPerceptron(hidden_dimensions, activation).to(device)

# `optimizer` reaches this cell as a class and is rebound to an instance below.
# Recover the class when the cell is re-run, so a second execution builds a fresh
# optimizer instead of failing with "object is not callable".
optimizer_cls = optimizer if isinstance(optimizer, type) else type(optimizer)
optimizer = optimizer_cls(mlp_backprop_model.parameters(), lr=0.001)


In [21]:
# Standard backprop training: one optimizer step per mini-batch, for `n_epochs` epochs.
for epoch in tqdm(range(n_epochs)):
    for X_train, Y_train in train_loader:
        X_train = X_train.to(device)
        # Cast the labels to int64 in the same call that moves them to `device`;
        # this avoids the CPU round-trip implied by `.type(torch.LongTensor)`.
        Y_train = Y_train.to(device, dtype=torch.long)

        Y_pred = mlp_backprop_model(X_train)

        # Named `batch_loss` so the module-level `loss` (the FF loss function chosen
        # in the network setup) is not overwritten by a tensor.
        batch_loss = loss_fn(Y_pred, Y_train)

        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        # '\r' keeps a single status line that is overwritten each step.
        print(f"Loss: {batch_loss.item()}", end='\r')

  0%|          | 0/60 [00:00<?, ?it/s]

Loss: 0.0062216822989284993

In [22]:
# Accuracy of the backprop model on the training split.
acc = 0
n_train = float(len(bc_loader.train_set))

# Evaluation only: no gradients are needed, so skip building the autograd graph.
with torch.no_grad():
    for X_train, Y_train in tqdm(train_loader, total=len(train_loader)):
        X_train = X_train.to(device)
        Y_train = Y_train.to(device)

        # softmax is monotonic, so argmax over the raw logits selects the same class.
        predicted = mlp_backprop_model(X_train).argmax(1)
        # .item() keeps `acc` a plain Python int rather than a device tensor.
        acc += predicted.eq(Y_train).sum().item()

print(f"Accuracy: {acc/n_train:.4%}")
print(f"Train error: {1 - acc/n_train:.4%}")

  0%|          | 0/8 [00:00<?, ?it/s]

Accuracy: 99.3407%
Train error: 0.6593%


In [23]:
# Accuracy of the backprop model on the held-out test split.
acc = 0
n_test = float(len(bc_loader.test_set))

# Evaluation only: no gradients are needed, so skip building the autograd graph.
with torch.no_grad():
    for X_test, Y_test in tqdm(test_loader, total=len(test_loader)):
        X_test = X_test.to(device)
        Y_test = Y_test.to(device)

        # softmax is monotonic, so argmax over the raw logits selects the same class.
        predicted = mlp_backprop_model(X_test).argmax(1)
        # .item() keeps `acc` a plain Python int rather than a device tensor.
        acc += predicted.eq(Y_test).sum().item()

print(f"Accuracy: {acc/n_test:.4%}")
print(f"Test error: {1 - acc/n_test:.4%}")

  0%|          | 0/2 [00:00<?, ?it/s]

Accuracy: 98.2456%
Test error: 1.7544%


In [24]:
# Add up the trainable parameter counts of every layer in the backprop model.
total_params = sum(count_parameters(bp_layer) for bp_layer in mlp_backprop_model.layers)

print("Number of parameters:", total_params)

Number of parameters: 1652
