In [3]:
import numpy as np
from sklearn.datasets import fetch_openml
import tensorflow as tf
from FFN import FFLayer
from FFN import FFNetwork
import time

In [None]:
def load_mnist():
    """
    Load MNIST through ``tf.keras.datasets.mnist`` and flatten it for the network.

    Every image is flattened into a single row, cast to ``float32`` and divided
    by 255; labels are cast to ``int32``. The resulting shapes and the pixel
    range of the training set are printed as a quick sanity check.

    Returns:
        tuple: ``(X_train, X_test, y_train, y_test)``. Note the order: both
        feature arrays first, then both label arrays.
    """
    print("Loading MNIST dataset...")

    (X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()

    # Take the row count from the data and let NumPy infer the flattened image
    # size, instead of hard-coding 60000/10000 x 784. For the standard split
    # the result is identical.
    X_train = X_train.reshape(len(X_train), -1).astype(np.float32) / 255.0
    X_test = X_test.reshape(len(X_test), -1).astype(np.float32) / 255.0

    y_train = y_train.astype(np.int32)
    y_test = y_test.astype(np.int32)

    print(f"Train set: {X_train.shape}, Test set: {X_test.shape}")
    print(f"Pixel range: [{X_train.min():.3f}, {X_train.max():.3f}]")
    return X_train, X_test, y_train, y_test

In [None]:
def test_basic_functionality():
    """
    test the basic functionalities of the network
    """
    print("\n=== base test ===")
    
    # build a small network for test
    network = FFNetwork([794, 100, 50, 10], 0.01)  # input is 784+10=794
    
    # test single sample
    x_sample = np.random.rand(784)
    y_sample = 3
    
    # test construct_samples
    pos, neg = network.construct_samples(x_sample, y_sample)
    print(f"Sample construction: pos.shape={pos.shape}, neg.shape={neg.shape}")
    print(f"Positive label position: {np.argmax(pos[:10])}")
    print(f"Negative label position: {np.argmax(neg[:10])}")
    
    # test predict
    pred = network.predict(x_sample)
    probs = network.predict_probabilities(x_sample)
    print(f"Random prediction: {pred}, max_prob: {probs.max():.4f}")
    
    print("base test passed")

In [None]:
def run_small_scale_experiment(n_train=1000, n_test=1000, epochs=10, n_preview=5):
    """
    Train and evaluate a [794, 500, 300, 10] FFNetwork on a small MNIST subset.

    Args:
        n_train: number of leading training samples to train on.
        n_test: number of leading test samples used both for the ``X_test`` /
            ``y_test`` arguments of ``network.train`` and for the final
            evaluation.
        epochs: value forwarded to ``network.train(epochs=...)``.
        n_preview: how many individual test predictions to print at the end
            (clamped to the number of available test samples).

    The defaults reproduce the original hard-coded experiment
    (1000 / 1000 samples, 10 epochs, 5 previewed predictions).
    """
    print("\n=== small scale experiment ===")

    # load data and cut the subsets once, so training, evaluation and the
    # preview below are guaranteed to look at the same samples
    X_train, X_test, y_train, y_test = load_mnist()
    X_train_small, y_train_small = X_train[:n_train], y_train[:n_train]
    X_test_small, y_test_small = X_test[:n_test], y_test[:n_test]

    # build net: input layer is 794-dim (784 pixels + 10 label slots),
    # hidden layers are 500 and 300, output layer is 10-dim
    network = FFNetwork([794, 500, 300, 10], 0.01)
    print(f"Network architecture: {network.layer_sizes}")

    # small scale training
    print("\nstart training...")
    # perf_counter is monotonic, so the measured duration cannot be distorted
    # by wall-clock adjustments during training
    start_time = time.perf_counter()

    network.train(
        X_train=X_train_small,
        y_train=y_train_small,
        X_test=X_test_small,
        y_test=y_test_small,
        epochs=epochs
    )

    training_time = time.perf_counter() - start_time
    print(f"\ntraining completed, time used: {training_time:.2f} second")

    # detailed evaluation
    print("\n=== detailed evaluation ===")
    test_accuracy = network.evaluate(X_test_small, y_test_small)
    print(f"accuracy on the test set: {test_accuracy:.4f}")

    # analyze some predictions; never index past the end of the test subset
    print("\nsample prediction analysis:")
    for i in range(min(n_preview, len(X_test_small))):
        true_label = y_test_small[i]
        pred_label = network.predict(X_test_small[i])
        probs = network.predict_probabilities(X_test_small[i])
        confidence = probs.max()
        print(f"sample{i}: true_label={true_label}, predicted_label={pred_label}, confidence={confidence:.4f}")


In [None]:
def run_performance_comparison(architectures=None, n_samples=500, epochs=10):
    """
    Train several FFNetwork architectures on the same MNIST subset and print a
    comparison table (layer count, weight count, accuracy, training time).

    Args:
        architectures: list of layer-size lists to compare. ``None`` selects
            the original shallow / medium / deep trio.
        n_samples: number of leading samples taken from both the training set
            and the test set.
        epochs: value forwarded to ``network.train(epochs=...)``.

    The defaults reproduce the original hard-coded comparison
    (three architectures, 500 samples, 10 epochs).
    """
    print("\n=== performance comparison of different architectures ===")

    X_train, X_test, y_train, y_test = load_mnist()

    if architectures is None:
        architectures = [
            [794, 200, 10],          # shallow
            [794, 500, 300, 10],     # medium
            [794, 800, 400, 200, 10] # deep
        ]

    # Every architecture sees exactly the same subset, so slice only once.
    X_train_small, y_train_small = X_train[:n_samples], y_train[:n_samples]
    X_test_small, y_test_small = X_test[:n_samples], y_test[:n_samples]

    results = []

    for arch in architectures:
        print(f"\ntest architecture: {arch}")
        network = FFNetwork(arch, 0.01)

        # perf_counter is monotonic, so wall-clock adjustments cannot skew the timing
        start_time = time.perf_counter()
        network.train(
            X_train=X_train_small,
            y_train=y_train_small,
            X_test=X_test_small,
            y_test=y_test_small,
            epochs=epochs
        )
        train_time = time.perf_counter() - start_time

        # evaluate performance
        accuracy = network.evaluate(X_test_small, y_test_small)

        results.append({
            'architecture': arch,
            'layers': len(arch) - 1,
            # product of adjacent layer sizes: counts weight-matrix entries
            # only, bias terms (if the layers have any) are not included
            'parameters': sum(n_in * n_out for n_in, n_out in zip(arch, arch[1:])),
            'accuracy': accuracy,
            'train_time': train_time
        })

        print(f"accuracy: {accuracy:.4f}, training time: {train_time:.2f} second")

    # print results
    print("\n=== performance comparison of different architectures  ===")

    # Size the first column to the longest architecture string so that no
    # architecture is cut off (a fixed 20-character column dropped the output
    # layer of the deep network) and every column lines up with its header.
    arch_labels = [str(result['architecture']) for result in results]
    arch_width = max([len("architecture")] + [len(label) for label in arch_labels])

    header = (
        f"{'architecture':<{arch_width}}  {'number_of_layer':>15}  "
        f"{'parameter_number':>16}  {'accuracy':>8}  {'training_time':>13}"
    )
    print(header)
    print("-" * len(header))
    for label, result in zip(arch_labels, results):
        print(
            f"{label:<{arch_width}}  {result['layers']:>15}  "
            f"{result['parameters']:>16}  {result['accuracy']:>8.4f}  "
            f"{result['train_time']:>12.2f}s"
        )



In [None]:
# Driver: runs the smoke test and the small experiment, then optionally the
# (slower) architecture comparison after asking the user.
if __name__ == "__main__":
    print("Forward-Forward Algo test")
    print("=" * 50)

    # base test
    test_basic_functionality()

    # small-scale experiment
    run_small_scale_experiment()

    # performance comparison (optional)
    print("\n run performance comparison? type 'y' to continue")
    try:
        user_input = input().lower().strip()
    except (EOFError, NotImplementedError):
        # No interactive stdin: EOFError for a closed/piped stdin, and
        # IPython's StdinNotImplementedError (a NotImplementedError subclass)
        # when the notebook frontend cannot serve input requests, e.g. during
        # a headless "Run All". Treat that as "no" so the run still completes.
        user_input = ""
    if user_input == 'y':
        run_performance_comparison()

    print("\ntest completed")