Function that initializes a network using parameters in a dictionary

In [None]:
from neural_network import NeuralNetwork
from layers import Softmax, FeedForward, Attention, EmbedPosition, LinearLayer


def init_neural_network(t_params):
    """Assemble a `NeuralNetwork` from a dictionary of training parameters.

    t_params: training parameters; see the functions in `train_test_params.py`.
        The keys read here are "L", "d", "p", "k", "n_max" and "m".

    The returned network holds its layers in this order: `EmbedPosition`,
    then "L" repetitions of (`FeedForward`, `Attention`), then `LinearLayer`,
    then `Softmax`.
    """
    # NOTE(review): each repeated block puts FeedForward before Attention;
    # confirm that this ordering is intended.
    # The repeated blocks are instantiated before the other layers. Keep that
    # order in case the layer constructors draw initial weights from a shared
    # RNG (not visible from here).
    hidden_layers = []
    for _ in range(t_params["L"]):
        hidden_layers.append(FeedForward(d=t_params["d"], p=t_params["p"]))
        hidden_layers.append(Attention(d=t_params["d"], k=t_params["k"]))

    embedding = EmbedPosition(
        n_max=t_params["n_max"],
        m=t_params["m"],
        d=t_params["d"],
    )
    projection = LinearLayer(
        input_size=t_params["d"],
        output_size=t_params["m"],
    )
    output_activation = Softmax()

    # Flat layer list: embedding, all repeated blocks, projection, softmax.
    return NeuralNetwork(
        [embedding, *hidden_layers, projection, output_activation]
    )


# Sorting problem

In [14]:
# Fetch the parameter collection for the sorting problem; the cells below
# index it by key ('r', 'm', 'D', 'b_train', 'b_test', 'alpha', 'n_iter').
from train_test_params import get_training_params_sort
sort_params = get_training_params_sort()

Prepare training and testing data for the sorting problem

In [None]:
from data_generators import get_train_test_sorting

# Generate the train/test batches for the sorting problem from `sort_params`.
training_data = get_train_test_sorting(
    length=sort_params["r"],
    num_ints=sort_params["m"],
    samples_per_batch=sort_params["D"],
    n_batches_train=sort_params["b_train"],
    n_batches_test=sort_params["b_test"],
)

# Pull the four splits out of the returned mapping in one go.
x_train, y_train, x_test, y_test = (
    training_data[split]
    for split in ("x_train", "y_train", "x_test", "y_test")
)

Let's initialize the network

In [None]:
# Construct a new network from the sorting parameters.
network = init_neural_network(sort_params)

Train the network using `CrossEntropy` as the loss function (objective function).

In [None]:
from train_network import train_network
from layers import CrossEntropy

# Loss (objective) function handed to the training routine.
loss = CrossEntropy()

# Train `network` on the sorting training data using the step size and
# iteration count stored in `sort_params`.
train_network(
    network=network,
    x_train=x_train,
    y_train=y_train,
    loss_func=loss,
    alpha=sort_params['alpha'],
    n_iter=sort_params['n_iter'],
    num_ints=sort_params['m'],
    # presumably disables saving the trained network to disk — confirm in
    # train_network.py
    dump_to_pickle_file=False,
)

Or load a pre-trained network from a pickle dump

In [None]:
import dill as pickle

# SECURITY: unpickling can execute arbitrary code embedded in the file; only
# load dumps that you created yourself or otherwise trust.
# NOTE: this rebinds `network` to the object stored in the dump.
with open("nn_dump_exer3.pkl", "rb") as f:
    network = pickle.load(f)

In [None]:
from test_network import test_trained_network

# Run the current `network` against the sorting test data.
test_trained_network(
    network=network, x_test=x_test, y_test=y_test, num_ints=sort_params['m']
)

# Addition problem

In [None]:
# Fetch the parameter collection for the addition problem; the cells below
# index it by key ('r', 'D', 'b_train', 'b_test', 'alpha', 'n_iter', 'm').
from train_test_params import get_training_params_addition
add_params = get_training_params_addition()

In [None]:
# Construct a new network from the addition parameters (rebinds `network`).
network = init_neural_network(add_params)

In [None]:
from data_generators import get_train_test_addition

# Generate the train/test batches for the addition problem from `add_params`.
training_data = get_train_test_addition(
    n_digit=add_params["r"],
    samples_per_batch=add_params["D"],
    n_batches_train=add_params["b_train"],
    n_batches_test=add_params["b_test"],
)

# The first three splits are used exactly as returned.
x_train, y_train, x_test = (
    training_data[split] for split in ("x_train", "y_train", "x_test")
)

# The test targets are flipped along their third axis: per the original
# author's note, (c0, c1, c2) is stored reversed in the training data.
y_test = training_data["y_test"][:, :, ::-1]

In [None]:
from train_network import train_network
from layers import CrossEntropy

# Loss (objective) function handed to the training routine.
loss = CrossEntropy()

# Train `network` on the addition training data using the step size and
# iteration count stored in `add_params`.
train_network(
    network=network,
    x_train=x_train,
    y_train=y_train,
    loss_func=loss,
    alpha=add_params['alpha'],
    n_iter=add_params['n_iter'],
    num_ints=add_params['m'],
    # presumably disables saving the trained network to disk — confirm in
    # train_network.py
    dump_to_pickle_file=False,
)

In [None]:
import dill as pickle

# SECURITY: unpickling can execute arbitrary code embedded in the file; only
# load dumps that you created yourself or otherwise trust.
# NOTE: this rebinds `network` to the object stored in the dump.
with open("nn_dump_add.pkl", "rb") as f:
    network = pickle.load(f)

In [None]:
from test_network import test_trained_network

# Run the current `network` against the addition test data (`y_test` was
# reversed along its last axis when the data was prepared).
test_trained_network(
    network=network, x_test=x_test, y_test=y_test, num_ints=add_params['m']
)