# 1

## 1.1

We want to train a model that predicts $d = a \cdot b + c$ given $a$, $b$ and $c$.
$a$, $b$ and $c$ are non-negative integers: $a$ and $c$ have at most two digits (zero-padded to two) and $b$ has one digit.
Write a three-digit number $n$ in base $10$ as $n_0 n_1 n_2$ ($n_0 \cdot 10^2 + n_1 \cdot 10^1 + n_2 \cdot 10^0$).
Since $d \in \{0, 1, \dots, 990\}$ has at most three digits, and we reverse the digits of $d$, a training pair $(x, y)$ becomes:

\begin{align}
    x &= [ a_0, a_1, b, c_0, c_1, d_2, d_1 ] \\
    y &= [ a_1, b, c_0, c_1, d_2, d_1, d_0 ]
\end{align}

A concrete example shows that padding with zeros keeps the length constant:

$$
    a = 5, b = 5, c = 33 \\
    a \cdot b + c = 58
$$

gives

\begin{align*}
    x &= [0,5,5,3,3,8,5] \\
    y &= [5,5,3,3,8,5,0].
\end{align*}

# Sorting problem

In [None]:
# Import only the parameter class this section uses; a wildcard import would
# pull every public name of train_test_params into the notebook namespace and
# could silently shadow names defined in other cells.
from train_test_params import SortParams1

# Parameter object for the sorting experiment. The cells below read its
# r, m, D, b_train, b_test, alpha and n_iter attributes.
sort_params = SortParams1()

Prepare training and testing data for the sorting problem

In [None]:
from data_generators import get_train_test_sorting

# Settings for the sorting data generator, all read from ``sort_params``.
sorting_data_options = {
    "length": sort_params.r,
    "num_ints": sort_params.m,
    "samples_per_batch": sort_params.D,
    "n_batches_train": sort_params.b_train,
    "n_batches_test": sort_params.b_test,
}
training_data = get_train_test_sorting(**sorting_data_options)

# Split the generator output into network inputs ...
x_train, x_test = training_data["x_train"], training_data["x_test"]

# ... and targets. The training targets keep only the entries from index
# r - 1 onward along the third axis; the test targets are used as generated.
first_target_index = sort_params.r - 1
y_train = training_data["y_train"][:, :, first_target_index:]
y_test = training_data["y_test"]

Let's initialize the network

In [None]:
from train_network import init_neural_network

# Initialise the network from the sorting parameter set; ``network`` is the
# object that the training and testing cells below operate on.
network = init_neural_network(sort_params)

Train the network using `CrossEntropy` as the loss function (objective function).

In [None]:
from train_network import train_network
from layers_numba import CrossEntropy

# Cross-entropy is the loss (objective) used for training.
loss = CrossEntropy()

# Training settings for the sorting problem, read from ``sort_params``.
# NOTE(review): dump_to_pickle_file=False presumably means the trained network
# is not written to disk -- confirm against train_network.
sort_training_options = {
    "alpha": sort_params.alpha,
    "n_iter": sort_params.n_iter,
    "num_ints": sort_params.m,
    "dump_to_pickle_file": False,
}

train_network(
    network=network,
    x_train=x_train,
    y_train=y_train,
    loss_func=loss,
    **sort_training_options,
)

Or load a pre-trained network from a pickle dump

In [None]:
import dill as pickle

# NOTE: unpickling can execute arbitrary code stored in the file, so only load
# dumps that you created yourself or otherwise trust.
with open("nn_dump_exer3.pkl", "rb") as dump_file:
    # Rebinds ``network``, replacing whatever network object was bound before.
    network = pickle.load(dump_file)

In [None]:
from test_network import test_trained_network

# Run the project's test routine for the current ``network`` on the sorting
# test inputs and targets.
test_trained_network(
    network=network,
    x_test=x_test,
    y_test=y_test,
    num_ints=sort_params.m,
)

# Addition problem

In [None]:
from train_test_params import AddParams

# Parameter object for the addition experiment. The cells below read its
# r, D, b_train, b_test, alpha, n_iter and m attributes.
add_params = AddParams()

In [None]:
from train_network import init_neural_network

# Rebind ``network`` to a network initialised from the addition parameters;
# any network object bound earlier in the notebook is replaced.
network = init_neural_network(add_params)

In [None]:
from data_generators import get_train_test_addition

# Settings for the addition data generator, all read from ``add_params``.
addition_data_options = {
    "n_digit": add_params.r,
    "samples_per_batch": add_params.D,
    "n_batches_train": add_params.b_train,
    "n_batches_test": add_params.b_test,
}
training_data = get_train_test_addition(**addition_data_options)

x_train, x_test = training_data["x_train"], training_data["x_test"]

# Training targets keep only the entries from index 2 * r - 1 onward along the
# third axis.
first_target_index = add_params.r * 2 - 1
y_train = training_data["y_train"][:, :, first_target_index:]

# Remember that (c0, c1, c2) is reversed in the training data, so the third
# axis of the test targets is flipped here.
y_test = training_data["y_test"][:, :, ::-1]

In [None]:
from train_network import train_network
from layers_numba import CrossEntropy

# Cross-entropy is the loss (objective) used for training.
loss = CrossEntropy()

# Training settings for the addition problem, read from ``add_params``.
# NOTE(review): with dump_to_pickle_file=True the trained network is presumably
# written to a pickle file (is_numba_dump likely selects the dump format) --
# confirm both against train_network.
add_training_options = {
    "alpha": add_params.alpha,
    "n_iter": add_params.n_iter,
    "num_ints": add_params.m,
    "is_numba_dump": True,
    "dump_to_pickle_file": True,
}

train_network(
    network=network,
    x_train=x_train,
    y_train=y_train,
    loss_func=loss,
    **add_training_options,
)

In [None]:
import dill as pickle

# NOTE: unpickling can execute arbitrary code stored in the file, so only load
# dumps that you created yourself or otherwise trust.
with open("nn_dump_add.pkl", "rb") as dump_file:
    # Rebinds ``network``, replacing whatever network object was bound before.
    network = pickle.load(dump_file)

In [None]:
from test_network import test_trained_network

# Run the project's test routine for the current ``network`` on the addition
# test inputs and targets.
test_trained_network(
    network=network,
    x_test=x_test,
    y_test=y_test,
    num_ints=add_params.m,
)