In [86]:
from nn import nn, preprocess, io
import numpy as np
import pytest
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

In [87]:
# Enable IPython's autoreload extension in mode 2 so edits to imported modules
# (e.g. the project's nn package) are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [102]:
# Seed NumPy's global RNG so the random data drawn later is reproducible.
np.random.seed(15)

# Shared fixture for the tests below: a single-layer network
# (3 inputs -> 1 ReLU output) configured with the MSE loss.
nn_test = nn.NeuralNetwork(
    [{'input_dim': 3, 'output_dim': 1, 'activation': 'relu'}],
    lr=0.1,
    seed=15,
    batch_size=1,
    epochs=1,
    loss_function='mse',
)

Forward passes

In [94]:
# Single forward pass through one layer with hand-picked inputs.
# The positional arguments are presumably (weights, bias, previous activations,
# activation name) -- confirm against nn.NeuralNetwork._single_forward.
A_curr, Z_curr = nn_test._single_forward(np.array([[30, 40, 30, 20]]),
                                         np.array([[1]]),
                                         np.array([1, 2, 3, 4]),
                                         'relu')

# Manual calculation: 30*1 + 40*2 + 30*3 + 20*4 + 1 = 281, and ReLU leaves a
# positive value unchanged, so both the linear output and the activation are 281.
# np.all collapses the element-wise comparison to a single bool, so the assert
# fails cleanly instead of depending on the truthiness of an array.
assert np.all(A_curr == np.array([[281]])), 'Single forward pass returned an unexpected activation.'
assert np.all(Z_curr == np.array([281])), 'Single forward pass returned an unexpected linear output.'

In [100]:
# Complete forward pass.
# Replace the network's parameters with hand-picked weights and biases so the
# expected output can be calculated manually.
# NOTE: this overwrites nn_test._param_dict, so any cell that runs afterwards
# sees these parameters on nn_test.
nn_test._param_dict = {
    "b1": np.array([[1], [2]]),
    "W1": np.array([[1, 2, 3, 4], [4, 3, 2, 1]]),
    "b2": np.array([[1]]),
    "W2": np.array([[1, 4]]),
}

# Run the forward pass on one four-feature input and compare with the manual result.
y_hat, cache = nn_test.forward(np.array([1, 2, 3, 4]))
assert np.all(y_hat == np.array([[31, 22]]))

Back propagation.

In [80]:
# Single backprop step on hand-picked inputs.
dA_prev, dW_curr, db_curr = nn_test._single_backprop(
    np.array([[20, 30, 40, 20]]),
    np.array([[1]]),
    np.array([[1]]),
    np.array([[1, 2, 3, 4]]),
    np.array([[5]]),
    "relu",
)

# Every returned gradient must equal its manually calculated value; np.array_equal
# also requires the shapes to match.
assert np.array_equal(dA_prev, np.array([[100, 150, 200, 100]]))
assert np.array_equal(dW_curr, np.array([[5, 10, 15, 20]]))
assert np.array_equal(db_curr, np.array([[5]]))

In [None]:
# Prediction

In [105]:
# Random regression data: 75 training and 25 test samples with 3 features and 1 target.
X_train = np.random.rand(75, 3)
y_train = np.random.rand(75, 1)
X_test = np.random.rand(25, 3)
y_test = np.random.rand(25, 1)

# Train a fresh network rather than reusing nn_test: the complete-forward-pass cell
# replaces nn_test._param_dict with hand-picked weights for 4-feature inputs, so when
# the notebook is run top to bottom the shared fixture no longer matches this
# 3-feature data.
nn_pred = nn.NeuralNetwork(
    [{'input_dim': 3, 'output_dim': 1, 'activation': 'relu'}],
    lr=0.1,
    seed=15,
    batch_size=1,
    epochs=1,
    loss_function='mse',
)

fit = nn_pred.fit(X_train, y_train, X_test, y_test)

pred = nn_pred.predict(X_test)

# predict must return exactly one prediction per test target.
assert y_test.shape == pred.shape, 'Predictions do not have the same shape as the test targets.'

MSE

In [43]:
# Hand-picked targets and predictions; they disagree in the last two positions only.
y = np.array([0, 1, 1, 1, 0])
y_hat = np.array([0, 1, 1, 0, 1])

# MSE as calculated by the implementation.
mse_method = nn_test._mean_squared_error(y, y_hat)

# Manual calculation: two squared errors of 1 averaged over five samples = 2 / 5 = 0.4.
assert np.allclose(mse_method, 0.4), 'MSE does not match the manual calculation.'

# Manual gradient with respect to y_hat: 2 * (y_hat - y) / n, which is non-zero
# only where y and y_hat differ.
mse_bprop = np.array([0., 0., 0., -0.4, 0.4])

# MSE gradient as calculated by the implementation.
mse_bprop_method = nn_test._mean_squared_error_backprop(y, y_hat)

# Compare with a floating-point tolerance rather than exact equality.
assert np.allclose(mse_bprop_method, mse_bprop), 'MSE backprop does not match the manual calculation.'

BCE

In [65]:
# Hand-picked targets and predicted probabilities; they differ in positions 0 and 3.
y = np.array([0.5, 0.5, 0.4, 0.5, 0.4])
y_hat = np.array([0.4, 0.5, 0.4, 0.4, 0.4])

# BCE as calculated by the implementation.
bce_method = nn_test._binary_cross_entropy(y, y_hat)

# Reference value from the textbook definition, averaged over the samples:
# -mean(y * log(y_hat) + (1 - y) * log(1 - y_hat)).
expected_bce = -np.mean(y * np.log(y_hat) + (1 - y) * np.log(1 - y_hat))
assert np.allclose(bce_method, expected_bce), 'BCE does not match the manual calculation.'

# Manual gradient with respect to y_hat, rounded to two decimals:
# ((1 - y) / (1 - y_hat) - y / y_hat) / n, which is zero wherever y == y_hat.
bce_bprop = np.array([-0.08, 0.0, 0.0, -0.08, 0.0])

# BCE gradient as calculated by the implementation.
bce_bprop_method = nn_test._binary_cross_entropy_backprop(y, y_hat)

# Round the implementation's gradient to the precision of the manual values.
round_bce_bprop_method = np.round(bce_bprop_method, 2)

# Assert that the BCE backprop matches the manual calculation.
assert np.all(bce_bprop == round_bce_bprop_method), 'BCE backprop does not match the manual calculation.'

sample_seqs unit test

In [None]:
alphabet = ['A', 'T', 'C', 'G']

# Build 1000 random 17-character sequences. Each sequence is joined into a string
# so the sampler receives a flat list of strings rather than a list of character lists.
seqs = [''.join(np.random.choice(alphabet, size=17)) for _ in range(1000)]

# Unbalanced labels: 800 positives followed by 200 negatives.
labels = [True] * 800 + [False] * 200

# Perform balanced sampling.
sampled_seqs, sampled_labels = preprocess.sample_seqs(seqs, labels)

# Split the sampled labels by class.
pos_labs = [lab for lab in sampled_labels if lab]
neg_labs = [lab for lab in sampled_labels if not lab]

# Assert that sampled sequences + labels are same size as original lists.
assert len(seqs) == len(sampled_seqs), 'Sampled sequences do not match original list length.'
assert len(labels) == len(sampled_labels), 'Sampled labels do not match original list length.'

# Assert that the positive and negative classes are roughly balanced: their counts
# may differ by fewer than 50 samples.
assert abs(len(pos_labs) - len(neg_labs)) < 50, 'Classes are not balanced after sampling.'

one_hot_encode_seqs unit test

In [None]:
seqs = ['ATCG', 'GCTA']

# Expected result: each base becomes a group of four values
# (A = 1 0 0 0, T = 0 1 0 0, C = 0 0 1 0, G = 0 0 0 1) and the groups are
# concatenated in sequence order, giving one flat row per sequence.
actual_encodings = np.array([
    [1, 0, 0, 0,  0, 1, 0, 0,  0, 0, 1, 0,  0, 0, 0, 1],  # A T C G
    [0, 0, 0, 1,  0, 0, 1, 0,  0, 1, 0, 0,  1, 0, 0, 0],  # G C T A
])

encoded_seqs = preprocess.one_hot_encode_seqs(seqs)

# Every element of the encoder's output must equal the hand-written encoding.
assert np.all(actual_encodings == encoded_seqs), 'One-hot encoding is not outputting the expected sequence encoding.'