In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import confusion_matrix
from random import sample
from functions.data_preparation import *
from functions.quick_maths import *
from functions.util import *

#### neural network building blocks

In [None]:
def initialize_parameters(layer_dims):
	"""Create the initial weights and biases for every layer of the network.

	:param layer_dims: sequence of layer sizes; entry 0 is the input size.
	:return: dict holding, for l = 1 .. len(layer_dims) - 1, "W<l>" with shape
		(layer_dims[l], layer_dims[l - 1]) drawn from np.random.randn and scaled by 0.01,
		and "b<l>" as a zero column vector of shape (layer_dims[l], 1).
	"""
	parameters = {}
	consecutive_sizes = zip(layer_dims[:-1], layer_dims[1:])

	for layer, (n_in, n_out) in enumerate(consecutive_sizes, start=1):
		parameters[f"W{layer}"] = np.random.randn(n_out, n_in) * 0.01
		parameters[f"b{layer}"] = np.zeros((n_out, 1))

	return parameters

In [None]:
def linear_forward(W, b, A_prev):
	"""Compute the linear part of a layer: Z = W . A_prev + b.

	:return: tuple (Z, (W, b, A_prev)); the inner tuple keeps the inputs for the backward pass.
	"""
	linear_cache = (W, b, A_prev)
	pre_activation = np.dot(W, A_prev) + b
	return pre_activation, linear_cache

In [None]:
def linear_activation_forward(W, b, A, activation):
	"""Run one layer forward: linear step followed by the activation function.

	:param activation: callable applied to the pre-activation Z.
	:return: tuple (activation(Z), ((W, b, A), Z)).
	"""
	pre_activation, linear_cache = linear_forward(W, b, A)
	return activation(pre_activation), (linear_cache, pre_activation)

In [None]:
def forward_propagation(X, parameters, activation_functions, L):
	"""Push X through layers 1 .. L - 1 and collect the per-layer caches.

	:param parameters: dict with "W<l>" / "b<l>" entries for every layer.
	:param activation_functions: indexable of callables; entry l - 1 is used for layer l.
	:param L: number of entries in layer_dims (input layer included), so L - 1 layers are evaluated.
	:return: tuple (y_hat, caches) where y_hat is the output of the last layer and
		caches is the list of caches returned by linear_activation_forward, in layer order.
	"""
	caches = []
	layer_output = X

	for idx in range(L - 1):
		layer = idx + 1
		W, b = parameters[f"W{layer}"], parameters[f"b{layer}"]
		layer_output, layer_cache = linear_activation_forward(W, b, layer_output, activation_functions[idx])
		caches.append(layer_cache)

	# the output of the last layer is the network's prediction
	return layer_output, caches

In [None]:
def linear_backward(dZ, cache, m):
	"""Back-propagate through the linear part of one layer.

	:param dZ: gradient of the cost with respect to this layer's pre-activation.
	:param cache: tuple (W, b, A_prev) stored by linear_forward; b is not needed here.
	:param m: value the weight and bias gradients are divided by.
	:return: tuple (dW, db, dA_prev).
	"""
	W, _, A_prev = cache
	scale = 1 / m

	dA_prev = np.dot(W.T, dZ)
	db = scale * np.sum(dZ, axis=1, keepdims=True)
	dW = scale * np.dot(dZ, A_prev.T)

	return dW, db, dA_prev

In [None]:
def linear_activation_backward(dA, caches, d_activation, m):
	"""Back-propagate through one layer (activation first, then the linear part).

	:param dA: gradient of the cost with respect to this layer's output.
	:param caches: tuple (linear_cache, Z) as produced by linear_activation_forward.
	:param d_activation: callable returning the activation's derivative evaluated at Z.
	:return: tuple (dW, db, dA_prev) from linear_backward.
	"""
	linear_cache, pre_activation = caches
	dZ = dA * d_activation(pre_activation)
	return linear_backward(dZ, linear_cache, m)

In [None]:
def back_propagation(Y_hat, Y, caches, d_act_functions, m):
	"""Compute the gradients of every layer, walking from the last layer to the first.

	:param caches: per-layer caches from forward_propagation; its length is the number of layers.
	:param d_act_functions: indexable of derivative callables; entry l - 1 belongs to layer l.
	:return: dict with "dW<l>", "db<l>" and "dA<l - 1>" for every layer l, plus "dA<L>" for the output.
	"""
	n_layers = len(caches)

	# derivative of the cross-entropy cost with respect to the network output
	grads = {f"dA{n_layers}": -(np.divide(Y, Y_hat) - np.divide(1 - Y, 1 - Y_hat))}

	for layer in range(n_layers, 0, -1):
		dW, db, dA_prev = linear_activation_backward(grads[f"dA{layer}"], caches[layer - 1],
													 d_act_functions[layer - 1], m)
		grads[f"dW{layer}"] = dW
		grads[f"db{layer}"] = db
		grads[f"dA{layer - 1}"] = dA_prev

	return grads

In [None]:
def update_parameters(parameters, grads, L, alpha, reg_term):
	"""Apply one gradient-descent step and return the updated parameters as a new dict.

	The arrays stored in `parameters` are left untouched: dict.copy() is shallow, so an
	in-place `-=` would also change the caller's arrays (and any snapshot that shares them).
	Each updated entry is therefore a freshly allocated array.

	:param parameters: dict with "W<l>" / "b<l>" entries.
	:param grads: dict with "dW<l>" / "db<l>" entries.
	:param L: number of entries in layer_dims; layers 1 .. L - 1 are updated.
	:param alpha: learning rate.
	:param reg_term: indexable; entry l - 1 is added to dW<l> before the step.
	:return: new dict with the updated parameters.
	"""
	params = parameters.copy()

	for l in range(1, L):
		# out-of-place arithmetic on purpose, see docstring
		params[f"W{l}"] = parameters[f"W{l}"] - alpha * (grads[f"dW{l}"] + reg_term[l - 1])
		params[f"b{l}"] = parameters[f"b{l}"] - alpha * grads[f"db{l}"]

	return params

#### debug helpers

In [None]:
def forward_prop_and_cost(X, Y, parameters, act_functions, L, m):
	"""Run a forward pass and return only the cross-entropy cost of its output."""
	predictions = forward_propagation(X, parameters, act_functions, L)[0]
	return cross_entropy_cost(predictions, Y, m)

In [None]:
def get_nudged_cost(X, Y, theta, i, layer_dims, act_functions, L, m, epsilon):
	"""Return the cost after shifting entry i of the parameter vector by epsilon.

	Note: theta is modified in place, so callers should pass a copy.
	"""
	theta[i] += epsilon
	nudged_parameters = vector_to_dict(theta, "W", "b", layer_dims)
	return forward_prop_and_cost(X, Y, nudged_parameters, act_functions, L, m)

In [None]:
def check_gradients(X, Y, parameters, grads, layer_dims, act_functions, L, m, epsilon=10e-7):
	"""Compare the back-propagated gradients with a two-sided numerical approximation.

	Every entry i of the parameter vector is nudged by +epsilon and -epsilon, and
	(cost_plus - cost_minus) / (2 * epsilon) is used as the numerical gradient. The relative
	difference ||approx - grad|| / (||approx|| + ||grad||) is printed and returned.

	:param parameters: dict with "W<l>" / "b<l>" entries.
	:param grads: dict with "dW<l>" / "db<l>" entries from back_propagation.
	:param epsilon: nudge size; note that the default 10e-7 equals 1e-6.
	:return: the relative difference (0.0 when both gradients are entirely zero).
	"""
	# presumably both calls flatten the dict entries in the same order -- verify in dict_to_vector
	theta = dict_to_vector(parameters, 'W', 'b', L)
	d_theta = dict_to_vector(grads, 'dW', 'db', L)

	d_theta_approx = np.zeros_like(theta)

	for i in range(len(theta)):
		# get_nudged_cost modifies the vector it receives, hence a fresh copy per evaluation
		cost_plus = get_nudged_cost(X, Y, theta.copy(), i, layer_dims, act_functions, L, m, epsilon)
		cost_minus = get_nudged_cost(X, Y, theta.copy(), i, layer_dims, act_functions, L, m, -epsilon)

		d_theta_approx[i] = (cost_plus - cost_minus) / (2 * epsilon)

	denominator = L2_norm(d_theta_approx) + L2_norm(d_theta)
	# two all-zero gradients agree exactly; avoid 0 / 0 turning into nan
	remainder = L2_norm(d_theta_approx - d_theta) / denominator if denominator != 0 else 0.0

	# written as "not <=" so that a nan remainder is reported as a failure, not as success
	if not remainder <= 10e-5:
		print(
			f"Warning!\nError is unusually high ({remainder}), there is most likely an error in gradient calculation!")
	else:
		# one green line instead of 130 lines with arbitrary escape codes
		print(f"\033[92mremainder is {remainder}! Gradient implementation should be correct!\033[0m")

	return remainder

In [None]:
# Leftover check of the ANSI colour codes; the "difference" shown is a hard-coded 1.
print(f"\033[92mYour backward propagation works perfectly fine! difference = {1}\033[0m")

#### general utility

In [None]:
def pipeline(X, Y, m_axis, m_reduced, hyper_parameters):
	"""Align the data, optionally reduce it, and train a model on the result.

	:param m_reduced: forwarded to get_reduced_dataframes; 0 skips the reduction step.
	:param hyper_parameters: dict expanded into keyword arguments of train_model.
	:return: whatever train_model returns (learned parameters and recorded costs).
	"""
	features, labels = get_aligned_dfs(X, Y, m_axis)

	if m_reduced != 0:
		features, labels = get_reduced_dataframes(features, labels, m_reduced, m_axis)

	return train_model(features, labels, m_axis, **hyper_parameters)

In [None]:
def alpha_decay(alpha, count_decreased_alpha, consec_cost_increases):
	"""Divide the learning rate by 3 when the cost has risen exactly 3 times in a row.

	:return: tuple (alpha, count_decreased_alpha); both are unchanged unless the decay fired,
		in which case alpha is a third of its input and the counter is one higher.
	"""
	if consec_cost_increases != 3:
		return alpha, count_decreased_alpha

	reduced_alpha = alpha / 3
	fancy_print(f'Cost is increasing. Reducing alpha from {alpha} to {reduced_alpha}', 'yellow')
	return reduced_alpha, count_decreased_alpha + 1

In [None]:
def manage_costs(cost, last_cost, costs, consec_cost_increases, i, num_iter):
	"""Record and report the current cost and maintain the streak of consecutive increases.

	:param last_cost: cost of the previous iteration; 0 is treated as "no previous cost".
	:param costs: list that receives (i, cost) on every 5th iteration (appended in place).
	:return: tuple (updated streak, cost); the second entry is meant to become the next last_cost.
	"""
	if i % 5 == 0:
		costs.append((i, cost))

	improved_or_first = cost < last_cost or last_cost == 0
	streak = 0 if improved_or_first else consec_cost_increases + 1

	is_last_iteration = i == num_iter - 1
	if i % 20 == 0 or is_last_iteration:
		print(f"Cost at {i}: {cost}")

	return streak, cost

In [None]:
def update_best_parameters(parameters, cost, last_cost, best_parameters=None):
	"""Pick the parameters that should be remembered as "best".

	:param parameters: candidate parameters that produced `cost`.
	:param cost: cost of the candidate.
	:param last_cost: reference cost the candidate has to beat.
	:param best_parameters: value to keep when the candidate is not better; defaults to None,
		which matches the result existing three-argument callers already get.
	:return: `parameters` when cost < last_cost, otherwise `best_parameters`.
	"""
	if cost < last_cost:
		return parameters
	# hand the previous best back explicitly instead of falling off the end of the function
	return best_parameters

#### THE HEART

In [None]:
def train_model(X, Y, m_axis, layer_dims, act_list, act_dict, num_iterations, alpha, lambd):
	"""Train a fully connected network with batch gradient descent.

	:param X: input data; its size along axis 1 - m_axis becomes the input layer size.
	:param Y: labels; its size along m_axis is used as the number of examples m.
	:param m_axis: axis of Y along which the examples lie.
	:param layer_dims: list of layer sizes; entry 0 is overwritten in place with the input size.
	:param act_list: activation names, one per layer, resolved through act_dict.
	:param act_dict: mapping from activation (and derivative) names to callables.
	:param num_iterations: maximum number of gradient-descent steps.
	:param alpha: initial learning rate; reduced by alpha_decay while the cost keeps rising.
	:param lambd: L2 regularisation strength; 0 disables regularisation.
	:return: tuple (best_parameters, costs): a copy of the parameters with the lowest cost seen
		and the list of (iteration, cost) pairs collected by manage_costs.
	"""
	np.random.seed(1)

	act_functions = get_act_functions(act_list, act_dict)
	d_act_functions = get_act_functions(get_d_strings(act_list), act_dict)

	consec_cost_increases = 0
	count_decreased_alpha = 0

	costs = []
	# start from +inf so that the first computed cost always counts as an improvement
	last_cost = float("inf")
	best_cost = float("inf")

	m = Y.shape[m_axis]
	L = len(layer_dims)
	reg_term = [0 for _ in range(L)]
	# the caller's list is updated on purpose: the input size is only known once X is available
	layer_dims[0] = X.shape[1 - m_axis]

	parameters = initialize_parameters(layer_dims)
	best_parameters = parameters

	for i in range(num_iterations):
		Y_hat, caches = forward_propagation(X, parameters, act_functions, L)

		if lambd == 0:
			cost = cross_entropy_cost(Y_hat, Y, m)
		else:
			weights = get_weights(parameters, L)
			cost = L2_cross_entropy_cost(Y_hat, Y, lambd, weights, m)
			reg_term = d_L2_regularization(lambd, m, weights)

		grads = back_propagation(Y_hat, Y, caches, d_act_functions, m)

		# `cost` belongs to the parameters of this forward pass, so snapshot them before the update.
		# Compare against the best cost so far (not just the previous one) and copy the arrays so
		# that later updates cannot alter the snapshot.
		candidate = update_best_parameters(parameters, cost, best_cost)
		if candidate is not None:
			best_cost = cost
			best_parameters = {name: np.copy(value) for name, value in candidate.items()}

		# check_gradients(X, Y, parameters, grads, layer_dims, act_functions, L, m)

		parameters = update_parameters(parameters, grads, L, alpha, reg_term)

		# manage_costs returns (streak, cost); the second value becomes the next iteration's last_cost
		consec_cost_increases, last_cost = manage_costs(cost, last_cost, costs, consec_cost_increases, i,
														num_iterations)

		alpha, count_decreased_alpha = alpha_decay(alpha, count_decreased_alpha, consec_cost_increases)

		if count_decreased_alpha == 7:
			print("Cost increasing, stopping training early.\nThe best parameters learned were saved and returned.")
			return best_parameters, costs

	fancy_print(f"`!;<>;`!\nTraining finished successfully {num_iterations}!", 'green')
	return best_parameters, costs

In [None]:
# notebook-wide settings
np.random.seed(1)
sns.set_style("darkgrid")

# lookup table from activation name to implementation; derivatives use the "d_" prefix
activation_dict = {
	"relu": relu,
	"d_relu": d_relu,
	"sigmoid": sigmoid,
	"d_sigmoid": d_sigmoid,
	"leaky_relu": leaky_relu,
	"d_leaky_relu": d_leaky_relu,
}

# axis along which the training examples lie
m_axis = 1

# settings that are tuned per run
m_reduced = 5000

In [None]:
# Unpack the six frames returned by prepare_dataframes (train / validation / test, judging by the names).
X_train, Y_train, X_val, Y_val, X_test, Y_test = prepare_dataframes(*load_data(), m_axis)

In [None]:
# hyperparameters for the full training run
hyper_parameters = dict(
	alpha=0.3,
	lambd=0,
	num_iterations=3000,
	layer_dims=[0, 3, 2, 1],  # entry 0 is a placeholder, train_model overwrites it with the input size
	act_list=['relu', 'relu', 'sigmoid'],
	act_dict=activation_dict,
)

# keyword arguments for pipeline(): a tiny, single-iteration run intended for gradient checking
gradient_checking_parameters = dict(
	X=X_train,
	Y=Y_train,
	m_axis=m_axis,
	m_reduced=5,
	hyper_parameters=dict(
		alpha=0.1,
		lambd=0,
		num_iterations=1,
		layer_dims=[0, 3, 2, 1],
		act_list=['relu', 'relu', 'sigmoid'],
		act_dict=activation_dict,
	),
)

In [None]:
# Full training run; pipeline hands back train_model's result: the parameters and the recorded costs.
learned_parameters, computed_costs = pipeline(X_train, Y_train, m_axis, m_reduced, hyper_parameters)

In [None]:
# p, c = pipeline(**gradient_checking_parameters)

#### construction site

In [None]:
class Stats:
	"""Plain container for the four confusion-matrix counts."""

	def __init__(self, tp, tn, fp, fn):
		"""Store the counts as given.

		:param tp: number of true positives.
		:param tn: number of true negatives.
		:param fp: number of false positives.
		:param fn: number of false negatives.
		"""
		self.tp = tp
		self.tn = tn
		self.fp = fp
		self.fn = fn

In [None]:
def get_stats(Y, predictions):
	"""Count actual positives, actual negatives, true positives and true negatives.

	:param Y: array of 0/1 labels.
	:param predictions: array of 0/1 predictions with the same shape as Y.
	:return: tuple of ints (p, n, tp, tn).
	"""
	actual_positive = np.equal(Y, 1)
	actual_negative = np.equal(Y, 0)

	p = int(np.sum(actual_positive))
	n = int(np.sum(actual_negative))
	# a chained "Y == 1 == predictions" would call bool() on an array; combine the masks instead
	tp = int(np.sum(actual_positive & np.equal(predictions, 1)))
	tn = int(np.sum(actual_negative & np.equal(predictions, 0)))

	return p, n, tp, tn

In [None]:
def get_true_positives(Y, predictions):
	"""Count the positions where both the label and the prediction are 1.

	Comparing the boolean mask (Y == 1) with the predictions would also count the positions
	where both are 0, so the two conditions are combined with a logical AND instead.
	"""
	return np.sum(np.equal(Y, 1) & np.equal(predictions, 1))

In [None]:
def get_true_negatives(Y, predictions):
	"""Count the positions where both the label and the prediction are 0.

	Comparing the boolean mask (Y == 0) with the predictions would count the misclassified
	positions instead, so the two conditions are combined with a logical AND.
	"""
	return np.sum(np.equal(Y, 0) & np.equal(predictions, 0))

In [None]:
def get_predictions(Y_hat, threshold=0.5):
	"""Turn scores into hard 0/1 labels: 0 where the score is below the threshold, 1 elsewhere.

	The result has the same shape and dtype as Y_hat.
	"""
	below_threshold = Y_hat < threshold
	return np.where(below_threshold, np.zeros_like(Y_hat), np.ones_like(Y_hat))

In [None]:
def predict(X, params, activation_list, activation_dict, L):
	"""Return the raw network output for X.

	:param activation_list: activation names, resolved to callables through activation_dict.
	:param L: forwarded to forward_propagation (number of layer_dims entries).
	:return: the first element of forward_propagation's result; the caches are discarded.
	"""
	act_functions = get_act_functions(activation_list, activation_dict)
	return forward_propagation(X, params, act_functions, L)[0]

In [None]:
# toy labels and scores (column vectors) for trying out the metric helpers
Y = np.array([[1], [0], [1], [1], [0]])
Y_hat = np.array([[0.8], [0.4], [0.1], [0.9], [0.7]])

In [None]:
# Element-wise comparison of the toy labels with the thresholded toy scores.
print(np.equal(Y, get_predictions(Y_hat)))

In [None]:
# sklearn's confusion matrix for the same toy data, as a reference for the hand-written helpers.
confusion_matrix(Y, get_predictions(Y_hat))

In [None]:
# Try the true-positive helper on the toy data.
get_true_positives(Y, get_predictions(Y_hat))

In [None]:
# Threshold the toy scores with the default threshold of 0.5.
predictions = get_predictions(Y_hat)

In [None]:
# Same call as two cells above, this time with the stored predictions.
get_true_positives(Y, predictions)

In [None]:
# Scratch expression; the array is displayed but not assigned to anything.
np.array([1, 0, 1, 1, 1, 0])

In [None]:
# def accuracy():

In [None]:
# `activation_list` and `L` do not exist at notebook level; take both from the training configuration
# (train_model uses L = len(layer_dims)).
Y_hat = predict(X_val, learned_parameters, hyper_parameters['act_list'], activation_dict,
				len(hyper_parameters['layer_dims']))

In [None]:
# Summary statistics of the raw network outputs.
pd.DataFrame(Y_hat).describe()

In [None]:
# The second argument of get_predictions is the decision threshold, not the labels,
# so Y_val must not be passed here; use the default threshold of 0.5.
predictions = get_predictions(Y_hat)

In [None]:
# Display the raw network outputs.
Y_hat

In [None]:
# predict takes (X, params, activation_list, activation_dict, L): no labels, and the activation
# names / layer count come from the training configuration. Its result is wrapped in a DataFrame
# before calling describe(), as in the cell that summarises Y_hat.
pd.DataFrame(
	predict(X_val, learned_parameters, hyper_parameters['act_list'], activation_dict,
			len(hyper_parameters['layer_dims']))
).describe()

In [None]:
# NOTE(review): calculate_metrics is only defined further down in this notebook, with the signature
# (params, X, Y); this call runs before that definition and passes two arguments -- fix before use.
calculate_metrics(learned_parameters, m_reduced)

In [None]:
def save_parameters(params):
	"""Write params to disk with np.save under the name 'best_parameters'.

	np.save appends the '.npy' extension because the name does not carry one.
	"""
	target_name = 'best_parameters'
	np.save(target_name, params)

# save_parameters(learned_parameters)

In [None]:
# Summary statistics of the first 100 columns of Y_train (one column per example, given m_axis = 1).
pd.DataFrame(Y_train[:, :100].T).describe()
# TODO: shuffle the dataframe properly -- the data is currently sorted.

In [None]:
def calculate_metrics(params, X, Y):
	"""Placeholder for the evaluation metrics of a trained model.

	:param params: learned parameters.
	:param X: inputs to evaluate on.
	:param Y: labels belonging to X.
	:raises NotImplementedError: always, until the metrics are implemented.
	"""
	# TODO: add ROC curve and precision / recall
	raise NotImplementedError("calculate_metrics is not implemented yet")

In [None]:
# Learning curve: each entry of computed_costs is an (iteration, cost) pair.
cost_curve = sns.lineplot(x=[x[0] for x in computed_costs], y=[x[1] for x in computed_costs])
# label the figure so it can be read on its own
cost_curve.set(xlabel="iteration", ylabel="cost", title="Training cost");