# In [None]:
from numpy import reshape, transpose, sum, zeros, mean, convolve, full
from numpy.random import default_rng, permutation
import matplotlib.pyplot as plt

from loader import load_data


def dnn(wa, wb, x):
	"""Return the network output for input x.

	The first layer applies the sigmoid to ``x @ wa``; the output layer is
	linear and only multiplies the first-layer output by ``wb``.

	wa: input weights of the first layer (layer a).
	wb: input weights of the output neuron (layer b).
	x:  input samples; presumably one sample per row -- confirm against load_data.
	"""
	first_layer_output = sigmoid(x @ wa)
	return first_layer_output @ wb


def plot_prediction(x, y_groundtruth, y_predicted):
	"""Show ground truth and prediction side by side in one figure.

	Left panel: the samples at (x[:, 0], x[:, 1]), drawn as dots coloured by the
	ground truth with larger crosses coloured by the prediction on top.
	Right panel: predicted value against ground truth, with a dotted diagonal
	from (-1, -1) to (1, 1) as reference for a perfect prediction.

	x:             samples, indexed as x[:, 0] and x[:, 1].
	y_groundtruth: target value of every sample.
	y_predicted:   network output for every sample.
	"""
	fig, (ax_samples, ax_parity) = plt.subplots(1, 2)
	fig.set_size_inches(15, 5)

	# left panel: samples in input space
	x0, x1 = x[:, 0], x[:, 1]
	groundtruth_points = ax_samples.scatter(x0, x1, c=y_groundtruth, s=50, label="ground truth")
	plt.colorbar(groundtruth_points, ax=ax_samples)
	ax_samples.scatter(x0, x1, c=y_predicted, s=100, marker="x", label="predicted")
	ax_samples.legend()

	# right panel: prediction against ground truth
	ax_parity.plot([-1, 1], [-1, 1], "k:")
	ax_parity.scatter(y_groundtruth, y_predicted, s=10)

	# formatting shared by both panels
	panel_labels = (
		(ax_samples, "x0", "x1"),
		(ax_parity, "y ground truth", "y predicted"),
	)
	for panel, label_x, label_y in panel_labels:
		panel.axis("equal")
		panel.set_xlabel(label_x, fontsize=24)
		panel.set_ylabel(label_y, fontsize=24)
		panel.tick_params(axis="both", which="major", labelsize=16)

	plt.show()


def plot_mse(mse_mean_train):
	"""Plot the training MSE, raw and smoothed, on a 2x2 grid of axis scalings.

	The raw curve is drawn in orange and an 11-point moving average in black.
	Panels: linear (top left), log-y (bottom left), log-x (top right) and
	log-log (bottom right).

	mse_mean_train: 1-D sequence of MSE values, plotted against its index
	                (labelled "epoch").
	"""
	smoothing_length = 11
	half_window = smoothing_length // 2

	# moving average; mode="valid" keeps only fully overlapping windows
	averaging_kernel = full((smoothing_length,), 1 / smoothing_length)
	mse_mean_train_smoothed = convolve(mse_mean_train, averaging_kernel, mode="valid")
	# x positions of the smoothed values: the centre of each averaging window
	smoothed_epochs = range(half_window, len(mse_mean_train) - half_window)

	fig, ax = plt.subplots(2, 2)
	fig.set_size_inches(10, 10)

	# (row, column, name of the Axes method that selects the axis scaling)
	panels = (
		(0, 0, "plot"),
		(1, 0, "semilogy"),
		(0, 1, "semilogx"),
		(1, 1, "loglog"),
	)
	for row, column, method_name in panels:
		panel = ax[row, column]
		draw = getattr(panel, method_name)
		draw(mse_mean_train, c="tab:orange")
		draw(smoothed_epochs, mse_mean_train_smoothed, c="k")
		if row == 1: # only the bottom row carries the x label
			panel.set_xlabel("epoch", fontsize=24)
		if column == 0: # only the left column carries the y label
			panel.set_ylabel("MSE", fontsize=24)
		panel.tick_params(axis="both", which="major", labelsize=16)

	plt.show()


# load the samples and targets the network is trained on
x, y_groundtruth = load_data(filename="data_reg_2d_nonlinear.csv")

# network architecture: number of neurons in the first layer (layer a)
number_neurons = 10

def sigmoid(x):
	"""Return the logistic function 1 / (1 + exp(-x)) of x."""
	from numpy import exp
	denominator = 1 + exp(-x)
	return 1 / denominator

def derivative_sigmoid(x):
	"""Return the derivative of the sigmoid at x, i.e. s * (1 - s) with s = sigmoid(x)."""
	activation = sigmoid(x)
	return activation * (1 - activation)

# random initial weights, drawn from a standard normal distribution
rng = default_rng()
wa = rng.standard_normal((2, number_neurons)) # layer a: 2 inputs -> number_neurons neurons
wb = rng.standard_normal((number_neurons, 1)) # layer b: number_neurons inputs -> 1 output neuron

# plot what the still untrained network predicts
plot_prediction(x, y_groundtruth, dnn(wa, wb, x))

# train neural network with mini-batch gradient descent and backpropagation
number_samples = len(x)
number_batches = 10
# at least one sample per batch, so the batch range below never gets a zero step
batch_size = max(1, number_samples // number_batches)
number_epochs = 10**4
eta = .1 # learning rate

mse_mean_train = zeros((number_epochs,)) # training MSE after every epoch

for epoch in range(number_epochs):
	# shuffle once per epoch: every sample is used exactly once, in a new random order
	permuted_order_samples = rng.permutation(number_samples)
	x_permuted = x[permuted_order_samples]
	y_groundtruth_permuted = y_groundtruth[permuted_order_samples]

	for batch_start_sample in range(0, number_samples, batch_size):
		# the last batch is shorter when number_samples is not a multiple of batch_size
		batch_end_sample = min(batch_start_sample + batch_size, number_samples)
		samples_in_batch = batch_end_sample - batch_start_sample

		# initialize weight increments (accumulated over the batch)
		dwa = zeros(wa.shape)
		dwb = zeros(wb.shape)

		# walk through the shuffled samples of this batch; the index must not be
		# redrawn here, otherwise the shuffling and the batch boundaries are ignored
		for selected in range(batch_start_sample, batch_end_sample):
			x_selected = reshape(x_permuted[selected], (1, -1))
			y_groundtruth_selected = reshape(y_groundtruth_permuted[selected], (1, -1))

			# detailed neural network calculation
			x_selected_a = x_selected # input 1st layer (layer a)
			p_a = x_selected_a @ wa # activation potential 1st layer (layer a)
			y_selected_a = sigmoid(p_a) # output 1st layer (layer a)

			x_selected_b = y_selected_a # input output neuron (layer b)
			p_b = x_selected_b @ wb # activation potential output neuron (layer b)
			y_selected_b = p_b # output output neuron (layer b) / note linear output

			y_predicted_selected = y_selected_b

			# error
			error = y_predicted_selected - y_groundtruth_selected

			# update weight increments
			delta_b = error * 1 # note linear output (derivative of the activation is 1)
			dwb -= eta * delta_b * transpose(x_selected_b)

			# propagate the output delta back through wb to the 1st layer
			delta_a = sum(wb * delta_b, axis=1) * derivative_sigmoid(p_a)
			dwa -= eta * delta_a * transpose(x_selected_a)

		# update weights with the increment averaged over the samples actually used
		wa += dwa / samples_in_batch
		wb += dwb / samples_in_batch

	# training MSE after this epoch; the ground truth is brought to the shape of the
	# prediction so that a 1-D target vector cannot broadcast against the column of
	# predictions into a full matrix of differences
	y_predicted = dnn(wa, wb, x)
	mse_mean_train[epoch] = mean((y_predicted - reshape(y_groundtruth, y_predicted.shape))**2)

	print(f"epoch {epoch}\t MSE = {mse_mean_train[epoch]:.4f}")

# prediction of the trained network on the training data
plot_prediction(x, y_groundtruth, dnn(wa, wb, x))

# training MSE as a function of epoch
plot_mse(mse_mean_train)

# prediction of the trained network on the separate test data
x_test, y_groundtruth_test = load_data(filename="data_reg_2d_nonlinear_test.csv")
plot_prediction(x_test, y_groundtruth_test, dnn(wa, wb, x_test))