In [2]:
import sys
import os
import numpy as np
import pandas as pd
import random

np.random.seed(42)

NUM_FEATS = 90

class Net(object):
	'''
	'''

	def __init__(self, num_layers, num_units):
		np.random.seed(42)
		self.num_layers = num_layers
		self.num_units = num_units
		self.biases = []
		self.weights = []
		for i in range(num_layers):
			if i==0:
				# Input layer
				self.weights.append(np.random.uniform(-1, 1, size=(NUM_FEATS, self.num_units)))				
			else:
				# Hidden layer
				self.weights.append(np.random.uniform(-1, 1, size=(self.num_units, self.num_units)))
			self.biases.append(np.random.uniform(-1, 1, size=(1, self.num_units)))
		# Output layer
		self.biases.append(np.random.uniform(-1, 1, size=(1, 1)))
		self.weights.append(np.random.uniform(-1, 1, size=(self.num_units, 1)))
	def __call__(self, X):
		weights = self.weights
		biases = self.biases
		self.activations = []
		self.dotz = []
		self.activations.append(X)
		for layer_num in range(self.num_layers):
			dotz = np.dot(X, weights[layer_num]) + biases[layer_num]
			self.dotz.append(dotz)
			activation = np.maximum(dotz, 0)
			self.activations.append(activation)
			X = activation
		dotz_out = np.dot(X, weights[-1]) + biases[-1]
		self.dotz.append(dotz_out)
		y_hat = dotz_out
		self.activations.append(y_hat)
		return y_hat
		raise NotImplementedError
	def backward(self, X, y, lamda):
		del_W = []
		del_b = []
		m = y.shape[0]
		L = self.num_layers
		a_list = self.activations
		z_list = self.dotz
		weights = self.weights
		biases = self.biases
		y = np.reshape(y, (y.shape[0], 1))
		del_aL = (2/m) * (a_list[-1] - y)
		del_WL = np.dot(a_list[-2].T, (del_aL)) + lamda * (weights[-1])
		del_bL = np.sum(del_aL, axis=0) + lamda * (biases[-1])
		del_W.append(del_WL)
		del_b.append(del_bL)
		
		del_al = del_aL
		for l in reversed(range(1, L+1)):
			del_al = np.dot((del_al), weights[l].T)
			del_Wl = np.dot(a_list[l-1].T, (del_al*(z_list[l-1] > 0))) + lamda * (weights[l-1])
			del_bl = np.sum(del_al*(z_list[l-1] > 0), axis=0) + lamda * (biases[l-1])
			del_W.append(del_Wl)
			del_b.append(del_bl)	
		del_W = list(reversed(del_W))
		del_b = list(reversed(del_b))
		return del_W, del_b
		raise NotImplementedError
class Optimizer(object):
	def __init__(self, learning_rate):
		self.learning_rate = learning_rate
		self.delta_weights = []
		self.delta_biases = []
		#raise NotImplementedError
	def step(self, weights, biases, delta_weights, delta_biases):
		lr = self.learning_rate
		for layer_num in range(len(weights)):
			weights[layer_num] -= lr * delta_weights[layer_num]
			biases[layer_num] -= lr * delta_biases[layer_num]
		return weights, biases
		raise NotImplementedError


def loss_mse(y, y_hat):

	m = y.shape[0]
	y = np.reshape(y, (y.shape[0], 1))
	mse = (1/m)*np.sum((y - y_hat)**2)
	return mse
	raise NotImplementedError
def loss_regularization(weights, biases):
	running_sum = 0
	for layer_num in range(len(weights)):
		running_sum += np.sum((weights[layer_num])**2) + np.sum((biases[layer_num])**2)
	return running_sum
	raise NotImplementedError
def loss_fn(y, y_hat, weights, biases, lamda):
	
	l2_loss = loss_mse(y, y_hat) + lamda * loss_regularization(weights, biases)
	return l2_loss
	raise NotImplementedError

def rmse(y, y_hat):
	rsme = (loss_mse(y, y_hat))**0.5
	return rsme
	raise NotImplementedError


def train(
	net, optimizer, lamda, batch_size, max_epochs,
	train_input, train_target,
	dev_input, dev_target
):
	m = train_input.shape[0]
	for e in range(max_epochs):
		epoch_loss = 0.
		epoch_loss_rmse = 0.
		iter_count = 0
		for i in range(0, m, batch_size):
			iter_count += 1

			batch_input = train_input[i:i+batch_size]
			batch_target = train_target[i:i+batch_size]

			pred = net(batch_input)

			# Compute gradients of loss w.r.t. weights and biases
			dW, db = net.backward(batch_input, batch_target, lamda)

			#norm_dW = [np.sum(grad_mat**2) for grad_mat in dW]
			#norm_db = [np.sum(grad_mat**2) for grad_mat in db]

			#if iter_count%200 == 0:
			#	print('norm of gradients')
			#	print(norm_dW, norm_db)
			# Get updated weights based on current weights and gradients
			weights_updated, biases_updated = optimizer.step(net.weights, net.biases, dW, db)

			#norm_W = [np.sum(weight_mat**2) for weight_mat in weights_updated]
			#norm_b = [np.sum(bias_mat**2) for bias_mat in biases_updated]

			#if iter_count%200 == 0:
			#	print('norm of weights and biases')
			#	print(norm_W, norm_b)
			# Update model's weights and biases
			net.weights = weights_updated
			net.biases = biases_updated

			# Compute loss for the batch
			batch_loss = loss_fn(batch_target, pred, net.weights, net.biases, lamda)
			epoch_loss += batch_loss
			batch_loss_rmse = rmse(batch_target, pred)
			epoch_loss_rmse += (batch_loss_rmse)**2

	

		print(f'Epoch Loss for epoch {e}: {(epoch_loss_rmse*(batch_size/m))**0.5}')


	dev_pred = net(dev_input)
	dev_rmse = rmse(dev_target, dev_pred)

	print('RMSE on dev data: {:.5f}'.format(dev_rmse))


def get_test_data_predictions(net, inputs):
	
	test_preds = np.float32(np.round(net(inputs)))

	Ids = np.arange(1, test_preds.shape[0] + 1, 1, dtype='f')
	Ids = np.reshape(Ids, (Ids.shape[0], 1))
	
	predictions = np.concatenate((Ids, test_preds), axis=1)

	test_data_predictions = pd.DataFrame(predictions, columns=['Id', 'Predicted'])
	test_data_predictions.to_csv('193109010.csv', index=False)
	return test_preds
	raise NotImplementedError

def read_data():
	df1 = pd.read_csv('train.csv')
	train_input = df1.iloc[:, 1:].to_numpy()
	indices = np.arange(0, train_input.shape[0])
	np.random.shuffle(indices)
	train_input = train_input[indices]
	train_target = df1.iloc[:, 0].to_numpy()
	train_target = train_target[indices]


	df2 = pd.read_csv('dev.csv')
	dev_input = df2.iloc[:, 1:].to_numpy()
	dev_target = df2.iloc[:, 0].to_numpy()

	
	df3 = pd.read_csv('test.csv')
	test_input = df3.iloc[:, 0:].to_numpy()

	return train_input, train_target, dev_input, dev_target, test_input


def main():

	# These parameters should be fixed for Part 1
	max_epochs = 50
	batch_size = 128


	learning_rate = 0.001
	num_layers = 1
	num_units = 64
	lamda = 0.0 # Regularization Parameter

	train_input, train_target, dev_input, dev_target, test_input = read_data()
	net = Net(num_layers, num_units)
	optimizer = Optimizer(learning_rate)
	train(
		net, optimizer, lamda, batch_size, max_epochs,
		train_input, train_target,
		dev_input, dev_target
	)
	#get_test_data_predictions(net, test_input)
	

if __name__ == '__main__':
	main()




Epoch Loss for epoch 0: 937.958622229471
Epoch Loss for epoch 1: 29.157330842487237
Epoch Loss for epoch 2: 20.917683434177867
Epoch Loss for epoch 3: 19.358541018717364
Epoch Loss for epoch 4: 18.1186036908682
Epoch Loss for epoch 5: 17.177130062984176
Epoch Loss for epoch 6: 16.468431071074612
Epoch Loss for epoch 7: 15.925376882423365
Epoch Loss for epoch 8: 15.49840063838708
Epoch Loss for epoch 9: 15.153968866452026
Epoch Loss for epoch 10: 14.869685931376276
Epoch Loss for epoch 11: 14.630368911530365
Epoch Loss for epoch 12: 14.425464390402349
Epoch Loss for epoch 13: 14.247495749007317
Epoch Loss for epoch 14: 14.091039581043136
Epoch Loss for epoch 15: 13.9520187742865
Epoch Loss for epoch 16: 13.827362242570903
Epoch Loss for epoch 17: 13.714768408586513
Epoch Loss for epoch 18: 13.612310452819585
Epoch Loss for epoch 19: 13.518666528941651
Epoch Loss for epoch 20: 13.432412700523164
Epoch Loss for epoch 21: 13.35234860496766
Epoch Loss for epoch 22: 13.28020791463353
Epoch L