In [1]:
import numpy as np
from numpy import genfromtxt

In [2]:
# Read the comma-separated training file into a NumPy array.
# NOTE(review): the path is relative to the notebook's working directory, and the
# column slicing in the next cell presumably relies on the file having no header
# row -- confirm against the CSV.
train_data = genfromtxt('./data/train1.csv', delimiter=',')

In [3]:
# Columns 0-1 are taken as the inputs; every remaining column is taken as the target.
train_X = train_data[:, :2]
train_Y = train_data[:, 2:]

In [4]:
# Standardise every feature column to zero mean and unit variance.
mean = train_X.mean(axis=0, keepdims=True)
std = train_X.std(axis=0, keepdims=True)
# A constant column has std == 0; dividing by it would fill that column with NaN.
std[std == 0] = 1.0

# The trailing axis of length 1 turns each sample into a column vector, which is
# the layout Model's np.dot(W, x) calls operate on.
orig_normalized_X = np.expand_dims((train_X - mean) / std, axis=2)
# Built from train_data (same column slice as the split cell) instead of from
# train_Y, so re-running this cell neither stacks another axis onto train_Y nor
# shuffles data that was already shuffled.
orig_train_Y = np.expand_dims(train_data[:, 2:], axis=2).copy()

# Fixed seed: the shuffle, and with it the per-sample update order during
# training, is the same on every run.
permutation = np.random.RandomState(0).permutation(len(train_X))
normalized_X = orig_normalized_X[permutation]
train_Y = orig_train_Y[permutation]

In [5]:
class Model():
    """Fully connected network with one sigmoid hidden layer and a sigmoid output layer.

    Samples are handled one at a time as column vectors: an input has shape
    ``(input_units, 1)`` and a target / prediction has shape ``(output_units, 1)``.
    Training is plain per-sample gradient descent on the squared error.

    ``b_1`` and ``b_2`` are single ``(1, 1)`` values, i.e. one bias shared by all
    units of a layer rather than one bias per unit.
    NOTE(review): per-unit biases are the usual choice -- confirm the shared bias
    is intentional.

    Args:
        hidden_units: Width of the hidden layer.
        input_units: Number of features per sample (default 2).
        output_units: Number of network outputs (default 2).
    """

    def __init__(self, hidden_units=50, input_units=2, output_units=2):
        self.input_units, self.hidden_units, self.output_units = input_units, hidden_units, output_units
        # All parameters are drawn from N(0, 1) using NumPy's global generator, in
        # the order W_1, b_1, W_2, b_2, so np.random.seed() makes a run repeatable.
        self.W_1 = np.random.normal(0, 1, (self.hidden_units, self.input_units))
        self.b_1 = np.random.normal(0, 1, (1, 1))
        self.W_2 = np.random.normal(0, 1, (self.output_units, self.hidden_units))
        self.b_2 = np.random.normal(0, 1, (1, 1))

    @staticmethod
    def _sigmoid(z):
        """Logistic function 1 / (1 + exp(-z)) evaluated without overflowing exp."""
        # exp() only ever receives non-positive arguments, so it cannot overflow;
        # the two branches are the same function written for z >= 0 and z < 0.
        decay = np.exp(-np.abs(z))
        return np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))

    def _layers(self, item):
        """Return ``(f_1, a_1, f_2, a_2)`` for one column-vector input ``item``."""
        f_1 = np.dot(self.W_1, item) + self.b_1
        a_1 = self._sigmoid(f_1)
        f_2 = np.dot(self.W_2, a_1) + self.b_2
        a_2 = self._sigmoid(f_2)
        return f_1, a_1, f_2, a_2

    def forward(self, j):
        """Run training sample ``j`` through the network.

        The pre-activations and activations are cached on the instance
        (``f_1``, ``a_1``, ``f_2``, ``a_2``) because ``calculate_gradients``
        reads them afterwards.

        Returns:
            The output activations ``a_2``.
        """
        self.f_1, self.a_1, self.f_2, self.a_2 = self._layers(self.X_train[j])
        return self.a_2

    def calculate_gradients(self, error, lr, j):
        """Back-propagate ``error`` for sample ``j`` and update the parameters in place.

        Must be called right after ``forward(j)``, whose cached activations it uses.

        Args:
            error: ``target - prediction`` for sample ``j``.
            lr: Learning rate.
            j: Index of the sample in ``self.X_train``.
        """
        # -error is the derivative of 0.5 * sum(error ** 2) w.r.t. the prediction;
        # the factor 2 of the reported (un-halved) squared loss is left to lr.
        grad_se = -error
        # Sigmoid derivative is a * (1 - a).
        delta_2 = np.multiply(grad_se, np.multiply(self.a_2, 1 - self.a_2))
        # The layer bias is one shared scalar, so its gradient is the sum over units.
        grad_b_2 = delta_2.sum(axis=0, keepdims=True)
        grad_W_2 = np.dot(delta_2, self.a_1.T)

        # Uses W_2 before it is updated below.
        grad_a_1 = np.dot(self.W_2.T, delta_2)
        delta_1 = np.multiply(grad_a_1, np.multiply(self.a_1, 1 - self.a_1))
        grad_b_1 = delta_1.sum(axis=0, keepdims=True)
        grad_W_1 = np.dot(delta_1, self.X_train[j].T)

        self.W_1 -= lr * grad_W_1
        self.b_1 -= lr * grad_b_1

        self.W_2 -= lr * grad_W_2
        self.b_2 -= lr * grad_b_2

    def train(self, X_train, Y_train, n_epochs=10, lr=0.1):
        """Fit the network with one gradient step per sample, ``n_epochs`` times over.

        After every epoch the summed squared error and the accuracy in percent
        (argmax of target vs. argmax of prediction) are printed and also appended
        to ``self.history`` as ``(loss, accuracy)``. Neither input array is modified.

        Args:
            X_train: Indexable of column-vector inputs, shape ``(n, input_units, 1)``.
            Y_train: Indexable of column-vector targets, shape ``(n, output_units, 1)``.
            n_epochs: Number of passes over the data.
            lr: Learning rate.

        Raises:
            ValueError: If ``X_train`` is empty or its length differs from ``Y_train``.
        """
        n_samples = len(X_train)
        if n_samples == 0:
            raise ValueError('X_train must contain at least one sample')
        if len(Y_train) != n_samples:
            raise ValueError('X_train and Y_train must have the same number of samples')

        self.X_train = X_train
        self.Y_train = Y_train
        self.n_epochs = n_epochs
        self.lr = lr
        self.history = []

        for i in range(n_epochs):
            e_loss = 0
            e_acc = 0
            for j in range(n_samples):
                pred = self.forward(j)
                error = Y_train[j] - pred
                self.calculate_gradients(error, lr, j)
                e_loss += np.power(error, 2).sum()
                e_acc += np.argmax(Y_train[j]) == np.argmax(pred)
            accuracy = (e_acc/n_samples)*100
            print('Loss, acc:', e_loss, accuracy)
            self.history.append((e_loss, accuracy))

    def predict(self, item):
        """Return the output activations for one column-vector input, without caching."""
        return self._layers(item)[3]

In [6]:
# Seed NumPy's global generator first: Model draws its initial weights with
# np.random.normal, so without a seed every kernel restart trains a different network.
np.random.seed(0)
model = Model()

In [7]:
# Fit on the shuffled, standardised samples. The hyperparameters are spelled out
# here; the values are the same as train()'s defaults.
model.train(normalized_X, train_Y, n_epochs=10, lr=0.1)

Loss, acc: 25.988121910616663 91.0
Loss, acc: 10.312095807343058 98.0
Loss, acc: 7.32358564455169 98.0
Loss, acc: 6.005175705856958 98.0
Loss, acc: 5.265110938387665 98.0
Loss, acc: 4.792382429461447 98.0
Loss, acc: 4.464340241742168 98.0
Loss, acc: 4.223185694116203 98.0
Loss, acc: 4.038190800468335 98.0
Loss, acc: 3.891532596512861 98.0


In [9]:
# Network output for row 99 of the unshuffled, normalised inputs
# (orig_normalized_X was copied before the permutation was applied).
model.predict(orig_normalized_X[99])

array([[0.79869678],
       [0.19672225]])

In [22]:
# Hand-entered 2x1 column of scores used by the scratch cells below.
a = np.array([
    [0.54603016],
    [0.39660734],
])

In [24]:
# Element-wise threshold of the scores at 0.5 (boolean array, same shape as `a`).
a>0.5 

array([[ True],
       [False]])

In [26]:
# Element-wise check of which entries of the first (shuffled) target equal 1.
train_Y[0] == 1

array([[False],
       [ True]])

In [27]:
# Position of the largest score in `a`; with no axis given, argmax indexes the flattened array.
np.argmax(a)

0

In [28]:
# Position of the largest entry of the first (shuffled) target, for comparison with argmax(a).
np.argmax(train_Y[0])

1