In [1]:
import numpy as np
import pandas as pd

In [2]:
cars_df = pd.read_csv('../static/data/cardata.csv', header=None)
cars_df.head(6)

Unnamed: 0,0,1,2,3,4,5,6
0,vhigh,vhigh,2,2,small,low,unacc
1,vhigh,vhigh,2,2,small,med,unacc
2,vhigh,vhigh,2,2,small,high,unacc
3,vhigh,vhigh,2,2,med,low,unacc
4,vhigh,vhigh,2,2,med,med,unacc
5,vhigh,vhigh,2,2,med,high,unacc


In [3]:
# Check if there are any missing values
cars_df.isnull().any()

0    False
1    False
2    False
3    False
4    False
5    False
6    False
dtype: bool

In [4]:
# Split class column
y = cars_df[6]
print(y.shape)

(1728,)


In [5]:
# One hot encoding for categorical columns
x = pd.get_dummies(cars_df.iloc[:, :-1])
x.to_csv('../static/data/build_nn_part1_x_dummy.csv', index=False)

In [6]:
# Encode y into 0 and 1
y[y != 'unacc'] = 1
y[y == 'unacc'] = 0
print(y)

# y.to_csv('../static/data/build_nn_part1_y.csv', index=False)

0       0
1       0
2       0
3       0
4       0
       ..
1723    1
1724    1
1725    0
1726    1
1727    1
Name: 6, Length: 1728, dtype: object


In [15]:
# Build the neuron unit
# Reading X and y as numpy arrays
x = np.array(x)
y = np.array(y, dtype=np.float)

In [8]:
print(x.shape)

(1728, 21)


In [9]:
# Randomly initialise weights and bias
W = np.random.uniform(low=-0.5, high=0.5, size=(x.shape[1], 1))
b = 0 # scalar Value
print('Weights matrix: ', W.shape)
print('Range of values in weights matrix = [{} - {}]'.format(W.min(), W.max()))

Weights matrix:  (21, 1)
Range of values in weights matrix = [-0.47355377833155265 - 0.41947594071267]


In [10]:
# Define sigmoid function
def sigmoid(z):
    a = np.zeros([1, 1])
    a = 1 / ( 1 + np.exp(-z))

    return a

In [17]:
from scipy import special
import time

# Iteration over the whole dataset
t0 = time.time()
final_acc = 0
epochs = 500

for j in range(epochs): # for each sample
    J=0; dW=np.zeros_like(W); db=0
    Z = 0
    pred = []
    correct = 0
    acc = 0

    print('*'*70)
    print('Epoch No: ', j)
    # print('\n\nOriginal Weights for epoch {}: {}'.format(j, W))
    print('Weights transpose shape: ', W.T.shape)
    print('Inputs shape: ', x.shape)

    # Calculate Z
    Z = np.dot(x, W)
    print(Z.shape)
    
    Z += b
    # print('\nZ for epoch {}: {}'.format(j, z))
    print('\nZ shape: {}'.format(Z.shape))

    # Calculate a (activation)
    y_hat = sigmoid(Z)
    print('\nActivation for epoch {}: {}'.format(j, y_hat.shape))

    # Calculate loss
    J = -1 * (special.xlogy(y, y_hat) + special.xlogy((1 - y), (1 - y_hat)))
    J /= y.shape[0]
    # print('\Cost for epoch {}: {}'.format(j, J/y.shape))
    print(J.shape)

    # Calculate dz
    print('Labels shape', y.shape)
    dZ = y_hat - y
    print('\ndZ for epoch {}: {}'.format(j, dZ.shape))

    # Calculate weight changes
    dW = np.dot(x.T, dZ) / y.shape[0]

    # Calculate change in bias
    db = dZ / y.shape[0]

    # Update weights
    W = W - 0.1 * dW
    b= b - 0.1 * db
    # print('New weights for epoch {}: {}'.format(j, W))

    # Calculate accuracy
    # 0.5 threshold was chose, if y_hat is > 0.5 then it is more likely to be of class 1, otherwise it's class 0
    for m in y_hat[0]:
        if m < 0.5:
            pred.append(0)
        else:
            pred.append(1)

    correct = np.sum(y==pred)
    print('No. of correct predictions: ', correct)
    
    acc = correct / y.shape[0]
    final_acc += correct
    print('Accuracy for epoch {} = {:.2f}%'.format(j, acc*100))

print('-'*70)
print('Time elapsed for one iteration over the whole dataset: ', time.time() - t0)
print('Final Accuracy for epoch {} = {:.2f}%'.format(j, (final_acc / (epochs*y.shape[0]))*100))

**********************************************************************
Epoch No:  0


Original Weights for epoch 0: [[-0.02391626]
 [ 0.26418715]
 [-0.4628498 ]
 [-0.17729477]
 [ 0.19781637]
 [-0.26989544]
 [ 0.41947594]
 [ 0.04134083]
 [ 0.07554025]
 [-0.21575389]
 [-0.38469976]
 [-0.47355378]
 [-0.23713811]
 [ 0.07732732]
 [-0.21022006]
 [-0.39502631]
 [ 0.12944072]
 [-0.24977425]
 [-0.06365885]
 [-0.01500194]
 [ 0.40122039]]
Weights transpose shape:  (1, 21)
Inputs shape:  (1728, 21)
(1728, 1)

Z shape: (1728, 1)

Activation for epoch 0: (1728, 1)
(1728, 1728)
Labels shape (1728,)

dZ for epoch 0: (1728, 1728)

Changes in weights for epoch 0: [[ 0.10353623  0.10353623  0.10353623 ...  0.10353623 -0.14646377
  -0.14646377]
 [ 0.12043385  0.12043385  0.12043385 ...  0.12043385 -0.12956615
  -0.12956615]
 [ 0.0793913   0.0793913   0.0793913  ...  0.0793913  -0.1706087
  -0.1706087 ]
 ...
 [ 0.1197472   0.1197472   0.1197472  ...  0.1197472  -0.21358613
  -0.21358613]
 [ 0.12330823  0.1