In [32]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.special import xlogy
import h5py
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [33]:
def load_dataset():
  """Build a two-feature, binary-label train/test split from the iris data.

  Only the first two feature columns are kept, and the target is collapsed
  to 0 for class 0 and 1 for every other class.

  Returns:
    Tuple ``(X_train, X_test, y_train, y_test)`` from a 67/33 split made
    with ``random_state=42``.
  """
  iris = datasets.load_iris()
  features = iris.data[:, :2]
  # Boolean mask times 1 turns "is not class 0" into integer 0/1 labels.
  labels = (iris.target != 0) * 1
  return tuple(train_test_split(features, labels, test_size=0.33, random_state=42))

In [34]:
def relu(Z):
  """Rectified linear unit: element-wise maximum of ``Z`` and zero."""
  floor = 0
  return np.maximum(Z, floor)

In [35]:
def relu_prime(Z):
  """Derivative of ReLU with respect to its input.

  Returns an array of ``Z``'s dtype that is 1 where ``Z`` is strictly
  positive and 0 elsewhere (including at exactly zero).
  """
  is_active = np.greater(Z, 0)
  return is_active.astype(Z.dtype)

In [36]:
def sigmoid(Z):
  """Logistic sigmoid ``1 / (1 + exp(-Z))``, evaluated without overflow.

  The naive formula overflows (and emits a RuntimeWarning) for large
  negative inputs. Here both branches are written in terms of
  ``exp(-|Z|)``, which always lies in (0, 1].

  Args:
    Z: NumPy array (or scalar) of pre-activations.

  Returns:
    Array of the same shape as ``Z`` with values in [0, 1].
  """
  decay = np.exp(-np.abs(Z))
  # Z >= 0: 1 / (1 + e^-Z);  Z < 0: e^Z / (1 + e^Z). Both are safe to evaluate.
  return np.where(Z >= 0, 1/(1+decay), decay/(1+decay))

In [37]:
def sigmoid_prime(Z):
  """Derivative of the logistic sigmoid with respect to the pre-activation ``Z``.

  The other ``*_prime`` helpers in this file take the pre-activation, and
  that is what the backward pass supplies, so the result must be
  ``s(Z) * (1 - s(Z))`` rather than ``Z * (1 - Z)``.

  Args:
    Z: NumPy array (or scalar) of pre-activations.

  Returns:
    Array of the same shape as ``Z`` with values in [0, 0.25].
  """
  # s(Z) * (1 - s(Z)) == e^-|Z| / (1 + e^-|Z|)^2. This form is even in Z and
  # never overflows because exp(-|Z|) lies in (0, 1].
  decay = np.exp(-np.abs(Z))
  return decay / (1 + decay)**2

In [38]:
def leaky_relu(Z, alpha=0.01):
  """Leaky ReLU: pass positive entries through and scale the rest by ``alpha``."""
  leaked = Z * alpha
  positive = Z > 0
  return np.where(positive, Z, leaked)

In [39]:
def leaky_relu_prime(Z, alpha=0.01):
  """Derivative of leaky ReLU with respect to its input.

  Args:
    Z: Array-like of pre-activations.
    alpha: Slope used for negative inputs.

  Returns:
    Array shaped like ``Z`` holding ``alpha`` where ``Z < 0`` and 1
    elsewhere. Floating inputs keep their dtype; any other dtype is
    promoted to float64.
  """
  Z = np.asarray(Z)
  # An integer buffer would silently truncate a fractional alpha to 0.
  out_dtype = Z.dtype if np.issubdtype(Z.dtype, np.floating) else np.float64
  dz = np.ones_like(Z, dtype=out_dtype)
  dz[Z < 0] = alpha
  return dz

In [40]:
def tanh(Z):
  """Hyperbolic tangent activation, delegated element-wise to NumPy."""
  activated = np.tanh(Z)
  return activated

In [41]:
def tanh_prime(Z):
  """Derivative of tanh with respect to the pre-activation: ``1 - tanh(Z)**2``."""
  squashed = np.tanh(Z)
  return 1 - squashed**2

In [42]:
def get_activation_function(name):
  """Look up an activation function by its string name.

  Args:
    name: One of 'relu', 'sigmoid', 'leaky_relu' or 'tanh'.

  Returns:
    The matching activation callable defined in this notebook.

  Raises:
    ValueError: If ``name`` is not one of the supported names.
  """
  if name=='relu':
    return relu
  if name=='sigmoid':
    return sigmoid
  if name=='leaky_relu':
    return leaky_relu
  if name=='tanh':
    return tanh
  # Nothing matched: fail loudly instead of returning None.
  raise ValueError('Only "relu", "leaky_relu", "tanh" and "sigmoid" supported')

In [43]:
def get_derivative_activation_function(name):
  """Look up the derivative of an activation function by its string name.

  Args:
    name: One of 'relu', 'sigmoid', 'leaky_relu' or 'tanh'.

  Returns:
    The matching ``*_prime`` callable defined in this notebook.

  Raises:
    ValueError: If ``name`` is not one of the supported names.
  """
  if name=='relu':
    return relu_prime
  if name=='sigmoid':
    return sigmoid_prime
  if name=='leaky_relu':
    return leaky_relu_prime
  if name=='tanh':
    return tanh_prime
  # Nothing matched: fail loudly instead of returning None.
  raise ValueError('Only "relu", "leaky_relu", "tanh" and "sigmoid" supported')

In [44]:
def initialize_layer_weights(n_l_1, n_l, random_state=0):
  """Draw initial weights and biases for one layer.

  Reseeds NumPy's global RNG with ``random_state``, then samples standard
  normal values scaled by ``sqrt(2 / n_l_1)``.

  Args:
    n_l_1: Number of inputs to the layer (rows of the weight matrix).
    n_l: Number of units in the layer (columns of the weight matrix).
    random_state: Seed passed to ``np.random.seed``.

  Returns:
    Dict with 'wl' of shape (n_l_1, n_l) and 'bl' of shape (1, n_l).
  """
  np.random.seed(random_state)
  scale = np.sqrt(2/n_l_1)
  # Weights are drawn before biases so the random stream is consumed in a fixed order.
  weights = np.random.randn(n_l_1, n_l) * scale
  biases = np.random.randn(1, n_l) * scale
  return dict(wl=weights, bl=biases)

In [45]:
class Dense:
  """Container for one fully connected layer's parameters and caches.

  Attributes set at construction:
    units: number of units in the layer.
    W, b: weight matrix and bias row from ``initialize_layer_weights``.
    activation: name of the activation function (a string key).
    Z, A: forward-pass caches, initially None.
    dz, da, dw, db: backward-pass caches, initially None.
  """
  def __init__(self, input_dim, units, activation, random_state=0):
    initial = initialize_layer_weights(input_dim, units, random_state)
    self.units = units
    self.W, self.b = initial['wl'], initial['bl']
    self.activation = activation
    # Forward-pass caches, filled in by forward_prop.
    self.Z = self.A = None
    # Gradient caches, filled in by backward_prop.
    self.dz = self.da = self.dw = self.db = None

In [46]:
def forward_prop(X, model):
  """Run a forward pass, caching ``Z`` and ``A`` on every layer.

  Args:
    X: Input matrix fed to the first layer (a copy is used).
    model: Sequence of layer objects exposing ``W``, ``b`` and ``activation``.

  Returns:
    The same ``model`` object, with each layer's ``Z`` and ``A`` updated.
  """
  for idx, layer in enumerate(model):
    # The first layer consumes the raw input; later ones consume the previous activation.
    layer_input = X.copy() if idx == 0 else model[idx-1].A
    layer.Z = np.dot(layer_input, layer.W) + layer.b
    activate = get_activation_function(layer.activation)
    layer.A = activate(layer.Z)
  return model

In [47]:
def calculate_loss(y, model):
  """Mean binary cross-entropy between ``y`` and the last layer's activation.

  Args:
    y: Array of 0/1 targets. Its first dimension is used as the sample count.
    model: Sequence of layers; only ``model[-1].A`` is read.

  Returns:
    The loss as returned by ``np.squeeze`` (a zero-dimensional value).
  """
  m = y.shape[0]
  A = np.asarray(model[-1].A, dtype=np.float64)
  # A 1-D y would broadcast against an (m, 1) activation into an (m, m)
  # matrix and inflate the loss, so align it as a column first.
  if y.ndim == 1 and A.ndim == 2 and A.shape == (y.shape[0], 1):
    y = y.reshape(-1, 1)
  # Keep probabilities strictly inside (0, 1) so log() never returns -inf/nan
  # when the output unit saturates.
  eps = np.finfo(np.float64).eps
  A = np.clip(A, eps, 1 - eps)
  return np.squeeze(-(1./m)*np.sum(np.multiply(y, np.log(A))+np.multiply(np.log(1-A), 1-y)))

In [48]:
def backward_prop(X, y, model):
  """Back-propagate gradients through ``model``, caching them on each layer.

  Expects ``forward_prop`` to have populated ``Z`` and ``A`` on every layer
  for the same ``X``.

  Args:
    X: Input matrix that was fed to the first layer.
    y: Targets, shaped like the last layer's activation.
    model: Sequence of layers, ordered from input to output.

  Returns:
    The same ``model`` object with ``dz``, ``dw`` and ``db`` set on every
    layer and ``da`` set on every layer except the last.
  """
  m = X.shape[0]
  last = len(model) - 1
  for i in range(last, -1, -1):
    layer = model[i]
    if i == last:
      # Output layer: A - y is the cross-entropy gradient w.r.t. Z for a sigmoid output.
      layer.dz = layer.A - y
    else:
      # Hidden layer: chain rule through this layer's own activation only.
      # A hard-coded (A > 0) mask here would be wrong for non-ReLU activations.
      derivative = get_derivative_activation_function(layer.activation)
      layer.dz = layer.da * derivative(layer.Z)
    # The first layer's input is X itself. Indexing model[i-1] at i == 0 would
    # wrap around to the last layer.
    layer_input = X if i == 0 else model[i-1].A
    layer.dw = 1./m * np.dot(layer_input.T, layer.dz)
    layer.db = 1./m * np.sum(layer.dz, axis=0, keepdims=True)
    if i != 0:
      # Hand the upstream gradient to the previous layer.
      model[i-1].da = np.dot(layer.dz, layer.W.T)
  return model

In [49]:
def update_weights(model, learning_rate=0.01):
  """Apply one gradient-descent step to every layer, in place.

  Args:
    model: Sequence of layers exposing ``W``, ``b``, ``dw`` and ``db``.
    learning_rate: Step size multiplying each gradient.

  Returns:
    The same ``model`` object after its parameters have been updated.
  """
  for layer in model:
    weight_step = learning_rate*layer.dw
    layer.W -= weight_step
    bias_step = learning_rate*layer.db
    layer.b -= bias_step
  return model

In [50]:
def predict(X, y, model):
  """Return hard 0/1 predictions for ``X`` without disturbing ``model``.

  Args:
    X: Input matrix to classify.
    y: Unused; kept so existing callers keep working.
    model: Sequence of layers, ordered from input to output.

  Returns:
    Integer array that is 1 where the last layer's activation exceeds 0.5
    and 0 elsewhere.
  """
  import copy
  # list.copy() alone still shares the layer objects, so forward_prop would
  # overwrite the caller's cached Z/A. Shallow-copy each layer instead; the
  # parameter arrays stay shared and only the caches are rebound on the copies.
  scratch = [copy.copy(layer) for layer in model]
  scratch = forward_prop(X, scratch)
  predictions = np.where(scratch[-1].A > 0.5, 1, 0)
  return predictions

In [51]:
def print_mislabeled_images(classes, X, y, p):
  """Plot every sample whose prediction differs from its label.

  Args:
    classes: Indexable of byte strings naming each class.
    X: 2-D array with one flattened image per row. Each row is reshaped to
      (64, 64, 3), so it must hold 12288 values; the two-column iris
      features loaded elsewhere in this notebook do not fit that layout.
    y: 2-D array of true labels; column 0 is read.
    p: 2-D array of predictions; column 0 is read.
  """
  # With 0/1 labels and predictions, the sum is exactly 1 only where they disagree.
  disagreement = (p + y) == 1
  wrong_rows = np.asarray(np.where(disagreement))[0]
  plt.rcParams['figure.figsize'] = (40.0, 40.0)
  total = len(wrong_rows)
  for slot, index in enumerate(wrong_rows, start=1):
      plt.subplot(2, total, slot)
      picture = X[index, :].reshape(64,64,3)
      plt.imshow(picture, interpolation='nearest')
      plt.axis('off')
      predicted_name = classes[int(p[index, 0])].decode("utf-8")
      actual_name = classes[y[index, 0]].decode("utf-8")
      plt.title("Prediction: " + predicted_name + " \n Class: " + actual_name)

In [52]:
# Load the split, then turn both label vectors into (n, 1) columns so they
# line up with the network's single-unit output.
X_train, X_test, y_train, y_test = load_dataset()
y_train, y_test = y_train.reshape(-1, 1), y_test.reshape(-1, 1)

In [53]:
# Sanity check: features are (samples, 2) and labels are (samples, 1).
for split in (X_train, y_train, X_test, y_test):
  print(split.shape)

(100, 2)
(100, 1)
(50, 2)
(50, 1)


In [54]:
# Flatten each sample into a single feature row (samples stay on axis 0).
n_train, n_test = X_train.shape[0], X_test.shape[0]
X_train = X_train.reshape(n_train, -1)
X_test = X_test.reshape(n_test, -1)

In [55]:
random_state = 42
epochs = 1

# (input_dim, units, activation) for each layer, from input to output.
# NOTE(review): every layer is built with the same random_state, so each one
# reseeds the global RNG identically. Confirm this is intended.
layer_specs = [
  (X_train.shape[1], 20, 'relu'),
  (20, 16, 'relu'),
  (16, 8, 'relu'),
  (8, 4, 'relu'),
  (4, y_train.shape[1], 'sigmoid'),
]
model = [
  Dense(input_dim=fan_in, units=fan_out, activation=act, random_state=random_state)
  for fan_in, fan_out, act in layer_specs
]

In [56]:
# Report progress about ten times over the run. An integer interval (never
# below 1) avoids the float modulo of i % (epochs/10), which misfires when
# epochs is not a multiple of 10.
log_every = max(1, epochs // 10)

for i in range(epochs):
  model = forward_prop(X_train, model)
  # The loss is measured on the pre-update weights; accuracies below use the updated ones.
  loss = calculate_loss(y_train, model)
  model = backward_prop(X_train, y_train, model)
  model = update_weights(model, 5e-3)

  if i % log_every == 0:
    train_accuracy = accuracy_score(y_train, predict(X_train, y_train, model))
    test_accuracy = accuracy_score(y_test, predict(X_test, y_test, model))
    print('Epoch: {}\tLoss: {:.6f}\tTrain Accuracy: {:.3f}\tTest Accuracy: {:.3f}'
    .format(i, loss, train_accuracy, test_accuracy))

Epoch: 0	Loss: 0.812263	Train Accuracy: 0.690	Test Accuracy: 0.620
