<a href="https://colab.research.google.com/github/Hainguyendangduc/DeepLearning_PJ/blob/main/salary_predict.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
from matplotlib import pyplot as plt
import pandas as pd

In [None]:
# Load the ads dataset; the CSV is expected in the notebook's working directory.
data = pd.read_csv("Social_Network_Ads.csv")
# Preview the first rows to check the column layout before selecting features by position.
data.head()

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0
2,15668575,Female,26,43000,0
3,15603246,Female,27,57000,0
4,15804002,Male,19,76000,0


In [None]:
# Features: the columns at positions 2 and 3 (Age and EstimatedSalary according
# to the head() preview — TODO confirm if the CSV layout ever changes).
X = data.iloc[:, [2, 3]].to_numpy()
# Target: the last column, selected with a list so it stays a 2-D column vector.
y = data.iloc[:, [-1]].to_numpy()

print(X.shape, y.shape)

(400, 2) (400, 1)


In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing; random_state pins the split so reruns match.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Sanity-check the sizes of both splits.
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(320, 2) (320, 1)
(80, 2) (80, 1)


In [None]:
# Min-max scaling: the per-feature statistics come from the training split only
# and are reused for the test split, so the test rows do not influence them.
X_min = X_train.min(axis=0, keepdims=True)
X_max = X_train.max(axis=0, keepdims=True)

# Scale each split, then prepend a column of ones so that a model's first
# weight can play the role of the intercept.
X_train_scaled = np.concatenate(
    [np.ones((X_train.shape[0], 1)), (X_train - X_min) / (X_max - X_min)],
    axis=1,
)
X_test_scaled = np.concatenate(
    [np.ones((X_test.shape[0], 1)), (X_test - X_min) / (X_max - X_min)],
    axis=1,
)

print(X_test_scaled.shape)

(80, 3)


In [None]:
def g(z):
  """Logistic sigmoid, applied element-wise: 1 / (1 + exp(-z))."""
  return 1 / (1 + np.exp(-z))


def predict_prob(X, w):
  """Return the sigmoid of X @ w.T, i.e. the predicted positive-class probability.

  With X of shape (n_samples, n_features) and w of shape (1, n_features) the
  result has shape (n_samples, 1).
  """
  z = np.dot(X, w.T)
  return g(z)


def predict(X, w):
  """Return hard 0/1 labels (as floats) by thresholding predict_prob at 0.5.

  A probability of exactly 0.5 is mapped to class 1. The previous two-step
  in-place thresholding (`> 0.5` then `< 0.5`) left such values at 0.5, so the
  output was not guaranteed to contain only 0 and 1.
  """
  return (predict_prob(X, w) >= 0.5).astype(float)

In [None]:
def loss(X, y, w, eps=1e-12):
  """Mean binary cross-entropy of the logistic model on (X, y).

  Parameters
  ----------
  X, w : passed straight to predict_prob.
  y : array of 0/1 targets, broadcastable against predict_prob's output.
  eps : probabilities are clipped to [eps, 1 - eps] before taking logs.

  Without the clipping a saturated sigmoid (exactly 0.0 or 1.0 in floating
  point) makes np.log return -inf, and 0 * -inf turns the whole mean into NaN.
  """
  y_hat = np.clip(predict_prob(X, w), eps, 1 - eps)
  l = y * np.log(y_hat) + (1 - y) * np.log(1 - y_hat)
  return -np.mean(l)

In [None]:
def grad(X, y, w):
  """Gradient used to update the logistic-regression weights.

  Computes X.T @ (predict_prob(X, w) - y) and transposes it so the result has
  the same orientation as w. The contributions of all rows are summed, not
  averaged, so the magnitude grows with the number of rows in X.
  """
  residual = predict_prob(X, w) - y
  return np.dot(X.T, residual).T

In [None]:
def batch_generator(X, y, batch_size=32, shuffle=True):
  """Yield aligned (X_batch, y_batch) mini-batches covering every row exactly once.

  Parameters
  ----------
  X, y : 2-D arrays with the same number of rows.
  batch_size : maximum number of rows per batch; must be positive.
  shuffle : when True (default) the row order is re-drawn on every call using
    NumPy's global random state (np.random.shuffle).

  The last batch is smaller than batch_size when the row count is not a
  multiple of it.

  Fixes over the previous version: the shuffled index array was built but never
  used (batches were always the same contiguous slices in the same order), and
  the trailing partial batch was silently dropped by the floor division.
  """
  if batch_size <= 0:
    raise ValueError("batch_size must be a positive integer")

  idx = np.arange(X.shape[0])
  if shuffle:
    np.random.shuffle(idx)

  for i_start in range(0, len(idx), batch_size):
    # Index X and y with the same permuted rows so the pairs stay aligned.
    batch_idx = idx[i_start:i_start + batch_size]
    yield X[batch_idx, :], y[batch_idx, :]

EPOCHS = 10000
LR = 0.01
SEED = 42

# Seed NumPy's global RNG so the weight initialisation (and any shuffling done
# through np.random) is the same on every Restart & Run All.
np.random.seed(SEED)

# One weight per column of the design matrix (the ones column included),
# instead of a hard-coded 3.
w = np.random.randn(1, X_train_scaled.shape[1])

# Mini-batch gradient descent; w is updated in place after every batch.
for epoch in range(EPOCHS):
  for X_batch, y_batch in batch_generator(X_train_scaled, y_train):
    dw = grad(X_batch, y_batch, w)
    w -= LR * dw

print("Final loss = ", loss(X_train_scaled, y_train, w))

Final loss =  0.3728907863574754


In [None]:
from sklearn.metrics import classification_report

# predict() already thresholds at 0.5, so the labels are used as returned
# (the extra thresholding that used to follow was a no-op).
y_pred = predict(X_train_scaled, w)
print("Train set:")
print(classification_report(y_train, y_pred))

# Also score the held-out split: training-set metrics alone say nothing about
# how the model generalises.
y_pred_test = predict(X_test_scaled, w)
print("Test set:")
print(classification_report(y_test, y_pred_test))

              precision    recall  f1-score   support

           0       0.84      0.93      0.88       205
           1       0.84      0.69      0.76       115

    accuracy                           0.84       320
   macro avg       0.84      0.81      0.82       320
weighted avg       0.84      0.84      0.84       320



In [None]:
def g(z):
  """Logistic sigmoid, applied element-wise: 1 / (1 + exp(-z))."""
  return 1 / (1 + np.exp(-z))


class SigmoidLayer:
  """Fully connected layer followed by a sigmoid activation.

  Attributes
  ----------
  w : weight matrix of shape (n_inp, n_out), or (n_inp + 1, n_out) when
    bias=True; in that case row 0 holds the bias weights.
  bias : whether forward() prepends a column of ones to its input.
  is_forward : True once forward() has cached the values that
    back_propagation() needs.
  """

  def __init__(self, n_inp, n_out, bias=True):
    """Draw the weights from a standard normal distribution."""
    # The extra row pairs with the ones column forward() prepends.
    if bias: n_inp += 1
    self.w = np.random.randn(n_inp, n_out)
    self.bias = bias
    self.is_forward = False

  def forward(self, X):
    """Return sigmoid(X @ w) and cache the input/output for back-propagation.

    X is a 2-D array with one sample per row.
    """
    if self.bias:
      X = np.hstack((np.ones((X.shape[0], 1)), X))

    a = g(np.dot(X, self.w))

    # Cache the (bias-augmented) input and the activations for the backward pass.
    self.x = X
    self.a = a
    self.is_forward = True
    return a

  def back_propagation(self, delta):
    """Propagate `delta` backwards through the layer.

    `delta` is treated as the gradient with respect to this layer's
    activations: it is multiplied by the sigmoid derivative a * (1 - a) before
    being used.

    Returns
    -------
    dw : gradient with respect to self.w (same shape as self.w).
    da : gradient with respect to the layer's original input; the bias column
      is stripped when bias=True.

    Raises
    ------
    RuntimeError : if forward() has not been called yet, since there are no
      cached activations to differentiate through.
    """
    if not self.is_forward:
      raise RuntimeError("back_propagation() called before forward()")

    a = self.a
    # Gradient w.r.t. the pre-activation; computed once and reused below.
    dz = delta * a * (1.0 - a)

    dw = np.dot(self.x.T, dz)
    da = np.dot(dz, self.w.T)
    if self.bias:
      # Column 0 belongs to the constant ones input, which has no upstream layer.
      da = da[:, 1:]

    return dw, da

In [None]:
# Hyper-parameters for the SigmoidLayer network.
# NDIMS matches the 3 columns of X_train_scaled (ones column + 2 scaled features).
NDIMS = 3
# Widths of the two hidden layers.
L1 = 32
L2 = 32
# NOTE(review): the training cell that follows loops a literal 10000 times and
# relies on train_step's default lr=0.01, so EPOCHS and LR are not used there —
# confirm which values are intended.
EPOCHS = 500
LR = 0.02

# The first layer is built with bias=False, presumably because X_train_scaled
# already carries a ones column; the other two layers add their own bias input.
layers = [SigmoidLayer(NDIMS, L1, bias=False),SigmoidLayer(L1, L2),SigmoidLayer(L2,1)]

In [None]:
def predict(X, model):
  """Run X through every layer of `model` in order and return the last output.

  `model` is an iterable of objects exposing forward(X). An empty model
  returns X unchanged. Note that this definition rebinds the name `predict`
  used earlier for the logistic-regression classifier.
  """
  activations = X
  for layer in model:
    activations = layer.forward(activations)
  return activations

# Forward pass through the still-untrained layers; this y_pred is reassigned
# when the model is evaluated after training.
y_pred = predict(X_train_scaled, model = layers)

In [None]:
def train_step(X, y, model, lr=0.01):
  """Perform one full-batch gradient-descent update on every layer of `model`.

  The backward signal starts as (prediction - y) and is handed from the last
  layer to the first. Each layer's weights are modified in place, after that
  layer has produced the signal for the layer before it. Returns nothing.
  """
  signal = predict(X, model) - y

  for layer in reversed(model):
    weight_grad, signal = layer.back_propagation(signal)
    layer.w -= lr * weight_grad

# Full-batch training: every step uses the whole training split.
# NOTE(review): the iteration count is a literal and lr falls back to
# train_step's default of 0.01; the EPOCHS / LR constants are not consulted.
for i in range(10000):
  train_step(X_train_scaled, y_train, model=layers)

In [None]:
from sklearn.metrics import classification_report

# Threshold the network's sigmoid outputs at 0.5 to obtain hard 0/1 labels.
y_pred = (predict(X_train_scaled, model=layers) >= 0.5).astype(float)
print("Train set:")
print(classification_report(y_train, y_pred))

# Also score the held-out split: training-set metrics alone say nothing about
# how the network generalises.
y_pred_test = (predict(X_test_scaled, model=layers) >= 0.5).astype(float)
print("Test set:")
print(classification_report(y_test, y_pred_test))

              precision    recall  f1-score   support

           0       0.95      0.92      0.94       205
           1       0.87      0.92      0.89       115

    accuracy                           0.92       320
   macro avg       0.91      0.92      0.92       320
weighted avg       0.92      0.92      0.92       320



In [None]:
from sklearn.metrics import classification_report

# By this point `predict` is bound to the layer-based version, which iterates
# over a list of layers and cannot take the weight array `w`. Score the
# logistic-regression model through predict_prob instead and threshold at 0.5.
y_pred = (predict_prob(X_train_scaled, w) >= 0.5).astype(float)

print(classification_report(y_train, y_pred))

In [None]:
# Hyper-parameters for the hand-written three-layer network.
# These assignments rebind NDIMS, L1, L2, EPOCHS and LR from earlier cells.
NDIMS = 3
L1 = 64
L2 = 108
EPOCHS = 10000
LR = 0.001

# Weights are drawn from a standard normal distribution.
# w1 has no bias row, presumably because the input already carries a ones column;
# w2 and w3 have one extra row for the ones column prepended to each hidden activation.
w1 = np.random.randn(NDIMS, L1)
w2 = np.random.randn(L1 + 1, L2)
w3 = np.random.randn(L2+1, 1)

def predict(X, w1, w2, w3):
  """Forward pass of the three-layer sigmoid network.

  X is multiplied by w1 directly; before each of the next two layers a column
  of ones is prepended to the activations so that the first row of w2 / w3
  acts as that layer's bias. Returns the sigmoid output of the last layer.
  """
  hidden = g(np.dot(X, w1))
  for weights in (w2, w3):
    # add bias
    hidden = np.hstack((np.ones((hidden.shape[0], 1)), hidden))
    hidden = g(np.dot(hidden, weights))
  return hidden

def loss(X, y, w1, w2, w3, eps=1e-12):
  """Mean binary cross-entropy of the three-layer network on (X, y).

  Parameters
  ----------
  X, w1, w2, w3 : passed straight to predict.
  y : array of 0/1 targets, broadcastable against predict's output.
  eps : probabilities are clipped to [eps, 1 - eps] before taking logs.

  Without the clipping a saturated output (exactly 0.0 or 1.0 in floating
  point) makes np.log return -inf, and 0 * -inf turns the whole mean into NaN.
  """
  y_hat = np.clip(predict(X, w1, w2, w3), eps, 1 - eps)
  l = y * np.log(y_hat) + (1 - y) * np.log(1 - y_hat)
  return -np.mean(l)
def grad(X, y, w1, w2, w3):
  """Gradients of the summed binary cross-entropy w.r.t. w1, w2 and w3.

  The forward pass repeats what predict() does, because the intermediate
  activations are needed for back-propagation.

  Fix over the previous version: the output-layer error used to be multiplied
  by a3 * (1 - a3). That is the gradient of a squared-error loss, whereas the
  loss reported for this network is cross-entropy. For a sigmoid output with
  cross-entropy the sigmoid derivative cancels and the pre-activation error is
  simply a3 - y.

  Returns
  -------
  (dw1, dw2, dw3), each with the same shape as the matching weight matrix.
  Contributions are summed over the rows of X, not averaged.
  """
  #
  # Feed forward
  #
  a1 = g(np.dot(X, w1))
  # add bias
  a1 = np.hstack((np.ones((a1.shape[0], 1)), a1))

  a2 = g(np.dot(a1, w2))
  # add bias
  a2 = np.hstack((np.ones((a2.shape[0], 1)), a2))

  a3 = g(np.dot(a2, w3))

  #
  # Back propagation
  #

  # Output layer: error w.r.t. the pre-activation z3 (sigmoid + cross-entropy).
  dz3 = a3 - y
  dw3 = np.dot(a2.T, dz3)
  # Drop column 0: it belongs to the constant ones input, not to a hidden unit.
  da2 = np.dot(dz3, w3.T)[:, 1:]

  # Second hidden layer: apply the sigmoid derivative of its (bias-free) activations.
  h2 = a2[:, 1:]
  dz2 = da2 * h2 * (1 - h2)
  dw2 = np.dot(a1.T, dz2)
  da1 = np.dot(dz2, w2.T)[:, 1:]

  # First hidden layer.
  h1 = a1[:, 1:]
  dz1 = da1 * h1 * (1 - h1)
  dw1 = np.dot(X.T, dz1)

  return dw1, dw2, dw3

# Report the loss before training so the final value has a baseline.
print("Initial loss = ", loss(X_train_scaled,  y_train, w1, w2, w3))
# Mini-batch gradient descent; the three weight matrices are updated in place
# after every batch.
for i in range(EPOCHS):
  for X_batch, y_batch in batch_generator(X_train_scaled, y_train):
    dw1, dw2, dw3 = grad(X_batch, y_batch, w1, w2, w3)
    w1 -= LR * dw1
    w2 -= LR * dw2
    w3 -= LR * dw3

print("Final loss = ", loss(X_train_scaled,  y_train, w1, w2, w3))