In [1]:
import numpy as np
import pandas as pd
import copy

In [2]:
# Number of synthetic students (rows) generated for the data set.
m = 10 ** 4

In [3]:
def examResultGenerator(m):
    """Generate random exam scores for ``m`` students.

    Every student receives three independent integer scores drawn uniformly
    from 0..100 (inclusive), in the column order midterm1, midterm2, final.

    Parameters
    ----------
    m : int
        Number of students (rows) to generate.

    Returns
    -------
    numpy.ndarray
        Integer array of shape ``(m, 3)``; also integer-typed when ``m`` is 0.
    """
    # A single vectorized draw replaces m*3 scalar calls. The upper bound of
    # np.random.randint is exclusive, so 101 keeps a score of 100 reachable.
    return np.random.randint(0, 101, size=(m, 3))

In [4]:
# 30% midterm1 + 30% midterm2 + 40% final > 40 ==> Pass
def results(examRes):
    """Label each student as pass (1) or fail (0).

    A student passes when
    ``0.30 * midterm1 + 0.30 * midterm2 + 0.40 * final`` is strictly
    greater than 40.

    Parameters
    ----------
    examRes : array-like
        Scores with one row per student and the columns
        midterm1, midterm2, final (columns 0, 1 and 2 are used).

    Returns
    -------
    numpy.ndarray
        1-D integer array with one 0/1 entry per row of ``examRes``.
    """
    examRes = np.asarray(examRes)
    if examRes.size == 0:
        # Nothing to grade; keep the integer label dtype.
        return np.zeros(0, dtype=int)

    # The number of students comes from the array itself, not from the
    # notebook-level ``m``, so inputs of any length are handled.
    # The terms are added in the same order as the weighted-sum formula so
    # that scores sitting exactly on the threshold round the same way.
    weighted = (examRes[:, 0] * 0.30) + (examRes[:, 1] * 0.30) + (examRes[:, 2] * 0.40)
    return (weighted > 40).astype(int)

In [5]:
def dataGenerator(m):
    """Create a synthetic data set of exam scores and pass/fail labels.

    Calls ``examResultGenerator`` for the scores and ``results`` for the
    labels, then prints how many students passed and failed.

    Parameters
    ----------
    m : int
        Number of students to generate.

    Returns
    -------
    tuple
        ``(examRes, isPass)`` as returned by the two helper functions.
    """
    examRes = examResultGenerator(m)
    isPass = results(examRes)

    # Vectorized count of the pass labels; the local is deliberately not
    # called ``sum`` so the built-in stays usable inside this function.
    num_pass = int((isPass == 1).sum())
    print("# of Pass: ", num_pass)
    print("# of Fail: ", m - num_pass)

    return (examRes, isPass)

## 1-Layer NN (logistic regression)

In [6]:
def initialize_with_zeros(dim):
    """Return the starting parameters: a ``(dim, 1)`` zero weight vector and a 0.0 bias."""
    return np.zeros(shape=(dim, 1)), 0.0

In [7]:
def sigmoid(z):
    """Logistic function ``1 / (1 + e**-z)``, applied element-wise to ``z``."""
    denominator = 1 + np.exp(-z)
    return 1 / denominator

In [8]:
def propagate(w, b, X, Y):
    """Run one forward/backward pass of the single sigmoid unit.

    Parameters
    ----------
    w : numpy.ndarray
        Weight column vector; ``w.T`` is multiplied with ``X``.
    b : float
        Bias added to every pre-activation.
    X : numpy.ndarray
        Inputs with one column per example (``X.shape[1]`` is the number
        of examples).
    Y : numpy.ndarray
        0/1 labels, broadcastable against the ``(1, m)`` activations.

    Returns
    -------
    grads : dict
        ``{"dw": ..., "db": ...}``, the gradients of the cost with respect
        to ``w`` and ``b``.
    cost : numpy.ndarray
        0-d array holding the mean binary cross-entropy.
    """
    m = X.shape[1]
    A = sigmoid(np.dot(w.T, X) + b)

    # When the sigmoid saturates to exactly 0 or 1, log() returns -inf and
    # the product 0 * -inf makes the whole cost nan. Clip only the values
    # fed to log(); activations inside (eps, 1 - eps) are left untouched and
    # the gradients below still use the unclipped A.
    eps = 1e-15
    A_safe = np.clip(A, eps, 1 - eps)
    cost = (-1 / m) * np.sum(Y * np.log(A_safe) + ((1 - Y) * np.log(1 - A_safe)))

    dw = (1 / m) * (np.dot(X, (A - Y).T))
    db = (1 / m) * np.sum(A - Y)
    cost = np.squeeze(np.array(cost))

    grads = {"dw": dw,
             "db": db}

    return grads, cost

In [9]:
def optimize(w, b, X, Y, num_iterations=100, learning_rate=0.009, print_cost=False):
    """Fit ``w`` and ``b`` with plain gradient descent.

    Parameters
    ----------
    w, b :
        Initial weights and bias; both are copied, the caller's objects are
        not modified.
    X, Y :
        Training inputs and labels, passed through to ``propagate``.
    num_iterations : int
        Number of gradient steps to take.
    learning_rate : float
        Step size applied to each gradient.
    print_cost : bool
        When true, print the cost every 100 iterations.

    Returns
    -------
    params : dict
        ``{"w": ..., "b": ...}`` after the final update.
    grads : dict
        ``{"dw": ..., "db": ...}`` from the last iteration (computed before
        that iteration's update). Both values are ``None`` when no iteration
        ran (``num_iterations <= 0``).
    costs : list
        Cost recorded at iterations 0, 100, 200, ...
    """
    w = copy.deepcopy(w)
    b = copy.deepcopy(b)

    costs = []

    # Pre-bind the gradients so that a run with zero iterations returns
    # cleanly instead of failing on unbound locals when building ``grads``.
    dw = None
    db = None

    for i in range(num_iterations):
        grads, cost = propagate(w, b, X, Y)

        dw = grads["dw"]
        db = grads["db"]

        w = w - learning_rate * dw
        b = b - learning_rate * db

        # Sample the cost every 100 steps to keep the history short.
        if i % 100 == 0:
            costs.append(cost)
            if print_cost:
                print ("Cost after iteration %i: %f" %(i, cost))

    params = {"w": w,
              "b": b}

    grads = {"dw": dw,
             "db": db}

    return params, grads, costs

In [10]:
def predict(w, b, X):
    """Predict 0/1 labels for every column of ``X``.

    Parameters
    ----------
    w : numpy.ndarray
        Weights; reshaped to ``(X.shape[0], 1)`` before use.
    b : float
        Bias.
    X : numpy.ndarray
        Inputs with one column per example.

    Returns
    -------
    numpy.ndarray
        Float array holding 1.0 where the sigmoid activation is strictly
        greater than 0.5 and 0.0 elsewhere; shape ``(1, m)`` for a scalar
        bias, where ``m = X.shape[1]``.
    """
    w = w.reshape(X.shape[0], 1)

    A = sigmoid(np.dot(w.T, X) + b)

    # Vectorized threshold instead of a per-element Python loop; the cast
    # keeps the float dtype of the np.zeros-based result it replaces.
    return (A > 0.5).astype(np.float64)

In [11]:
def model(X_train, Y_train, X_test, Y_test, num_iterations=2000, learning_rate=0.5, print_cost=False):
    """Train the single-unit classifier and predict on both data splits.

    Starts from zero parameters, runs ``optimize`` on the training data and
    then calls ``predict`` for the test and training inputs. ``Y_test`` is
    accepted but not used in the body.

    Returns
    -------
    dict
        Keys ``costs``, ``Y_prediction_test``, ``Y_prediction_train``,
        ``w``, ``b``, ``learning_rate`` and ``num_iterations``.
    """
    n_features = X_train.shape[0]
    w0, b0 = initialize_with_zeros(n_features)

    params, _, costs = optimize(w0, b0, X_train, Y_train, num_iterations, learning_rate, print_cost)
    w, b = params["w"], params["b"]

    # Predictions are computed test-first, then train.
    test_predictions = predict(w, b, X_test)
    train_predictions = predict(w, b, X_train)

    d = {}
    d["costs"] = costs
    d["Y_prediction_test"] = test_predictions
    d["Y_prediction_train"] = train_predictions
    d["w"] = w
    d["b"] = b
    d["learning_rate"] = learning_rate
    d["num_iterations"] = num_iterations
    return d

In [12]:
from sklearn.model_selection import train_test_split
def split(X,y):
    """Split ``X`` and ``y`` into 70% train / 30% test with a fixed seed.

    Prints the number of rows in each part and returns
    ``(X_train, X_test, y_train, y_test)``.
    """
    parts = train_test_split(X, y, test_size=0.30, random_state=0)
    X_train, X_test, y_train, y_test = parts

    for label, part in (("# of train: ", X_train), ("# of test:  ", X_test)):
        print(label, part.shape[0])

    return (X_train, X_test, y_train, y_test)

In [13]:
from sklearn.preprocessing import StandardScaler
def scale(X_train, X_test):
    """Standardize both splits using statistics fitted on the training split only.

    Returns ``(X_train_scaled, X_test_scaled)``.
    """
    scaler = StandardScaler().fit(X_train)
    return (scaler.transform(X_train), scaler.transform(X_test))

In [14]:
def prepareData(X,y):
    """Split, standardize and transpose the data for the model functions.

    Runs ``split`` and ``scale``, then transposes every part so the scaled
    inputs have one column per example. Returns
    ``(X_train, X_test, y_train, y_test)``.
    """
    X_tr, X_te, y_tr, y_te = split(X, y)
    X_tr_scaled, X_te_scaled = scale(X_tr, X_te)

    return (X_tr_scaled.T, X_te_scaled.T, y_tr.T, y_te.T)

In [15]:
# Build the synthetic data set: exam scores (X) and pass/fail labels (y).
X, y = dataGenerator(m=m)

# of Pass:  7126
# of Fail:  2874


In [16]:
# Split into train/test, standardize, and transpose for the model functions.
X_train, X_test, y_train, y_test = prepareData(X=X, y=y)

# of train:  7000
# of test:   3000


In [17]:
# Train the classifier and collect its parameters, costs and predictions.
d = model(
    X_train=X_train,
    Y_train=y_train,
    X_test=X_test,
    Y_test=y_test,
    num_iterations=2000,
    learning_rate=0.1,
    print_cost=True,
)

Cost after iteration 0: 0.693147
Cost after iteration 100: 0.271998
Cost after iteration 200: 0.207076
Cost after iteration 300: 0.176976
Cost after iteration 400: 0.158604
Cost after iteration 500: 0.145843
Cost after iteration 600: 0.136284
Cost after iteration 700: 0.128757
Cost after iteration 800: 0.122619
Cost after iteration 900: 0.117481
Cost after iteration 1000: 0.113091
Cost after iteration 1100: 0.109279
Cost after iteration 1200: 0.105926
Cost after iteration 1300: 0.102944
Cost after iteration 1400: 0.100267
Cost after iteration 1500: 0.097844
Cost after iteration 1600: 0.095638
Cost after iteration 1700: 0.093615
Cost after iteration 1800: 0.091752
Cost after iteration 1900: 0.090028
