In [1]:
import numpy as np 
import pandas as pd

In [2]:
def sigmoid(z):
    """Apply the logistic function 1 / (1 + e^(-z)) element-wise.

    ``z`` may be a scalar or a NumPy array; an array result keeps the
    shape of ``z``.
    """
    denominator = 1 + np.exp(-z)
    return 1 / denominator

In [3]:
def transpose_all(x):
    """Return ``x`` itself together with a transposed copy of it.

    The transpose is taken from a fresh copy, so writing to the second
    return value never modifies ``x``.
    """
    transposed = x.copy().T
    return x, transposed

In [4]:

def initialize_with_zeros(dim):
    """Create the starting parameters for a model with ``dim`` features.

    Despite the name, the weights are NOT zeros: they are drawn from a
    standard normal distribution and scaled by 0.01. Only the bias starts
    at zero. The draw uses NumPy's global random state, so results differ
    between runs unless the caller seeds it first.

    Returns:
        (w, b): ``w`` is an array of shape (dim, 1); ``b`` is the integer 0.
    """
    bias = 0
    weights = 0.01 * np.random.randn(dim, 1)
    return weights, bias

In [5]:
def propagate(w, b, X, Y):
    """Run one forward and backward pass of logistic regression.

    Args:
        w: weights; expected shape (n_features, 1).
        b: bias (scalar).
        X: inputs with one example per column, shape (n_features, m).
        Y: 0/1 labels, expected shape (1, m).

    Returns:
        grads: dict with "dw" (gradient of the cost w.r.t. ``w``, same shape
            as ``w``) and "db" (gradient w.r.t. ``b``).
        cost: mean cross-entropy over the m examples, as returned by
            ``np.squeeze``.
    """
    m = X.shape[1]

    # Forward pass: predicted probability for each example.
    A = sigmoid(np.dot(w.T, X) + b)

    # The sigmoid saturates to exactly 0.0 or 1.0 for large |z|, and
    # 0 * log(0) is NaN. Clip only the copy used inside the logarithms so the
    # cost stays finite; the gradients below still use the unclipped A.
    eps = np.finfo(A.dtype).eps
    A_safe = np.clip(A, eps, 1 - eps)
    cost = (-1 / m) * np.sum(Y * np.log(A_safe) + (1 - Y) * np.log(1 - A_safe))
    cost = np.squeeze(cost)

    # Backward pass.
    error = A - Y
    dw = (1 / m) * np.dot(X, error.T)
    db = (1 / m) * np.sum(error)

    grads = {"dw": dw,
             "db": db}

    return grads, cost

In [6]:

def optimize(w, b, X, Y, num_iterations, learning_rate, print_cost = False, print_every=100, record_every=50):
    """Fit ``w`` and ``b`` by batch gradient descent.

    Args:
        w, b: starting weights and bias; the inputs are not modified in place.
        X, Y: training inputs and labels, passed straight to ``propagate``.
        num_iterations: number of gradient steps; 0 returns the starting
            parameters unchanged.
        learning_rate: scalar step size.
        print_cost: when true, print the cost every ``print_every`` iterations.
        print_every: printing interval (must be positive when printing).
        record_every: interval, in iterations, at which the cost is appended
            to the returned history (must be positive). Defaults to 50.

    Returns:
        params: dict with the final "w" and "b".
        grads: dict with the "dw" and "db" used for the last step; both are
            None when no iteration ran.
        costs: list of the recorded costs.
    """
    costs = []

    # Keep these bound even when the loop body never runs (num_iterations == 0),
    # otherwise building the returned dict raises UnboundLocalError.
    dw, db = None, None

    for i in range(num_iterations):
        grads, cost = propagate(w, b, X, Y)
        dw = grads["dw"]
        db = grads["db"]

        # Plain scalar-times-array; np.dot with a scalar is the same product.
        w = w - learning_rate * dw
        b = b - learning_rate * db

        if i % record_every == 0:
            costs.append(cost)

        if print_cost and i % print_every == 0:
            print ("Cost after iteration %i: %f" %(i, cost))

    params = {"w": w,
              "b": b}

    grads = {"dw": dw,
             "db": db}

    return params, grads, costs

In [12]:

def predict(w, b, X):
    """Predict 0/1 labels with learned logistic-regression parameters.

    Args:
        w: weights; any array-like with n_features elements (it is reshaped
            to a column vector).
        b: bias (scalar).
        X: inputs with one example per ROW, shape (m, n_features). It is
            transposed internally and never modified.

    Returns:
        Float array of shape (1, m): 1.0 where the predicted probability is
        strictly greater than 0.5, otherwise 0.0.
    """
    # A transposed view is enough here; nothing below writes to the data.
    X_T = np.asarray(X).T
    m = X_T.shape[1]
    w = np.asarray(w).reshape(X_T.shape[0], 1)

    A = sigmoid(np.dot(w.T, X_T) + b)

    # Vectorised threshold instead of a per-element Python loop.
    Y_prediction = (A > 0.5).astype(float)

    assert(Y_prediction.shape == (1, m))

    return Y_prediction

In [18]:

def model(X_train, Y_train, X_test, Y_test, num_iterations = 500, learning_rate = 0.5, print_cost = True,print_every=100):
    """Train logistic regression and report train/test accuracy.

    The inputs are transposed before being handed to ``optimize``;
    ``predict`` receives the data untransposed.

    Args:
        X_train, X_test: feature arrays passed through ``transpose_all`` /
            ``predict``.
        Y_train, Y_test: label arrays; their transposes are compared with
            the predictions.
        num_iterations, learning_rate, print_cost, print_every: forwarded to
            ``optimize``.

    Returns:
        dict with the recorded "costs", both prediction arrays, the learned
        "w" and "b", and the "learning_rate" / "num_iterations" used.
    """
    train_features = transpose_all(X_train)[1]
    train_targets = transpose_all(Y_train)[1]
    test_targets = transpose_all(Y_test)[1]

    w, b = initialize_with_zeros(train_features.shape[0])

    fitted, _, costs = optimize(
        w, b, train_features, train_targets,
        num_iterations, learning_rate, print_cost, print_every=print_every,
    )
    w, b = fitted["w"], fitted["b"]

    Y_prediction_test = predict(w, b, X_test)
    Y_prediction_train = predict(w, b, X_train)

    # Labels and predictions are 0/1, so the mean absolute difference is the error rate.
    train_accuracy = 100 - np.mean(np.abs(Y_prediction_train - train_targets)) * 100
    print("train accuracy: {} %".format(train_accuracy))
    test_accuracy = 100 - np.mean(np.abs(Y_prediction_test - test_targets)) * 100
    print("test accuracy: {} %".format(test_accuracy))

    return {
        "costs": costs,
        "Y_prediction_test": Y_prediction_test,
        "Y_prediction_train": Y_prediction_train,
        "w": w,
        "b": b,
        "learning_rate": learning_rate,
        "num_iterations": num_iterations,
    }

In [19]:
data = pd.read_csv('data/mushrooms.csv')
from sklearn.preprocessing import LabelEncoder

# Label-encode every column (the 'class' target included) into integer codes,
# fitting a fresh encoder per column.
for column in data.columns:
    data[column] = LabelEncoder().fit_transform(data[column])

# Features are all columns except the target; labels become a column vector.
x = data.drop('class', axis=1).values
y = data['class'].values.reshape((-1, 1))
print(x.shape, y.shape)

(8124, 22) (8124, 1)


In [20]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=32,
)

In [21]:
# Train on the mushroom split, printing the cost every 20000 iterations.
d = model(
    x_train,
    y_train,
    x_test,
    y_test,
    num_iterations=100000,
    learning_rate=0.001,
    print_every=20000,
)

Cost after iteration 0: 0.730045
Cost after iteration 20000: 0.297701
Cost after iteration 40000: 0.266001
Cost after iteration 60000: 0.247684
Cost after iteration 80000: 0.235586
train accuracy: 92.8142791198646 %
test accuracy: 92.67692307692307 %


# Image data (live vs. not-live classification)

In [22]:
# %load liveness_detection/labels.py
import pandas as pd
import cv2


def _load_grayscale_images(directory, count, size=(100, 100)):
    """Read ``<directory>0.jpg`` .. ``<directory><count-1>.jpg`` as resized grayscale images.

    Args:
        directory: path prefix ending with a separator, e.g. "data/img-live/".
        count: number of consecutively numbered files to read, starting at 0.
        size: target size handed to ``cv2.resize``.

    Returns:
        List of ``count`` resized grayscale images.

    Raises:
        FileNotFoundError: if a file is missing or cannot be decoded.
    """
    loaded = []
    for index in range(count):
        path = directory + str(index) + ".jpg"
        img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than inside cv2.resize.
        if img is None:
            raise FileNotFoundError("could not read image: " + path)
        loaded.append(cv2.resize(img, size))
    return loaded


live_images = _load_grayscale_images("data/img-live/", 1000)
not_live_images = _load_grayscale_images("data/img-not-live/", 1000)

# Label convention: 0 = live, 1 = not live.
images = live_images + not_live_images
labels = [0] * len(live_images) + [1] * len(not_live_images)

x = np.array(images)
y = np.array(labels)
x = x.reshape((-1, 100 * 100))  # flatten each 100x100 image into one row
x = x / 255                     # divide pixel values by 255
y = y.reshape((-1, 1))
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=.25, random_state=32)

In [23]:
# Train on the image split, printing the cost every 1000 iterations.
d = model(
    x_train,
    y_train,
    x_test,
    y_test,
    num_iterations=10000,
    learning_rate=0.001,
    print_cost=True,
    print_every=1000,
)

Cost after iteration 0: 0.900131
Cost after iteration 1000: 0.144867
Cost after iteration 2000: 0.104179
Cost after iteration 3000: 0.083717
Cost after iteration 4000: 0.070589
Cost after iteration 5000: 0.061258
Cost after iteration 6000: 0.054220
Cost after iteration 7000: 0.048698
Cost after iteration 8000: 0.044237
Cost after iteration 9000: 0.040550
train accuracy: 99.6 %
test accuracy: 98.6 %
