(1) Import Libraries and define constants

In [2]:
import numpy as np
import scipy 
import os
import matplotlib.pyplot as plot
from scipy import ndimage

In [79]:
# Global Constants
side_length = 650
image_path = "./Logistic_Regression_Data/"
test_ratio = .2
epoch = 1000
learning_rate = .005

(2) Create train and test examples:

In [4]:
def getImageArrays(path, side_length): #returns list of images arrays for a specified path
    image_names = os.listdir(path)
    examples = []
    for image_name in image_names:
        if image_name.split(".")[-1] != "DS_Store":
            cur_image_path = path + image_name
            cur_array = scipy.ndimage.imread(cur_image_path,flatten=False) #reads image as numpy array (lenght,height,3)
            cur_array_resized = scipy.misc.imresize(cur_array,size=(side_length,side_length,3)) #resizes images to a uniform shape
            examples += [cur_array_resized] 
    return examples

In [5]:
#create examples & labels
cow_images_path = image_path + "cows/"
notCow_image_path = image_path + "notcows/"

examples_cow = getImageArrays(cow_images_path, side_length)
labels_cow = np.ones(len(examples_cow))
examples_notCow = getImageArrays(cow_images_path, side_length)
labels_notCow = np.zeros(len(examples_notCow))

examples_cow = np.array(examples_cow)
examples_notCow = np.array(examples_notCow)

assert(examples_cow.shape[1:] == (side_length,side_length,3)), "examples_cow are invalid shape"
examples = np.concatenate((examples_cow,examples_notCow))
labels = np.concatenate((labels_cow,labels_notCow))

In [6]:
#shuffle examples and labels
shuffled_indexing = np.random.permutation(labels.shape[0])
examples = examples[shuffled_indexing]
labels = labels[shuffled_indexing]

In [32]:
#seperate train and test examples
number_examples_test = int(len(examples)*test_ratio)
number_labels_test = int(len(labels)*test_ratio)

examples_test = examples[:number_examples_test]
examples_train = examples[number_examples_test:]
labels_test = labels[:number_labels_test]
labels_train = labels[number_labels_test:]
print("Number of training examples: ", examples_train.shape[0])
print("Number of test examples: ", examples_test.shape[0])

Number of training examples:  320
Number of test examples:  80


In [33]:
#reshape labels for future matrix operations
labels_train = np.reshape(labels_train,(1,len(labels_train)))
labels_test = np.reshape(labels_test,(1,len(labels_test)))
print("labels_train",labels_train.shape)
print("labels_test",labels_test.shape)

labels_train (1, 320)
labels_test (1, 80)


In [34]:
#flatten examples
flattened_train_examples = examples_train.reshape(examples_train.shape[0], -1).T
flattened_test_examples = examples_test.reshape(examples_test.shape[0], -1).T  
print("flattened examples",flattened_test_examples.shape,flattened_train_examples.shape)

flattened examples (1267500, 80) (1267500, 320)


In [35]:
# Standardize color values of the image (decrease computational cost durring cross entropy)
standardized_train_examples = flattened_train_examples/255 #225 is the maximum rgb value/ This is done to decrease varaince in inputs thus more efficint
standardized_test_examples = flattened_test_examples/255
print("standardized",standardized_test_examples.shape,standardized_train_examples.shape)

standardized (1267500, 80) (1267500, 320)


(3) Defeine basic functions

In [16]:
def sigmoid(x):
    output = 1.0/(1.0+np.exp(-x))
    return output

In [17]:
def initialize_zeros(dimension): #dimension = len(flattened_examples)
    zeroed_weights = np.zeros((dimension, 1)) 
    zeroed_baisies = 0. #baisies always start at 0 b/c they are developed as the model trains
    assert(isinstance(zeroed_baisies, float) or isinstance(zeroed_baisies, int))
    
    return zeroed_weights, zeroed_baisies

In [77]:
def crossEntropyLoss(a,Y):
    m = Y.shape[1] #len(Y) doesn't work, need 2nd dimesnion
#     print(a.shape)
    #print("BEFORE:")
    #print(a)
    a[0][a[0]==1] = .999
    a[0][a[0]==0] = .001
#     a = [0.0001 for prediction in a[0] if prediction == 0]
#     a = [0.9999 for prediction in a if prediction == 1]
    #print("AFTER:")
    #print(a)
    #a = np.reshape(a,(1,a.shape[1]))
    #print(a.shape,"changed")
    #a = 0.0001 if a == 0 else a
    #a = 0.9999 if a == 1 else a
    loss = -(1 / m) * np.sum(Y * np.log(a) + (1 - Y) * np.log(1 - a))
    return loss

(4) Foward Pass

In [19]:
def propagate(X,Y,w,b):
    # b = scalar
    # w = (____, 1)
    # X = (____, examples)
    # Y = (1, examples)
#     print("propagate shapes")
#     print("X",np.shape(X))
#     print("Y",np.shape(Y))
#     print("w",np.shape(w))
#     print("b",np.shape(b))
    
    w_transposed = w.T
    num_of_examples = np.shape(X)[1]
    
    dot_product = np.dot(w_transposed,X) #dot product of X and w be careful to use correct order
    z = dot_product + b
    a = sigmoid(z) #in begining should be 0
    loss = crossEntropyLoss(a,Y)
    dw = (1/num_of_examples) * np.dot(X,(a-Y).T) #?
    db = (1/num_of_examples) * np.sum(a-Y) #np.sum collapes input
    loss = np.squeeze(loss)
    grads = {"dw":dw,"db":db}
    return grads,loss    


In [36]:
w, b, X, Y = np.array([[1.],[2.]]), 2., np.array([[1.,2.,-1.],[3.,4.,-3.2]]), np.array([[1,0,1]])
grads, cost = propagate(X, Y,w,b)
print ("dw = " + str(grads["dw"]))
print ("db = " + str(grads["db"]))
print ("cost = " + str(cost))

dw = [[ 0.99845601]
 [ 2.39507239]]
db = 0.00145557813678
cost = 5.80154531939


(5) Optimize

In [81]:
def optimizer(w, b, X, Y, learning_rate, epoch, print_results=True):
    # b = scalar
    # w = (____, 1)
    # X = (____, examples)
    # Y = (1, examples)
#     print("optimizer shapes")
#     print("X",np.shape(X))
#     print("Y",np.shape(Y))
#     print("w",np.shape(w))
#     print("b",np.shape(b))
    costs = []
    for i in range(epoch):
        grads, cost = propagate(X,Y,w,b)
        dw = grads["dw"]
        db = grads["db"]
        w = w - (learning_rate * dw)
        b = b - (learning_rate * db)
        if (i % 100 == 0) and (print_results == True):
            costs += [cost]
            print("cost",cost,(i+1)*100/epoch,"%")
            
    if print_results == True:
        print("costs" , costs )
    grads = {"dw":dw,"db":db}
    params = {"w":w,"b":b}
    return grads,params,costs

In [39]:
w, b, X, Y = np.array([[1.],[2.]]), 2., np.array([[1.,2.,-1.],[3.,4.,-3.2]]), np.array([[1,0,1]])
grads,params,cost = optimizer(w,b,X, Y, .009, 100, print_results=False)
print ("w = " + str(params["w"]))
print ("b = " + str(params["b"]))
print ("dw = " + str(grads["dw"]))
print ("db = " + str(grads["db"]))

w = [[ 0.19033591]
 [ 0.12259159]]
b = 1.92535983008
dw = [[ 0.67752042]
 [ 1.41625495]]
db = 0.219194504541


(6) Validate

In [41]:
def predictor(X,w,b):
    # b = scalar
    # w = (____, 1)
    # X = (____, examples)
#     print("predictor shapes")
#     print("X",np.shape(X))
#     print("w",np.shape(w))
#     print("b",np.shape(b))
    w_transposed = w.T
    prediction = (np.dot(w_transposed,X)) + b
    prediction = sigmoid(prediction)
    prediction *= 2.0 # so prediction is either 1 or -1
    prediction = np.floor(prediction)
    return prediction

In [42]:
w = np.array([[0.1124], [0.231]])
b = -.3
X = np.array([[1,-1.1,-3.2],[1.2,2,.1]])
print(predictor(X,w,b))

[[ 1.  1.  0.]]


(7) Driver

In [44]:
def logisticRegression(train_X,train_Y,test_X,test_Y,learning_rate,epoch):
    # train_x = (image_width * image_width * 3, training_examples)
    # train_y = (1, train_examples)
    # test_x = (____, test_examples)
    # test_y = (1, test_examples)
    
    assert(train_X.shape[1] == train_Y.shape[1]), "train exampes dimensions invalid"
#     print("logisticShapes")
#     print("train x",train_X.shape)
#     print("train y",train_Y.shape)
#     print("test X",test_X.shape)
#     print("test Y",test_Y.shape)
    #Inialize w/b
    w,b = initialize_zeros(side_length*side_length*3)
    
    #Train
    grads, params, costs = optimizer(w, b, train_X, train_Y, learning_rate, epoch)
    w, b = params["w"], params["b"]
    print("Training finsihed")

    #Eval
    prediction_train = predictor(train_X,w,b)
    accuracy_train = np.mean(np.abs(prediction_train - train_Y))
    accuracy_output = "train accuracy {} %" .format(100 - accuracy_train * 100)
    print(accuracy_output)
    
    #Test
    prediction_test = predictor(test_X,w,b)
    accuracy_test = np.mean(np.abs(prediction_test - test_Y))
    accuracy_output = "Actural accuarcy {} %" .format(100 - accuracy_test * 100)
    print(accuracy_output)
    
    d = {
        "costs": costs,
        "Y_prediction_test" : prediction_test,
        "X_prediction_train" : prediction_train,
        "w" : w,
        "b" : b,
        "learning rate" : learning_rate,
        "epoch" : epoch
    }
    
    return d
        

In [82]:
train_X = standardized_train_examples
train_Y = labels_train
test_X = standardized_test_examples
test_Y = labels_test

# print(test_X.shape)
# print(test_X[0])
# print(test_X[0].shape)

# print("start")
# print(train_X.shape,"train_X")
# print(train_Y.shape,"train_Y")
# print(test_X.shape,"test_X")
# print(test_Y.shape,"test_Y")

logisticRegression(train_X,train_Y,test_X,test_Y,learning_rate,epoch)

cost 0.69314718056 0.1 %


  


cost 75.1541871903 10.1 %
cost 100.23671073 20.1 %
cost 90.7615053017 30.1 %
cost 93.795629712 40.1 %
cost 96.0179993611 50.1 %
cost 96.0296414915 60.1 %
cost 93.5860193042 70.1 %
cost 94.2518280708 80.1 %
cost 95.5111859127 90.1 %
costs [0.6931471805599454, 75.154187190324834, 100.23671073011566, 90.761505301682249, 93.795629711981206, 96.017999361092166, 96.02964149147472, 93.586019304239329, 94.251828070770216, 95.511185912724372]
Training finsihed
train accuracy 54.6875 %
Actural accuarcy 18.75 %


{'X_prediction_train': array([[ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  2.,  1.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  2.,  0.,  0.,
          1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,
          0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          2.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  1.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0