(1) Import Libraries and define constants

In [82]:
import numpy as np
import scipy 
import os
import matplotlib.pyplot as plot
from scipy import ndimage

In [83]:
# Global Constants
side_length = 650
image_path = "./Logistic_Regression_Data/"
test_ratio = .2
epoch = 1000
learning_rate = .01

(2) Create train and test examples:

In [84]:
def getImageArrays(path, side_length): #returns list of images arrays for a specified path
    image_names = os.listdir(path)
    examples = []
    for image_name in image_names:
        if image_name.split(".")[-1] != "DS_Store":
            cur_image_path = path + image_name
            cur_array = scipy.ndimage.imread(cur_image_path,flatten=False) #reads image as numpy array (lenght,height,3)
            cur_array_resized = scipy.misc.imresize(cur_array,size=(side_length,side_length,3)) #resizes images to a uniform shape
            examples += [cur_array_resized] 
    return examples

In [85]:
#create examples & labels
cow_images_path = image_path + "cows/"
notCow_image_path = image_path + "notcows/"

examples_cow = getImageArrays(cow_images_path, side_length)
labels_cow = np.ones(len(examples_cow))
examples_notCow = getImageArrays(cow_images_path, side_length)
labels_notCow = np.zeros(len(examples_notCow))

examples_cow = np.array(examples_cow)
examples_notCow = np.array(examples_notCow)

assert(examples_cow.shape[1:] == (side_length,side_length,3)), "examples_cow are invalid shape"
examples = np.concatenate((examples_cow,examples_notCow))
labels = np.concatenate((labels_cow,labels_notCow))

In [86]:
#shuffle examples and labels
shuffled_indexing = np.random.permutation(labels.shape[0])
examples = examples[shuffled_indexing]
labels = labels[shuffled_indexing]

In [87]:
#seperate train and test examples
number_examples_test = int(len(examples)*test_ratio)
number_labels_test = int(len(labels)*test_ratio)

examples_test = examples[:number_examples_test]
examples_train = examples[number_examples_test:]
labels_test = labels[:number_labels_test]
labels_train = labels[number_labels_test:] 

In [88]:
#reshape labels for future matrix operations
labels_train = np.reshape(labels_train,(1,len(labels_train)))
labels_test = np.reshape(labels_test,(1,len(labels_test)))

In [89]:
#flatten examples
flattened_train_examples = train_examples.reshape(train_examples.shape[0], -1).T
flattened_test_examples = test_examples.reshape(test_examples.shape[0], -1).T                                                                                                                                                                                                                                                              

In [90]:
# Standardize color values of the image (decrease computational cost durring cross entropy)
standardized_train_examples = flattened_train_examples/255 #225 is the maximum rgb value/ This is done to decrease varaince in inputs thus more efficint
standardized_test_examples = flattened_test_examples/255

(3) Defeine basic functions

In [91]:
def sigmoid(x):
    output = 1.0/(1.0+np.exp(-x))
    return output

In [92]:
def initialize_zeros(dimension): #dimension = len(flattened_examples)
    zeroed_weights = np.zeros((dimension, 1)) 
    zeroed_baisies = 0. #baisies always start at 0 b/c they are developed as the model trains
    assert(isinstance(zeroed_baisies, float) or isinstance(zeroed_baisies, int))
    
    return zeroed_weights, zeroed_baisies

In [93]:
def crossEntropyLoss(a,Y):
    m = Y.shape[1] #len(Y) doesn't work, need 2nd dimesnion
    loss = -(1 / m) * np.sum(Y * np.log(a) + (1 - Y) * np.log(1 - a))
    return loss

(4) Foward Pass

In [94]:
def propagate(X,Y,w,b):
    # b = scalar
    # w = (____, 1)
    # X = (____, examples)
    # Y = (1, examples)
#     print("propagate shapes")
#     print("X",np.shape(X))
#     print("Y",np.shape(Y))
#     print("w",np.shape(w))
#     print("b",np.shape(b))
    
    w_transposed = w.T
    num_of_examples = np.shape(X)[1]
    
    dot_product = np.dot(w_transposed,X) #dot product of X and w be careful to use correct order
    z = dot_product + b
    a = sigmoid(z) #in begining should be 0
    loss = crossEntropyLoss(a,Y)
    dw = (1/num_of_examples) * np.dot(X,(a-Y).T) #?
    db = (1/num_of_examples) * np.sum(a-Y) #np.sum collapes input
    loss = np.squeeze(loss)
    grads = {"dw":dw,"db":db}
    return grads,loss    


(5) Optimize

In [95]:
def optimizer(w, b, X, Y, learning_rate, num_iterations, print_results=True):
    # b = scalar
    # w = (____, 1)
    # X = (____, examples)
    # Y = (1, examples)
#     print("optimizer shapes")
#     print("X",np.shape(X))
#     print("Y",np.shape(Y))
#     print("w",np.shape(w))
#     print("b",np.shape(b))
    costs = []
    for i in range(num_iterations):
        grads, cost = propagate(X,Y,w,b)
        dw = grads["dw"]
        db = grads["db"]
        w = w - (learning_rate * dw)
        b = b - (learning_rate * db)
        if (i % 10 == 0) and (print_results == True):
            costs += [cost]
            print("cost",cost,(i+1)*100/num_iterations,"%")
            
    if print_results == True:
        print("costs" , costs )
    grads = {"dw":dw,"db":db}
    params = {"w":w,"b":b}
    return grads,params,costs

(6) Validate

In [96]:
def predictor(X,w,b):
    # b = scalar
    # w = (____, 1)
    # X = (____, examples)
#     print("predictor shapes")
#     print("X",np.shape(X))
#     print("w",np.shape(w))
#     print("b",np.shape(b))
    w_transposed = w.T
    prediction = (np.dot(w_transposed,X)) + b
    prediction = sigmoid(prediction)
    prediction *= 2.0
    prediction = np.floor(prediction)
    return prediction

(7) Driver

In [97]:
def logisticRegression(train_X,train_Y,test_X,test_Y,learning_rate,epoch):
    # train_x = (image_width * image_width * 3, training_examples)
    # train_y = (1, train_examples)
    # test_x = (____, test_examples)
    # test_y = (1, test_examples)
    
    assert(train_X.shape[1] == train_Y.shape[1]), "train exampes dimensions invalid"
#     print("logisticShapes")
#     print("train x",train_X.shape)
#     print("train y",train_Y.shape)
#     print("test X",test_X.shape)
#     print("test Y",test_Y.shape)
    #Inialize w/b
    w,b = initialize_zeros(side_length*side_length*3)
    
    #Train
    _, params,_ = optimizer(w, b, train_X, train_Y, learning_rate, epoch)
    w,b = params["w"], params["b"]
    print("Training finsihed")

    #Eval
    prediction_train = predictor(train_X,w,b)
    accuracy_train = np.mean(np.abs(prediction_train - train_Y))
    print("Train Accuracy:", accuracy_train)
    
    #Test
    prediction_test = predictor(test_X,w,b)
    accuracy_test = np.mean(np.abs(prediction_test - test_Y))
    print("Actual Accuracy:", accuracy_test)
        

In [98]:
train_X = standardized_train_examples
train_Y = labels_train
test_X = standardized_test_examples
test_Y = labels_test

# print("start")
# print(train_X.shape,"train_X")
# print(train_Y.shape,"train_Y")
# print(test_X.shape,"test_X")
# print(test_Y.shape,"test_Y")

logisticRegression(train_X,train_Y,test_X,test_Y,learning_rate,epoch)

cost 0.69314718056 0.1 %


  This is separate from the ipykernel package so we can avoid doing imports until
  This is separate from the ipykernel package so we can avoid doing imports until
  


cost nan 1.1 %
cost nan 2.1 %
cost nan 3.1 %
cost nan 4.1 %
cost nan 5.1 %
cost nan 6.1 %
cost nan 7.1 %
cost nan 8.1 %
cost nan 9.1 %
cost nan 10.1 %
cost nan 11.1 %
cost nan 12.1 %
cost nan 13.1 %
cost nan 14.1 %
cost nan 15.1 %
cost nan 16.1 %
cost nan 17.1 %
cost nan 18.1 %
cost nan 19.1 %
cost nan 20.1 %
cost nan 21.1 %
cost nan 22.1 %
cost nan 23.1 %
cost nan 24.1 %
cost nan 25.1 %
cost nan 26.1 %
cost nan 27.1 %
cost nan 28.1 %
cost nan 29.1 %
cost nan 30.1 %
cost nan 31.1 %
cost nan 32.1 %
cost nan 33.1 %
cost nan 34.1 %
cost nan 35.1 %
cost nan 36.1 %
cost nan 37.1 %
cost nan 38.1 %
cost nan 39.1 %
cost nan 40.1 %
cost nan 41.1 %
cost nan 42.1 %
cost nan 43.1 %
cost nan 44.1 %
cost nan 45.1 %
cost nan 46.1 %
cost nan 47.1 %
cost nan 48.1 %
cost nan 49.1 %
cost nan 50.1 %
cost nan 51.1 %
cost nan 52.1 %
cost nan 53.1 %
cost nan 54.1 %
cost nan 55.1 %
cost nan 56.1 %
cost nan 57.1 %
cost nan 58.1 %
cost nan 59.1 %
cost nan 60.1 %
cost nan 61.1 %
cost nan 62.1 %
cost nan 63.1 %
c