# Load Data 

In [1]:
import glob, os 
from skimage import io, transform
import numpy as np
import matplotlib.pyplot as plt
import random

In [2]:
# Folder that holds the input JPEGs (relative to the notebook's working directory).
path="images/"
# glob returns matches in arbitrary, OS-dependent order; sorting gives the
# same file order on every run, before any shuffling happens downstream.
imlist = sorted(glob.glob(os.path.join(path, '*.jpg')))

In [3]:
def _label_from_path(file):
    """Return the class label encoded in an image's file name.

    The label is 0 when the character immediately before the first 'P' of the
    file name is 'N', and 1 otherwise. Only the base name is inspected, so a
    'P' that happens to appear in a directory component cannot affect the label.
    """
    name = os.path.basename(file)
    p_index = name.find('P')
    # p_index <= 0 means "no 'P'" or "'P' is the first character": nothing precedes it.
    return 0 if p_index > 0 and name[p_index - 1] == 'N' else 1


def dataset(file_list,size=(300,180),flattened=False,n_train=500):
    """Load, resize and label images, then split them into train and test sets.

    The files are shuffled (on a copy, so the caller's list is left untouched);
    the first ``n_train`` shuffled files become the training set and all
    remaining files become the test set, so every image lands in exactly one split.

    Parameters
    ----------
    file_list : sequence of str
        Paths of the image files to load.
    size : tuple of int
        Output shape handed to ``transform.resize`` for every image.
    flattened : bool
        When True, each resized image is flattened to 1-D before being stored.
    n_train : int
        Maximum number of images placed in the training set (default 500).

    Returns
    -------
    tuple of np.ndarray
        ``(data_train, label_train, data_test, label_test)``.
    """
    files = list(file_list)
    random.shuffle(files)

    data_train, label_train = [], []
    data_test, label_test = [], []
    for i, file in enumerate(files):
        image = transform.resize(io.imread(file), size)
        if flattened:
            image = image.flatten()

        # Position in the shuffled order decides the split.
        if i < n_train:
            data_train.append(image)
            label_train.append(_label_from_path(file))
        else:
            data_test.append(image)
            label_test.append(_label_from_path(file))

    # Show the first shuffled file as a quick check; skipped for empty input.
    if files:
        print(files[0])

    return np.array(data_train), np.array(label_train), np.array(data_test), np.array(label_test)

In [4]:
# Seed Python's RNG so the random train/test assignment made inside dataset()
# is repeatable for a given file order.
random.seed(0)
# Load the dataset (may take a few seconds)
X_train,y_train,X_test,y_test=dataset(imlist)

images\P60227-182r.jpg


In [5]:
# Report the shape of every split.
# X has the following structure: X[imageid, y,x,channel]
for _caption, _split in (
    ('X-train: ', X_train),  # data
    ('y-train: ', y_train),  # target
    ('X-test: ', X_test),
    ('y-test: ', y_test),
):
    print(_caption, _split.shape)

X-train:  (500, 300, 180, 3)
y-train:  (500,)
X-test:  (214, 300, 180, 3)
y-test:  (214,)


In [6]:
# Class balance of the training labels: one count per class, then the total.
for _caption, _cls in (('Class 0(train): ', 0), ('Class 1(train): ', 1)):
    print(_caption, sum(y_train == _cls))
print('Total  : ', len(y_train))

Class 0(train):  248
Class 1(train):  252
Total  :  500


In [7]:
%matplotlib inline 

In [8]:
# Disabled preview cell: would show two sample images side by side, each
# titled with its index and class label.
# NOTE(review): `X` and `y` are not defined in the visible cells (the loader
# returns X_train / y_train / X_test / y_test), so rename them before
# re-enabling this code.
# fig, axes = plt.subplots(1,2)
# k=0
# plt.sca(axes[0])
# plt.imshow(X[k])
# plt.title('img {} - class {}'.format(k, y[k]))

# k=400
# plt.sca(axes[1])
# plt.imshow(X[k])
# plt.title('img {} - class {}'.format(k, y[k]));

In [9]:
# Flatten every image into a single column, giving (features, examples) matrices.
# The -1 lets reshape infer the flattened length from the remaining dimensions.
train_x_flatten = np.transpose(X_train.reshape(len(X_train), -1))
test_x_flatten = np.transpose(X_test.reshape(len(X_test), -1))

# No rescaling happens in this cell: the flattened matrices are used as-is.
# NOTE(review): presumably the loader already yields pixel values in [0, 1] -- confirm.
X_train, X_test = train_x_flatten, test_x_flatten

print ("train_x's shape: " + str(train_x_flatten.shape))
print ("test_x's shape: " + str(test_x_flatten.shape))

train_x's shape: (162000, 500)
test_x's shape: (162000, 214)


In [10]:
# Network dimensions and training-set size.
nx = 300 * 180 * 3   # input features per image: rows * columns * channels = 162000
nh = 7               # hidden-layer units (rows of W1)
ny = 1               # output units (rows of W2)
m = 500              # number of training examples

In [11]:
# Fix NumPy's global seed so the random weight initialisation is repeatable
# after a kernel restart.
np.random.seed(1)

# Small random weights break the symmetry between hidden units; biases start at zero.
W1 = np.random.randn(nh,nx) *0.01
b1 = np.zeros((nh,1))
W2 = np.random.randn(ny,nh) * 0.01
b2 = np.zeros((ny,1))

# Turn the label vectors into column vectors. -1 lets NumPy infer the number
# of examples, so the cell no longer depends on a fixed dataset size.
y_train = y_train.reshape(-1,1)
y_test = y_test.reshape(-1,1)

In [12]:
# Sanity check: parameter shapes, then the label of the first training example.
for _weights in (W1, W2):
    print(_weights.shape)
print(y_train[0, 0])

(7, 162000)
(1, 7)
1


In [13]:
# Batch gradient descent for the two-layer network
# (tanh hidden layer, sigmoid output, cross-entropy cost).
num_iters=3000
alpha = 0.0075                    # learning rate
eps = 1e-12                       # keeps the log() arguments in the cost away from 0
Y_row = y_train.reshape(1, -1)    # labels as a (1, examples) row; loop-invariant
m_train = X_train.shape[1]        # number of training examples (columns of X_train)
for i in range(num_iters):
    # Forward pass.
    Z1 = np.matmul(W1,X_train) + b1
    A1 = np.tanh(Z1)
    Z2 = np.matmul(W2,A1) + b2
    A2 = 1/(1+np.exp(-Z2))

    # Cross-entropy cost. The activations are clipped only for the logarithms,
    # so a saturated sigmoid (exactly 0 or 1) cannot turn the cost into nan/inf;
    # the gradients below still use the unclipped A2.
    A2_safe = np.clip(A2, eps, 1 - eps)
    J = -np.sum(Y_row * np.log(A2_safe) + (1 - Y_row) * np.log(1 - A2_safe)) / m_train

    # Backward pass.
    dZ2 = A2 - Y_row
    dW2 = np.matmul(dZ2, A1.T) / m_train
    db2 = np.sum(dZ2,axis=1,keepdims=True) / m_train
    dZ1 = np.matmul(W2.T, dZ2) * (1 - (A1 * A1))   # derivative of tanh is 1 - A1**2
    dW1 = np.matmul(dZ1, X_train.T) / m_train
    db1 = np.sum(dZ1,axis=1,keepdims=True) / m_train

    # Gradient-descent parameter update.
    W1 = W1 - alpha*dW1
    b1 = b1 - alpha*db1
    W2 = W2 - alpha*dW2
    b2 = b2 - alpha*db2

    # Report the cost every 100 iterations.
    if(i%100 == 0):
        print("Cost after ",i," iterations : ",J)

Cost after  0  iterations :  0.6945195516064421
Cost after  100  iterations :  0.6796311591792955
Cost after  200  iterations :  0.5951629434179057
Cost after  300  iterations :  0.5533779969625875
Cost after  400  iterations :  0.4931277778815836
Cost after  500  iterations :  0.44138545531795703
Cost after  600  iterations :  0.3707908610263413
Cost after  700  iterations :  0.38516464809094975
Cost after  800  iterations :  0.39138769283389957
Cost after  900  iterations :  0.3162362186695258
Cost after  1000  iterations :  0.28793414519107313
Cost after  1100  iterations :  0.2374071632926453
Cost after  1200  iterations :  0.28237426199326854
Cost after  1300  iterations :  0.21504408114225462
Cost after  1400  iterations :  0.18051435856580184
Cost after  1500  iterations :  0.18780171230178758
Cost after  1600  iterations :  0.16753679635104304
Cost after  1700  iterations :  0.16416039914200553
Cost after  1800  iterations :  0.15429927360181142
Cost after  1900  iterations :  

In [14]:
def forwardProp(X):
    """Run the two-layer network forward on X and return the output activations.

    Reads the notebook-level parameters W1, b1, W2 and b2. The hidden layer
    applies tanh; the output layer applies the logistic sigmoid.

    Parameters
    ----------
    X : array
        Input matrix; must be compatible with ``np.matmul(W1, X)``.

    Returns
    -------
    array
        Sigmoid activations of the output layer.
    """
    hidden = np.tanh(np.matmul(W1, X) + b1)
    logits = np.matmul(W2, hidden) + b2
    return 1 / (1 + np.exp(-logits))

In [20]:
def predict(X):
    """Return boolean class predictions for X.

    An entry is True where the output of ``forwardProp(X)`` is strictly
    greater than 0.5, and False otherwise.
    """
    return forwardProp(X) > 0.5

In [22]:
# Accuracy on the training set: percentage of examples whose thresholded
# prediction equals the label.
predictions = predict(X_train)
# The mean of the element-wise comparison is already a scalar, which avoids
# calling float() on a (1, 1) array (deprecated by NumPy for ndim > 0).
train_accuracy = np.mean(predictions == y_train.reshape(1, -1)) * 100
print ('Train Accuracy: %d' % train_accuracy + '%')

(1, 500)
(500, 1)
Test Accuracy: 95%


In [23]:
# Accuracy on the held-out test set: percentage of examples whose thresholded
# prediction equals the label.
predictions = predict(X_test)
y_test = y_test.reshape(y_test.size,1)
# The mean of the element-wise comparison is already a scalar, which avoids
# calling float() on a (1, 1) array (deprecated by NumPy for ndim > 0).
test_accuracy = np.mean(predictions == y_test.reshape(1, -1)) * 100
print ('Test Accuracy: %d' % test_accuracy + '%')

(214, 1)
Test Accuracy: 86%
