In [156]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import matplotlib.pyplot as plt
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import random
import math
import cv2
import os
# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

all_data = []
num_px   = 64

class img_obj():
    def __init__(self,path,shape):
        self.path = path
        self.shape= shape if (isinstance(shape,tuple)) else (shape,shape)
    def eval(self):
        self.img = cv2.imread(self.path)
        self.img = cv2.resize(self.img,self.shape,cv2.INTER_NEAREST)
        return self.img
    

In [157]:
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        img_path = os.path.join(dirname, filename)
        img      = img_obj(img_path,64)
        
        if(filename.lower().find('cat') ==0):
            all_data.append([img,1])
        elif(filename.lower().find('dog') == 0):
            all_data.append([img,0])
        else: continue


In [188]:
random.shuffle(all_data)
train_data = np.array(all_data[:math.ceil(0.7*len(all_data))])
test_data  = np.array(all_data[math.ceil(0.7*len(all_data)):])

print('Length of all data : ',len(all_data),'-Length of training data :',len(train_data) ,
                                                                        '-Length of test data :',len(test_data))
print('training data shape :',train_data.shape,' test data shape :',test_data.shape)

Length of all data :  10028 -Length of training data : 7020 -Length of test data : 3008
training data shape : (7020, 2)  test data shape : (3008, 2)


In [203]:
def data_pipeline(train_data,test_data,batch_size=50,get_all='True'):
    n_trbatchs = math.ceil(len(train_data)/batch_size)
    n_tebatchs = math.ceil(len(test_data)/batch_size) 
    
    if (get_all=='True'):
        seed_batch = 0#random.randint(0,n_tebatchs)
        test_imgs  = [img.eval().reshape(num_px*num_px*3,1) for img in test_data[seed_batch*batch_size:(seed_batch+1)*batch_size,0]]
        test_gt    = np.array(test_data[seed_batch*batch_size:(seed_batch+1)*batch_size,1],dtype=np.float64).reshape((1,batch_size))
        
        assert len(test_imgs)   == batch_size
        assert test_gt.shape    == (1,batch_size)
        yield test_imgs,test_gt
    
    i = 0
    while(True):
        if(((i+1)*batch_size) > len(train_data)):
            yield None,None
        train_imgs = [img.eval().reshape(num_px*num_px*3,1) for img in train_data[i*batch_size:(i+1)*batch_size,0]]
        train_gt   = np.array(train_data[i*batch_size:(i+1)*batch_size,1],dtype=np.float64).reshape((1,batch_size))
        
        assert len(train_imgs)   == batch_size
        assert train_gt.shape    == (1,batch_size)
        yield train_imgs,train_gt
        i+=1

In [223]:
def sigmoid(W,b,X):
    Z = np.dot(W.T,X)+b
    A = 1/(1+np.exp(-Z))
    for i,elem in enumerate(A[0]):
        if (elem <= 1.01086754e-02 ):A[0][i] = 1.01086754e-02
        elif(elem == 1): A[0][i] = 0.99
    
    assert A.dtype == np.float64
    return A

def compute_cost(A,X,Y):
    assert A.dtype == np.float64
    m  = X.shape[0] if X.shape[0] > X.shape[1] else X.shape[1]
    cost  = (-1/m) * np.sum( Y*np.log(A) + (1-Y) * (np.log(1-A)) ) 
    
    cost  = np.squeeze(cost)
    assert cost.shape == ()
    return cost
    
def compute_grads(A,X,Y):   
    assert X.dtype == np.float64
    assert A.shape == Y.shape
    
    m  = X.shape[0] if X.shape[0] < X.shape[1] else X.shape[1]
   
    dW = (1/m) * np.dot(X,(A-Y).T)
    db = (1/m) * np.sum((A-Y))
    
    grads = {'dW':dW ,
             'db':db}
    return grads

def compute_acc(A,Y):
    A   = (A > 0.5).astype(int)
    acc = 100-np.mean(np.abs(A-Y))*100.0
    return acc

def back_propagate(W,b,A,X,Y,LR):  
    grads = compute_grads(A,X,Y)
    dW    = grads['dW']
    db    = grads['db']
    
    W = W - LR*dW
    b = b - LR*db
    
    params= {'W':W,
             'b':b}
    
    return params

In [215]:
def imgs_preprocess(X):
    num_examples  = len(X)
    flat_img_size = X[0].shape[0]
    X = np.array(X,dtype=np.float64).reshape(num_examples,flat_img_size).T
    X/=255.0
    return X

In [216]:
LR = 0.005
n_epochs   = 100
batch_size = 200

#Initialize params 
W = np.zeros((num_px*num_px*3,1),dtype=np.float64)
b = 0

loss  = []

for epoch in range(0,n_epochs):
    data  = data_pipeline(train_data,test_data)
    try :tests = next(data)
    except : pass
    epoch_loss = 0
    for X,Y in data:
        if X == None:
            break
        X = imgs_preprocess(X)
        A = sigmoid(W,b,X)
        cost = compute_cost(A,X,Y)
        epoch_loss += cost
        
        params = back_propagate(W,b,A,X,Y,LR)
        W = params['W']
        b = params['b']
    
    #if (epoch == 5):
    loss.append(epoch_loss)
    if(epoch%10 ==0):
        print('epoch number :',epoch, 'Loss is equal to :',loss[-1])

epoch number : 0 Loss is equal to : 0.698756512149901
epoch number : 10 Loss is equal to : 0.6043193262880304
epoch number : 20 Loss is equal to : 0.5191944590843844
epoch number : 30 Loss is equal to : 0.5104315383091922
epoch number : 40 Loss is equal to : 0.4839977215798392
epoch number : 50 Loss is equal to : 0.4995858487735079
epoch number : 60 Loss is equal to : 0.491488099377809
epoch number : 70 Loss is equal to : 0.4710156493930272
epoch number : 80 Loss is equal to : 0.4887964988029167
epoch number : 90 Loss is equal to : 0.4567037599061172


In [225]:
data  = data_pipeline(train_data,test_data)
test_imgs,test_gt = next(data)
test_imgs,test_gt = next(data)
test_imgs   = imgs_preprocess(test_imgs)
preds       = sigmoid(W,b,test_imgs)
print('Test acc is : ',compute_acc(preds,test_gt))

[[0. 0. 0. 1. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 1. 1. 1. 0.
  1. 0. 0. 1. 1. 1. 1. 1. 0. 1. 1. 0. 1. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 1.
  1. 1.]]
Test acc is :  64.0


In [None]:
plt.plot(range(0,2000,100),loss[::100])
#Evaluate test accuracy

In [104]:

#Test 
# Any results you write to the current directory are saved as output.