In [194]:
import os
import file_parser
import pandas as pd
import cv2
import numpy as np
import time
import h5py
import json

import file_parser

## Create CSV file [file_location, output] 

In [2]:
dataset_path = "E:\MachineLearning\Datasets\dogs-vs-cats\train"
dataset_csv = 'data/data.csv'

if not os.path.exists(dataset_csv):
    if not os.path.exists('data'):
        os.makedirs('data')
    df = file_parser.get_all_files()
    file_parser.save_as_csv(df,dataset_csv)
    print("Created csv file")
else:
    print("CSV file already exists. Skipping...")

CSV file already exists. Skipping...


## Create train and test data 

In [3]:
x_train_file = 'data/train.h5'
x_test_file = 'data/test.h5'
image_size_x = 24;
image_size_y = 24;
train_size = 0.8;
if not os.path.exists(x_test_file):
    file_parser.split_and_save(dataset_csv, image_size_x, image_size_y, train_size, x_train_file, x_test_file)
    print("Dataset split and saved as train.h5 and test.h5")
else:
    print("Train and Test files already exists. Skipping...")

Train and Test files already exists. Skipping...


## CUSTOM FUNCTIONS

In [60]:
def sigmoid(z):
    return 1/(1+np.exp(-z))

##  Load train and test data

In [4]:
with h5py.File('data/train.h5','r') as train:
    X_train = train.get('X')[()]
    y_train = train.get('y')[()]

In [5]:
print(X_train.shape, y_train.shape)

(1728, 20000) (1, 20000)


In [6]:
with h5py.File('data/test.h5','r') as test:
    X_test = test.get('X')[()]
    y_test = test.get('y')[()]

In [7]:
print(X_test.shape, y_test.shape)

(1728, 5000) (1, 5000)


In [8]:
X_test.shape

(1728, 5000)

In [11]:
max(X_train[0])

255

In [12]:
X_train = X_train/255
X_test = X_test/255

In [13]:
X_train[0]

array([0.71372549, 0.42745098, 0.30980392, ..., 0.50196078, 0.57254902,
       0.12941176])

## Initialize weights and bias

In [215]:
def run_logistic_regression(X, y, w, b, learning_rate=0.01, stop_criteria=None, num_iterations=5000, print_cost=False):
    m = X.shape[0]
    _run_time=0
    error = stop_criteria+1 or 1
    i=0
    J = None
    while i<num_iterations and abs(error)>stop_criteria:
        i+=1
        t1=time.time()
        z = np.dot(w.T, X)+b
        a = sigmoid(z)


        dz = a-y
        dw = np.dot(X, dz.T)/m
        db = np.sum(dz)/m

        J_prev = J or 0
        J = -np.sum(y*np.log(a)+(1-y)*np.log(1-a))/m
        error = abs(J-J_prev)
        
        w -= learning_rate*dw
        b -= learning_rate*db
        
        t2=time.time()
        _run_time+=(t2-t1)
        if print_cost:
            print("Iteration {i}: cost = {cost}, execution time: {time:.3f}s, error: {err:.5f}".format(i=i, cost=J, time=t2-t1, err=error))
    
    
    print("\nAfter {n} iterations: cost = {cost}, total execution time: {time:.3f}s".format(n=i, cost=J, time=_run_time))
    return {
        "w": w.tolist(),
        "b": b,
        "learning_rate": learning_rate,
        "cost": J
    }

In [216]:
m=X_train.shape[0]
learning_rate = 0.001
w = np.ones((m,1))*0.001
b = 0
n=10
show_epoch_details=True
stop_criteria=0.0001

res = run_logistic_regression(X_train, y_train, w, b, learning_rate, stop_criteria=stop_criteria, num_iterations=n,print_cost=show_epoch_details)


  0%|          | 0/10 [00:00<?, ?it/s][A
 20%|██        | 2/10 [00:00<00:00, 17.06it/s][AIteration 1: cost = 8.904553324368749, execution time: 0.057s, error: 8.90455
Iteration 2: cost = 8.018315786613861, execution time: 0.059s, error: 0.88624
Iteration 3: cost = 8.013919533977745, execution time: 0.069s, error: 0.00440

 40%|████      | 4/10 [00:00<00:00, 16.24it/s][A
 60%|██████    | 6/10 [00:00<00:00, 15.86it/s][AIteration 4: cost = 8.010259186752245, execution time: 0.065s, error: 0.00366
Iteration 5: cost = 8.006731769184233, execution time: 0.070s, error: 0.00353
Iteration 6: cost = 8.00332448377366, execution time: 0.051s, error: 0.00341

 80%|████████  | 8/10 [00:00<00:00, 16.06it/s][A
100%|██████████| 10/10 [00:00<00:00, 16.12it/s]Iteration 7: cost = 8.000030118245958, execution time: 0.061s, error: 0.00329
Iteration 8: cost = 7.996842002904957, execution time: 0.055s, error: 0.00319
Iteration 9: cost = 7.993753931834328, execution time: 0.056s, error: 0.00309
Iteration

In [167]:
with open("data/result.json",'w') as f:
    f.write(json.dumps(res, default=str))

## Train-Test Accuracy

In [169]:
with h5py.File('data/train.h5','r') as train:
    X_train = train.get('X')[()]
    y_train = train.get('y')[()]

In [170]:
with h5py.File('data/test.h5','r') as test:
    X_test = test.get('X')[()]
    y_test = test.get('y')[()]

In [172]:
X_train = X_train/255
X_test = X_test/255

In [173]:
with open('data/result.json','r') as f:
    res = json.load(f)

In [177]:
w = np.array(res['w'])
b = res['b']

In [176]:
w.shape

(1728, 1)

In [183]:
y_train_pred = sigmoid(np.dot(w.T,X_train)+b)>0.5
y_test_pred = sigmoid(np.dot(w.T,X_test)+b)>0.5

In [187]:
True-1

0

In [220]:
np.sum(y_train==y_train_pred)/20000*100

59.465

# ROUGH 

In [9]:
img = cv2.imread(X_train_loc[0])

In [10]:
img.shape

(374, 500, 3)

In [12]:
small = cv2.resize(img, (24,24))

In [35]:
small.shape

array([ 95, 172, 211, ...,   0,   4,   3], dtype=uint8)

In [44]:
np.ones((3,1))*0.1

array([[0.1],
       [0.1],
       [0.1]])

In [15]:
cv2.imshow('image', small)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [17]:
cv2.resize(img, (24,24)).reshape(1,-1).shape

(1, 1728)

In [11]:
tst = pd.read_csv('data/data.csv')

In [12]:
tst.shape

(25000, 3)

In [218]:
a = np.array([[1,2,3],[2,2,2]])
b = np.array([[2,2,2]])
a==b

array([[False,  True, False],
       [ True,  True,  True]])