In [412]:

import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

In [413]:
from google.colab import drive 
drive.mount('/content/drive')
%cd /content/drive/MyDrive/
! mkdir logistic_data
!pwd
%cd logistic_data/
%cp /content/emnist-letters-train.csv /content/drive/MyDrive/logistic_data
%cp /content/emnist-letters-test.csv /content/drive/MyDrive/logistic_data
! ls

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive
mkdir: cannot create directory ‘logistic_data’: File exists
/content/drive/MyDrive
/content/drive/MyDrive/logistic_data
cp: cannot stat '/content/emnist-letters-train.csv': No such file or directory
cp: cannot stat '/content/emnist-letters-test.csv': No such file or directory
emnist-letters-test.csv  emnist-letters-train.csv


In [414]:
train=pd.read_csv("emnist-letters-train.csv",header=[0])

test=pd.read_csv("emnist-letters-test.csv",header=[0])
# null values were showing, so replaced them with the mean of the respective column
for col in train.columns:
  col_mean=train[col].mean()
  train[col].fillna(value=col_mean, inplace=True)
for col in test.columns:
   col_mean=test[col].mean()
   test[col].fillna(value=col_mean, inplace=True)

'''
train[np.isnan(train)] = 0 #replace null values with zero
test[np.isnan(test)] = 0
'''


'\ntrain[np.isnan(train)] = 0 #replace null values with zero\ntest[np.isnan(test)] = 0\n'

In [415]:
Y_train=train['23']
Y_train=np.vstack(Y_train).astype('float64')
X_train = train.iloc[: , 1:]

X_test = test.iloc[: , 1:]
Y_test=test['1']


In [416]:
X_train_normalised = X_train/255.000
X_test_normalised = X_test/255.000

In [417]:
train.isnull().sum().sum()

0

In [418]:
X_train.to_numpy().astype('float64')
X_test.to_numpy().astype('float64')

Y_test.to_numpy().astype('float64')

array([1., 1., 1., ..., 8., 8., 8.])

In [419]:
X_train_tr = X_train_normalised.transpose()
Y_train_tr = Y_train.reshape(1,Y_train.shape[0])
X_test_tr = X_test_normalised.transpose()
Y_test_tr = Y_test.values.reshape(1,Y_test.shape[0])

print(X_train_tr.shape)
print(Y_train_tr.shape)
print(X_test_tr.shape)
print(Y_test_tr.shape)

dim_train = X_train_tr.shape[1]
dim_test = X_test_tr.shape[1]


(784, 5969)
(1, 5969)
(784, 5897)
(1, 5897)


In [420]:
Y_train_shifted = Y_train_tr - 1
Y_test_shifted = Y_test_tr - 1

In [421]:
Xtrain = X_train_tr
ytrain = Y_train_shifted
Xtest = X_test_tr
ytest = Y_test_shifted

In [422]:
def sigmoid(z):
    s = 1.000 / (1.0000 + np.exp(-z)) # defining sigmoid fn
    
    return s

In [423]:
def initialize(dim):

    w = np.zeros((dim,1))
    b = 0.00
    
    assert (w.shape == (dim,1))
    assert (isinstance(b, float) or isinstance(b,int))
    
    return w,b

In [424]:
def propagate(w, b, X, Y):

    m = X.shape[1]  #no. of training eg
    
    z = np.dot(w.T,X)+b
    A = sigmoid(z)
    cost = -(1.0/m)*(np.sum(Y*np.log(A)+(1.0-Y)*np.log(1.0-A))) #defining cost function
    
    dw = 1.0/m*np.dot(X, (A-Y).T)
    db = 1.0/m*np.sum(A-Y)
    
    assert (dw.shape == w.shape)
    assert (db.dtype == float)
    
    cost = np.squeeze(cost)
    assert (cost.shape == ())
    
    grads = {"dw": dw, 
             "db":db}
    
    return grads, cost

In [425]:
def optimize(w, b, X, Y, num_iterations, learning_rate, print_cost = False):

    costs = []
    
    for i in range(num_iterations):
        
        grads, cost = propagate(w, b, X, Y)
        
        dw = grads["dw"]
        db = grads["db"]
        
        w = w - learning_rate*dw
        b = b - learning_rate*db
        
        if (i % 100) == 0:
            costs.append(cost)
            
        if print_cost and (i % 100) == 0:
            print ("Cost (iteration %i) = %f" %(i, cost))
            
    grads = {"dw": dw, "db": db}
    params = {"w": w, "b": b}
        
    return params, grads, costs

In [426]:
def predict (w, b, X):

    m = X.shape[1] # no. of training examples
    Y_prediction = np.zeros((1,m)) 
    w = w.reshape(X.shape[0],1)
    
    A = sigmoid (np.dot(w.T, X)+b)
    
    for i in range(A.shape[1]):
        if (A[:,i] > 0.500): 
            Y_prediction[:, i] = 1 # binary classification 
        elif (A[:,i] <= 0.5000):
            Y_prediction[:, i] = 0
            
    assert (Y_prediction.shape == (1,m))
    
    return Y_prediction

In [427]:
def model (X_train, Y_train, X_test, Y_test, num_iterations = 500, learning_rate=0.1, print_cost = False):
    
    w, b = initialize(X_train.shape[0])
    parameters, grads, costs = optimize(w, b, X_train, Y_train, num_iterations, learning_rate, print_cost)
    
    w = parameters["w"]
    b = parameters["b"]
    
    Y_prediction_test = predict (w, b, X_test)
    Y_prediction_train = predict (w, b, X_train)
    
    train_accuracy = 100.0 - np.mean(np.abs(Y_prediction_train-Y_train)*100.0)
    test_accuracy = 100.0 - np.mean(np.abs(Y_prediction_test-Y_test)*100.0)
    
    d = {"costs": costs,
        "Y_prediction_test": Y_prediction_test,
        "Y_prediction_train": Y_prediction_train,
         "w": w,
         "b": b,
         "learning_rate": learning_rate,
         "num_iterations": num_iterations}
    
    print ("Accuarcy Test: ",  test_accuracy)
    print ("Accuracy Train: ", train_accuracy)
    
    return d

In [428]:
d = model (Xtrain, 
           ytrain, 
           Xtest, 
           ytest, 
           num_iterations = 500 , 
           learning_rate = 0.10, 
           print_cost = True)

Cost (iteration 0) = 0.693147


  import sys
  import sys
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


Cost (iteration 100) = nan
Cost (iteration 200) = nan
Cost (iteration 300) = nan
Cost (iteration 400) = nan
Accuarcy Test:  -147.3630659657453
Accuracy Train:  -1057.9494052605125
