# ***LOGISTIC REGRESSION***

In [1]:
# Importing Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## ***Training***

In [2]:
# Sigmoid Function
def SigmoidFunction(X):
  """Element-wise logistic sigmoid, 1 / (1 + exp(-X)).

  Evaluated as exp(-log(1 + exp(-X))) through np.logaddexp, so large
  negative inputs no longer overflow inside np.exp (the naive formula
  emits a RuntimeWarning there). Results agree with the naive formula up
  to floating-point rounding.

  Parameters
  ----------
  X : scalar or array-like of numbers

  Returns
  -------
  NumPy float scalar (scalar input) or ndarray shaped like X, with every
  value in the closed interval [0, 1].
  """
  Z = np.asarray(X)
  # logaddexp(0, -Z) == log(exp(0) + exp(-Z)) == log(1 + exp(-Z))
  return np.exp(-np.logaddexp(0.0, -Z))

In [3]:
# Hypothesis: sigmoid of the linear scores X . theta
def Hypothesis(X, theta):
  """Return SigmoidFunction applied to np.dot(X, theta)."""
  LinearScores = np.dot(X, theta)
  return SigmoidFunction(LinearScores)

In [4]:
# Cost Function
def Cost(X, Y1, theta):
    """Cross-entropy cost of the hypothesis, averaged over the rows of X.

    Computes -sum(Y1*log(H) + (1-Y1)*log(1-H)) / len(X), where
    H = Hypothesis(X, theta). The sum runs over every element of H, so
    with several output columns the per-column costs are added together.

    Parameters
    ----------
    X     : feature matrix passed straight to Hypothesis.
    Y1    : targets with the same shape as Hypothesis(X, theta).
    theta : weights passed straight to Hypothesis.

    Returns
    -------
    The scalar cost.
    """
    H = Hypothesis(X, theta)
    # A saturated sigmoid returns exactly 0.0 or 1.0; log() of that is -inf
    # and 0 * -inf evaluates to nan. Keep H strictly inside (0, 1).
    eps = np.finfo(float).eps
    H = np.clip(H, eps, 1 - eps)
    return -(np.sum(Y1*np.log(H) + (1-Y1)*np.log(1-H)))/(len(X))

In [5]:
# Logistic Regression Training Function
def LogisticRegressionTraining(X, Y_HotOne, theta, LearningRate, Iterations, BatchSize):
  """Fit theta by plain gradient descent on the first BatchSize rows.

  Despite the parameter name, no mini-batching takes place: the same
  leading slice X[0:BatchSize] / Y_HotOne[0:BatchSize] is used on every
  iteration.

  Parameters
  ----------
  X            : feature matrix (rows are samples).
  Y_HotOne     : target matrix with one row per sample.
  theta        : initial weights; not modified in place.
  LearningRate : step size applied to the averaged gradient.
  Iterations   : number of gradient steps.
  BatchSize    : number of leading rows of X / Y_HotOne to train on.

  Returns
  -------
  The weights after `Iterations` updates.
  """
  X_Clipped = X[0:BatchSize]
  Y_HotOne_Clipped = Y_HotOne[0:BatchSize]
  # Average over the rows actually selected. Slicing silently truncates
  # when BatchSize exceeds len(X); dividing by BatchSize in that case
  # would shrink every update.
  m = len(X_Clipped)

  for _ in range(Iterations):
    H = Hypothesis(X_Clipped, theta)
    Gradient = np.dot(X_Clipped.T, H - Y_HotOne_Clipped) / m
    theta = theta - LearningRate*Gradient

  return theta

In [6]:
# Load the training CSV into a DataFrame and display it
# NOTE(review): read_csv treats the first line of the file as the header by
# default. The column names shown in this cell's output (23, 0, 0.1, ...)
# look like a data row rather than real headers, so the first sample is
# probably being consumed as the header -- confirm; if so, read with
# header=None and update the '23' column references used further down.
TrainingDataPath = "/content/drive/MyDrive/Colab Notebooks/emnist-letters-train.csv"
TrainingData = pd.read_csv(TrainingDataPath)
TrainingData

Unnamed: 0,23,0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,...,0.406,0.407,0.408,0.409,0.410,0.411,0.412,0.413,0.414,0.415
0,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,16,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,15,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,23,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,17,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
88794,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
88795,21,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
88796,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
88797,23,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [7]:
# Training features: every column except the label column '23', divided by
# 255, with a leading column of ones prepended (bias term)
X = TrainingData.drop(columns='23').to_numpy() / 255
X = np.hstack((np.ones((len(X), 1)), X))

print('X Shape : ', X.shape)
print(X)

X Shape :  (88799, 785)
[[1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 ...
 [1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]]


In [8]:
# Training labels: column '23' as an (n, 1) column vector
Y = TrainingData['23'].to_numpy(copy=True).reshape(-1, 1)

print('Y Shape : ', Y.shape)
print(Y)

Y Shape :  (88799, 1)
[[ 7]
 [16]
 [15]
 ...
 [ 1]
 [23]
 [12]]


In [9]:
# Sorted distinct label values present in Y
Classes = np.unique(Y)
print('Classes : ', Classes, sep='\n')

Classes : 
[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26]


In [10]:
# One-hot encoding of Y
Num_Classes = len(Classes)
m = len(Y)
Y_HotOne = np.zeros((X.shape[0], Num_Classes))

# Row i gets a 1 in column Y[i]-1 (label k is stored in column k-1).
# A single indexed assignment replaces the per-row Python loop.
Y_HotOne[np.arange(m), Y.ravel() - 1] = 1

print('Y_HotOne Shape : ', Y_HotOne.shape)
print(Y_HotOne)

Y_HotOne Shape :  (88799, 26)
[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [1. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


In [11]:
# Initializing theta to zeros: one row per column of X, one column per
# distinct label in Y
theta = np.zeros(shape=(X.shape[1], np.unique(Y).size), dtype=float)
print('theta Shape : ', theta.shape)
print(theta)

theta Shape :  (785, 26)
[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


In [12]:
# Main / Calculating theta
LearningRate = 0.1
Iterations = 100
# Train on every row of X. Derived from the data rather than hard-coded so
# it stays correct if the number of loaded rows changes.
BatchSize = X.shape[0]

# NOTE: theta is overwritten with the trained weights, so re-running this
# cell without re-running the initialisation cell continues training from
# the previous result instead of starting over.
theta = LogisticRegressionTraining(X, Y_HotOne, theta, LearningRate, Iterations, BatchSize)
print('theta Shape : ', theta.shape)
print(theta)
print('\nCost : ', Cost(X, Y_HotOne, theta))

theta Shape :  (785, 26)
[[-0.23884358 -0.2198144  -0.17981288 ... -0.22729002 -0.12528819
  -0.23117569]
 [ 0.          0.          0.         ...  0.          0.
   0.        ]
 [ 0.          0.          0.         ...  0.          0.
   0.        ]
 ...
 [ 0.          0.          0.         ...  0.          0.
   0.        ]
 [ 0.          0.          0.         ...  0.          0.
   0.        ]
 [ 0.          0.          0.         ...  0.          0.
   0.        ]]

Cost :  2.9239150678632435


In [13]:
# Hypothesis of the trained weights on the training features
H = Hypothesis(X=X, theta=theta)

print('H Shape : ', H.shape)
print(H)

H Shape :  (88799, 26)
[[1.01069658e-02 6.81240600e-04 5.79396619e-03 ... 2.84490248e-04
  1.47189859e-04 4.60456229e-04]
 [2.14021853e-02 7.34360774e-02 7.75038867e-03 ... 1.92745897e-02
  4.49375059e-02 3.13154034e-02]
 [1.37324274e-02 5.27019046e-03 2.23323090e-01 ... 8.46563317e-04
  1.03213721e-03 9.90788078e-02]
 ...
 [4.04982695e-02 3.88967315e-03 1.35643151e-02 ... 2.94774204e-02
  8.13950633e-03 2.96240914e-02]
 [4.47796409e-02 9.88461281e-02 9.90386380e-03 ... 1.14702041e-01
  2.31190866e-02 1.22066883e-01]
 [7.22718337e-02 7.14595488e-02 6.47636879e-02 ... 6.98391666e-02
  1.31672575e-01 9.80999675e-02]]


In [14]:
# One-hot notation for Hypothesis
# Put a 1 wherever an entry equals the maximum of its row (tied maxima all
# get a 1) and 0 elsewhere. Vectorised so the cell no longer rebinds the
# builtin name `max` at notebook scope and no longer loops in Python.
H_HotOne = (H == H.max(axis=1, keepdims=True)).astype(float)

print('H_HotOne Shape : ', H_HotOne.shape)
print(H_HotOne)

H_HotOne Shape :  (88799, 26)
[[1. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 1. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


In [15]:
# Generating Predicted Output
# The predicted label is (index of the first 1 in the row) + 1; a row with
# no 1 at all keeps the value 0. argmax on a boolean row returns the first
# True, which matches the "break on first hit" scan it replaces.
IsOne = H_HotOne == 1
Y_Predicted = np.zeros((Y.shape[0], 1))
Y_Predicted[:, 0] = np.where(IsOne.any(axis=1), IsOne.argmax(axis=1) + 1, 0)

print('Y_Predicted Shape : ', Y_Predicted.shape)
print(Y_Predicted)

Y_Predicted Shape :  (88799, 1)
[[ 1.]
 [16.]
 [ 3.]
 ...
 [ 5.]
 [23.]
 [ 9.]]


In [16]:
# Side-by-side view: column 0 is the expected label, column 1 the prediction
print('EXPECTED vs PREDICTED')
print(np.hstack((Y, Y_Predicted)))

EXPECTED vs PREDICTED
[[ 7.  1.]
 [16. 16.]
 [15.  3.]
 ...
 [ 1.  5.]
 [23. 23.]
 [12.  9.]]


In [17]:
# Calculating Accuracy
# A row counts as correct when its predicted one-hot row and its expected
# one-hot row overlap in exactly one position (sum of products == 1).
Hits = np.sum(H_HotOne * Y_HotOne, axis=1) == 1
count = int(np.count_nonzero(Hits))

print('Accuracy : ', (count/len(Y_HotOne))*100, '%')

Accuracy :  58.50516334643409 %


## ***Testing***

In [18]:
# Load the testing CSV into a DataFrame and display it
# NOTE(review): read_csv treats the first line of the file as the header by
# default. The column names shown in this cell's output (1, 0, 0.1, ...)
# look like a data row rather than real headers, so the first sample is
# probably being consumed as the header -- confirm; if so, read with
# header=None and update the '1' column references used further down.
TestingDataPath = "/content/drive/MyDrive/Colab Notebooks/emnist-letters-test.csv"
TestingData = pd.read_csv(TestingDataPath)
TestingData

Unnamed: 0,1,0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,...,0.548,0.549,0.550,0.551,0.552,0.553,0.554,0.555,0.556,0.557
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14794,19,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
14795,19,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
14796,19,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
14797,19,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [19]:
# Testing features: every column except the label column '1', divided by
# 255, with a leading column of ones prepended (bias term)
X_test = TestingData.drop(columns='1').to_numpy() / 255
X_test = np.hstack((np.ones((len(X_test), 1)), X_test))

print('X_test Shape : ', X_test.shape)
print(X_test)

X_test Shape :  (14799, 785)
[[1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 ...
 [1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]]


In [20]:
# Testing labels: column '1' as an (n, 1) column vector
Y_test = TestingData['1'].to_numpy(copy=True).reshape(-1, 1)

print('Y_test Shape : ', Y_test.shape)
print(Y_test)

Y_test Shape :  (14799, 1)
[[ 1]
 [ 1]
 [ 1]
 ...
 [19]
 [19]
 [19]]


In [21]:
# Hypothesis of the trained weights on the testing features
H_test = Hypothesis(X=X_test, theta=theta)

print('H_test Shape : ', H_test.shape)
print(H_test)

H_test Shape :  (14799, 26)
[[0.05969134 0.01732334 0.00031692 ... 0.2061908  0.02196591 0.02184293]
 [0.249429   0.02027539 0.19305317 ... 0.01578221 0.01163344 0.08002725]
 [0.04241813 0.00881835 0.02374339 ... 0.00156627 0.0015939  0.00451117]
 ...
 [0.01653589 0.03752488 0.01170763 ... 0.0021611  0.00296081 0.00733138]
 [0.03700069 0.09890472 0.04350199 ... 0.01560758 0.0197909  0.05418248]
 [0.14396241 0.00787586 0.01321251 ... 0.05715421 0.02324446 0.02256633]]


In [22]:
# One-hot notation for Hypothesis_test
# Put a 1 wherever an entry equals the maximum of its row (tied maxima all
# get a 1). Vectorised so the cell no longer rebinds the builtin name `max`
# at notebook scope and no longer loops in Python.
H_test_HotOne = (H_test == H_test.max(axis=1, keepdims=True)).astype(float)

# Generating Predicted test Output
# The predicted label is (index of the first 1 in the row) + 1; a row with
# no 1 at all keeps the value 0.
IsOne_test = H_test_HotOne == 1
Y_test_Predicted = np.zeros((Y_test.shape[0], 1))
Y_test_Predicted[:, 0] = np.where(IsOne_test.any(axis=1), IsOne_test.argmax(axis=1) + 1, 0)

# Comparing Expected vs Prediction
print('EXPECTED vs PREDICTED')
print(np.concatenate((Y_test, Y_test_Predicted), axis=1))

EXPECTED vs PREDICTED
[[ 1. 24.]
 [ 1. 21.]
 [ 1. 15.]
 ...
 [19. 19.]
 [19. 10.]
 [19.  1.]]


In [23]:
# Calculating Accuracy
# Count the rows whose predicted label equals the expected label. Both
# arrays are (n, 1) column vectors, so the comparison is element-wise.
count = int(np.count_nonzero(Y_test_Predicted == Y_test))

print('Accuracy : ', (count/len(Y_test_Predicted))*100, '%')

Accuracy :  56.956551118318814 %
