In [1]:
import numpy as np
import matplotlib.pyplot as plt
import math

In [2]:
class LogisticRegression:
    """
    Implements a Logistic Regression Classifier.
    """
    
    def __init__(self, input_dim, output_dim, inner,L2_reg):
        """
        Initializes the parameters of the logistic regression classifer to 
        random values.
        
        args:
            input_dim: Number of dimensions of the input data
            output_dim: Number of classes
            hidden_dim: Number of hidden nodes
        """
        
        self.theta01 = np.random.randn(input_dim, inner) / np.sqrt(input_dim)       
        self.bias01 = np.zeros((1, inner))
        
        self.theta12 = np.random.randn(inner, output_dim) / np.sqrt(inner) 
        self.bias12 = np.zeros((1, output_dim))
        self.L2_reg = L2_reg
        
    #--------------------------------------------------------------------------
    
    def compute_cost(self,X, y):
        """
        Computes the total cost on the dataset.
        
        args:
            X: Data array
            y: Labels corresponding to input data
        
        returns:
            cost: average cost per data sample
        """
        num_examples = np.shape(X)[0]
        z = np.dot(X,self.theta1) + self.bias1
        a = np.tanh(z)
        z2 = np.dot(a, self.theta2) + self.bias2
        exp_z2 = np.exp(z2)
        softmax_scores = exp_z2 / np.sum(exp_z2, axis=1, keepdims=True)
        
        one_hot_y = np.zeros((num_examples,np.max(y)+1))
        logloss = np.zeros((num_examples,))        
        for i in range(np.shape(X)[0]):
            one_hot_y[i,y[i]] = 1
            logloss[i] = -np.sum(np.log(softmax_scores[i,:]) * one_hot_y[i,:])
        data_loss = np.sum(logloss)
        data_loss += self.L2_reg/2 * (np.sum(np.square(self.theta12)) + np.sum(np.square(self.theta01)))
        return 1./num_examples * data_loss

    
    #--------------------------------------------------------------------------
 
    def predict(self,X):
        """
        Makes a prediction based on current model parameters.
        
        args:
            X: Data array
            
        returns:
            predictions: array of predicted labels
        """
        z01 = np.dot(X,self.theta01) + self.bias01
        X1 = np.tanh(z01)
        z12 = np.dot(X1, self.theta12) + self.bias12
        exp_z = np.exp(z12)
        softmax_scores = exp_z / np.sum(exp_z, axis=1, keepdims=True)
        predictions = np.argmax(softmax_scores, axis = 1)
        return predictions
        
    #--------------------------------------------------------------------------
   
    def fit(self,X,y,num_epochs,alpha=0.01):     
        
        for epoch in range(0, num_epochs):

            # Forward propagation
            z01 = np.dot(X, self.theta01) + self.bias01
            X1 = np.tanh(z01)
            z12 = np.dot(X1, self.theta12) + self.bias12
            exp_z = np.exp(z12)
            softmax_scores = exp_z / np.sum(exp_z, axis=1, keepdims=True)          
                                                                       
                                                                                            
            # Backpropagation
            beta2 = np.zeros_like(softmax_scores)
            one_hot_y2 = np.zeros_like(softmax_scores)
            for i in range(X.shape[0]):
                one_hot_y2[i, y[i]] = 1
            beta2 = softmax_scores - one_hot_y2
                                       
            
            
            # Compute gradients of model parameters
            dtheta2 = np.dot(X1.T, beta2)
            dtheta2 += self.L2_reg * self.theta12
            dbias2 = np.sum(beta2, axis = 0, keepdims=True)
            
            delta2 = np.dot(beta2, self.theta12.T) * (1 - X1*X1)
            dtheta1 = np.dot(X.T, delta2)   
            dtheta1 += self.L2_reg * self.theta01
            dbias1 = np.sum(delta2, axis = 0)
        
            # Gradient descent parameter update
            self.theta01 -= alpha * dtheta1
            self.bias01  -= alpha * dbias1
            self.theta12 -= alpha * dtheta2
            self.bias12  -= alpha * dbias2
        return 0


In [3]:
#1. Load data
X_train = np.genfromtxt('DATA/Digit_X_train.csv', delimiter=',')
y_train = np.genfromtxt('DATA/Digit_y_train.csv', delimiter=',').astype(np.int64)
X_act = np.genfromtxt('DATA/Digit_X_test.csv', delimiter=',')
y_act = np.genfromtxt('DATA/Digit_y_test.csv', delimiter=',').astype(np.int64)

In [4]:
#2. Initialize model
input_dim = np.shape(X_train)[1]
output_dim = np.max(y_train) + 1
print(input_dim)
print(output_dim)
logreg = LogisticRegression(input_dim, output_dim, 40,L2_reg=0.02)
#4. Train the model
logreg.fit(X_train,y_train,1000,alpha=0.0009)

64
10


0

In [5]:
acc = 0
y_pred = logreg.predict(X_act)
con_mat = np.zeros((output_dim, output_dim))
for i in range(len(y_pred)):
    con_mat[y_pred[i], y_act[i]] += 1
    if y_act[i] == y_pred[i]:
        acc += 1
acc = acc/len(y_pred)
print ('ACCURACY: ', acc)
print ('CONFUSION MATRIX: \n', con_mat)

ACCURACY:  0.9410456062291435
CONFUSION MATRIX: 
 [[86.  0.  0.  0.  1.  0.  0.  0.  0.  1.]
 [ 0. 82.  0.  0.  1.  0.  1.  1.  3.  0.]
 [ 0.  0. 84.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  1.  2. 77.  0.  0.  0.  0.  0.  1.]
 [ 1.  0.  0.  0. 86.  0.  0.  0.  0.  0.]
 [ 0.  1.  0.  4.  0. 87.  0.  1.  3.  2.]
 [ 1.  0.  0.  0.  1.  1. 90.  0.  0.  0.]
 [ 0.  0.  0.  1.  0.  0.  0. 84.  0.  0.]
 [ 0.  2.  0.  8.  0.  0.  0.  0. 82.  0.]
 [ 0.  5.  0.  1.  3.  3.  0.  3.  0. 88.]]


In [6]:
X_split = np.split(X_train,[179,359,539,719])
y_split = np.split(y_train,[179,359,539,719])

In [7]:
# 5-fold round robin cross validation here.
counts = 0
total_con_mat = np.zeros((10,10))
for i in range(0,5):
    X_test = X_split[i]
    y_test = y_split[i]
    X_train = np.delete(X_split, i, 0)
    y_train = np.delete(y_split, i, 0)
    tempX=X_train[0]
    tempY=y_train[0]
    for j in range(1,4):
        tempX = np.vstack((tempX,X_train[j]))
        tempY = np.append(tempY, y_train[j])
    print(X_test.shape)
    logreg.fit(tempX,tempY,1000,alpha=0.0009)

    y_pred = logreg.predict(X_test)
    con_mat = np.zeros((output_dim, output_dim))
    acc = 0
    for i in range(len(y_pred)):
        con_mat[y_pred[i], y_test[i]] += 1
        if y_test[i] == y_pred[i]:
            acc += 1
    acc = acc/len(y_pred)
    counts += acc
    total_con_mat += con_mat
print ('ACCURACY: ', counts/5)
print ('CONFUSION MATRIX: \n', total_con_mat/5)      

(179, 64)
(180, 64)
(180, 64)
(180, 64)
(179, 64)
ACCURACY:  0.9988826815642458
CONFUSION MATRIX: 
 [[18.   0.   0.   0.   0.   0.   0.   0.   0.   0. ]
 [ 0.  18.2  0.   0.   0.   0.   0.   0.   0.2  0. ]
 [ 0.   0.  18.2  0.   0.   0.   0.   0.   0.   0. ]
 [ 0.   0.   0.  18.4  0.   0.   0.   0.   0.   0. ]
 [ 0.   0.   0.   0.  17.8  0.   0.   0.   0.   0. ]
 [ 0.   0.   0.   0.   0.  18.2  0.   0.   0.   0. ]
 [ 0.   0.   0.   0.   0.   0.  18.   0.   0.   0. ]
 [ 0.   0.   0.   0.   0.   0.   0.  18.   0.   0. ]
 [ 0.   0.   0.   0.   0.   0.   0.   0.  17.   0. ]
 [ 0.   0.   0.   0.   0.   0.   0.   0.   0.  17.6]]


In [8]:
acc = 0
y_pred = logreg.predict(X_act)
con_mat = np.zeros((output_dim, output_dim))
for i in range(len(y_pred)):
    con_mat[y_pred[i], y_act[i]] += 1
    if y_act[i] == y_pred[i]:
        acc += 1
acc = acc/len(y_pred)
print ('ACCURACY: ', acc)
print ('CONFUSION MATRIX: \n', con_mat)

ACCURACY:  0.9432703003337041
CONFUSION MATRIX: 
 [[86.  0.  0.  0.  1.  0.  0.  0.  0.  1.]
 [ 0. 82.  0.  0.  0.  1.  1.  1.  3.  0.]
 [ 0.  0. 84.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  1.  2. 78.  0.  0.  0.  0.  0.  1.]
 [ 1.  0.  0.  0. 87.  0.  0.  0.  0.  0.]
 [ 0.  1.  0.  4.  0. 86.  0.  0.  3.  2.]
 [ 1.  0.  0.  0.  0.  1. 90.  0.  0.  0.]
 [ 0.  0.  0.  1.  0.  0.  0. 85.  0.  0.]
 [ 0.  2.  0.  6.  0.  0.  0.  0. 82.  0.]
 [ 0.  5.  0.  2.  4.  3.  0.  3.  0. 88.]]
