In [275]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt

In [276]:
# Read the train and test CSV files into DataFrames (train first, then test).
tr_data, ts_data = map(pd.read_csv, ('mnist_train.csv', 'mnist_test.csv'))

In [277]:
# Per-column summary statistics (count, mean, std, quartiles, min/max) of the training frame.
tr_data.describe()

Unnamed: 0,label,1x1,1x2,1x3,1x4,1x5,1x6,1x7,1x8,1x9,...,28x19,28x20,28x21,28x22,28x23,28x24,28x25,28x26,28x27,28x28
count,60000.0,60000.0,60000.0,60000.0,60000.0,60000.0,60000.0,60000.0,60000.0,60000.0,...,60000.0,60000.0,60000.0,60000.0,60000.0,60000.0,60000.0,60000.0,60000.0,60000.0
mean,4.453933,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.200433,0.088867,0.045633,0.019283,0.015117,0.002,0.0,0.0,0.0,0.0
std,2.88927,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,6.042472,3.956189,2.839845,1.68677,1.678283,0.3466,0.0,0.0,0.0,0.0
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,254.0,254.0,253.0,253.0,254.0,62.0,0.0,0.0,0.0,0.0


In [278]:
# Peek at the first rows: a 'label' column followed by one column per pixel.
tr_data.head()

Unnamed: 0,label,1x1,1x2,1x3,1x4,1x5,1x6,1x7,1x8,1x9,...,28x19,28x20,28x21,28x22,28x23,28x24,28x25,28x26,28x27,28x28
0,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [279]:
# Split each frame into its target vector (the 'label' column) and its
# feature matrix (every remaining column), both as plain NumPy arrays.
train_labels = tr_data['label'].to_numpy()
train_data = tr_data.drop(columns='label').to_numpy()
test_labels = ts_data['label'].to_numpy()
test_data = ts_data.drop(columns='label').to_numpy()


In [280]:
# Sanity check: one label per training example.
train_labels.shape

(60000,)

In [295]:
# Scale the pixel features by 1/255 (raw intensities look like 8-bit values;
# the summary above shows a maximum of 254).
# The scaled arrays are rebuilt from the raw DataFrames instead of dividing
# `train_data` / `test_data` by themselves, so re-running this cell always
# yields the same result rather than shrinking the data by another factor of 255.
PIXEL_MAX = 255.0
train_data = tr_data.drop(columns='label').values / PIXEL_MAX
test_data = ts_data.drop(columns='label').values / PIXEL_MAX

In [281]:
# the sigmoid function (numerically stable form)
def sigmoid(z):
    """Return the logistic sigmoid 1 / (1 + exp(-z)), applied element-wise.

    Parameters
    ----------
    z : float or np.ndarray
        Input value(s); any shape NumPy can broadcast.

    Returns
    -------
    np.float64 or np.ndarray
        Values in [0, 1] with the same shape as ``z``.

    Notes
    -----
    Uses the identity ``sigmoid(z) = exp(-log(1 + exp(-z)))`` with
    ``np.logaddexp(0, -z)`` computing ``log(1 + exp(-z))`` without ever
    forming ``exp(-z)`` directly, so very negative inputs do not trigger
    floating-point overflow.
    """
    return np.exp(-np.logaddexp(0, -z))

In [282]:
# cross-entropy cost with regularization
def compute_cost(X, y, w, b, lambda_=1):
    """Regularized binary cross-entropy cost of a logistic-regression model.

    Parameters
    ----------
    X : np.ndarray, shape (m, n)
        Feature matrix, one example per row.
    y : np.ndarray, shape (m,)
        Binary targets (0 or 1).
    w : np.ndarray, shape (n,)
        Weight vector.
    b : float
        Bias term.
    lambda_ : float, default 1
        L2 regularization strength (the bias is not regularized).

    Returns
    -------
    float
        Mean cross-entropy loss plus ``lambda_ / (2 * m) * sum(w ** 2)``.

    Notes
    -----
    With ``z = X @ w + b`` the per-example loss
    ``-y*log(sigmoid(z)) - (1-y)*log(1 - sigmoid(z))`` equals
    ``log(1 + exp(z)) - y*z``. Evaluating that form through
    ``np.logaddexp`` avoids ``log(0)`` (and the resulting inf/nan cost)
    when the sigmoid saturates at exactly 0 or 1.
    """
    m = X.shape[0]
    z = X @ w + b
    loss = np.sum(np.logaddexp(0, z) - y * z)
    reg_cost = (lambda_ / (2 * m)) * np.sum(w ** 2)
    total_cost = loss / m + reg_cost
    return total_cost   # scalar cost
    

In [283]:
# computing the gradient with regularization
def compute_gradient(X, y, w, b, lambda_=1):
    """Gradient of the regularized logistic-regression cost.

    Parameters
    ----------
    X : np.ndarray, shape (m, n)
        Feature matrix, one example per row.
    y : np.ndarray, shape (m,)
        Binary targets (0 or 1).
    w : np.ndarray, shape (n,)
        Weight vector.
    b : float
        Bias term.
    lambda_ : float, default 1
        L2 regularization strength (the bias is not regularized).

    Returns
    -------
    dj_db : float
        Partial derivative of the cost with respect to ``b``.
    dj_dw : np.ndarray, shape (n,)
        Partial derivatives of the cost with respect to ``w``.
    """
    m = X.shape[0]
    err = sigmoid(X @ w + b) - y
    # The penalty lambda_/(2m) * sum(w**2) differentiates to (lambda_/m) * w.
    # Both terms are divided by m exactly once; adding lambda_*w/m and then
    # dividing the sum by m again would shrink the penalty to lambda_/m**2.
    dj_dw = (err @ X + lambda_ * w) / m
    dj_db = np.sum(err) / m
    return dj_db, dj_dw

In [284]:
def gradient_descent(X, y, w_in, b_in, cost_function, gradient_function, alpha, num_iters, lambda_):
    """Run batch gradient descent for a fixed number of steps.

    Parameters
    ----------
    X, y
        Training features and targets; passed through unchanged to
        ``gradient_function``.
    w_in : np.ndarray
        Initial weights. Not modified; each step builds a new array.
    b_in : float
        Initial bias.
    cost_function : callable
        Accepted for interface compatibility; currently not called.
    gradient_function : callable
        Called as ``gradient_function(X, y, w, b, lambda_)`` and must return
        ``(dj_db, dj_dw)``.
    alpha : float
        Learning rate.
    num_iters : int
        Number of update steps.
    lambda_ : float
        Regularization strength forwarded to ``gradient_function``.

    Returns
    -------
    (w, b)
        Parameters after ``num_iters`` updates (the inputs themselves when
        ``num_iters`` is 0).
    """
    w, b = w_in, b_in
    for _ in range(num_iters):
        dj_db, dj_dw = gradient_function(X, y, w, b, lambda_)
        # Out-of-place updates keep the caller's initial arrays intact, so the
        # same starting point can be reused for several runs.
        w = w - alpha * dj_dw
        b = b - alpha * dj_db
    return w, b

In [329]:
# Training configuration shared by every one-vs-rest classifier.
# Number of gradient-descent steps per classifier.
iterations = 1000
# Learning rate.
alpha = 1
# L2 regularization strength.
lambda_ = 0.1
# Starting bias.
initial_b = 1.
# Starting weights: one zero per feature column.
initial_w = np.zeros(train_data.shape[1])

In [330]:
# One-vs-rest training: fit one binary logistic-regression model per digit,
# each starting from the same initial parameters.
classifiers = []

for class_label in range(10):
    # 1 where the example is the current digit, 0 everywhere else.
    y_binary = (train_labels == class_label).astype(int)
    w, b = gradient_descent(
        X=train_data,
        y=y_binary,
        w_in=initial_w,
        b_in=initial_b,
        cost_function=compute_cost,
        gradient_function=compute_gradient,
        alpha=alpha,
        num_iters=iterations,
        lambda_=lambda_,
    )
    classifiers.append((w, b))


In [331]:
def predict_class_ovr(input_example, classifiers):
    """Predict the class whose one-vs-rest classifier is most confident.

    Parameters
    ----------
    input_example : np.ndarray, shape (n,) or (k, n)
        A single feature vector, or a batch with one example per row.
    classifiers : sequence of (w, b)
        One ``(weights, bias)`` pair per class; the position in the sequence
        is the class index.

    Returns
    -------
    np.int64 or np.ndarray
        Index of the winning class for a single example, or an array of
        ``k`` indices for a batch.

    Notes
    -----
    Classes are ranked by the raw linear score ``x @ w + b`` rather than by
    its sigmoid. The sigmoid is strictly increasing, so the ranking is the
    same mathematically, but in floating point large scores all saturate to
    exactly 1.0 and ``argmax`` would then pick the first of the tied
    classes instead of the one with the highest score.
    """
    logits = np.array([input_example @ w + b for w, b in classifiers])
    # Axis 0 indexes the classifiers for both the single-example and batch case.
    predicted_class = np.argmax(logits, axis=0)
    return predicted_class

# Classify every test example, one row at a time.
ovr_predictions = np.array([predict_class_ovr(row, classifiers) for row in test_data])

# Accuracy: share of test examples whose predicted class equals the true label.
total_examples = test_labels.shape[0]
correct_predictions = (ovr_predictions == test_labels).sum()
accuracy = (correct_predictions / total_examples) * 100
print("Accuracy:", accuracy, "%")


Accuracy: 91.79 %
