## 1 - Packages ##

In [1]:
import numpy as np
from sklearn.datasets import load_digits 
import sklearn.model_selection as sk
from copy import deepcopy
from sklearn.metrics import accuracy_score

## 2 - Load data, reshape and standardize ##

In [2]:
### Load the digits dataset and unpack features, labels and class names
digits = load_digits()

# Feature matrix and integer class labels as provided by the dataset object.
X, y = digits.data, digits.target

# Distinct class names and how many of them there are.
classes = digits.target_names
num_classes = len(classes)

In [23]:
### Hold out 25% of the samples as a test set; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = sk.train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=0,
)

In [24]:
### Lay the data out column-wise: after the transpose every column is one example
train_set_x_flatten = np.transpose(X_train.reshape(len(X_train), -1))
test_set_x_flatten = np.transpose(X_test.reshape(len(X_test), -1))

# Labels become row vectors with one entry per example.
train_set_y = np.transpose(y_train.reshape(len(y_train), -1))
test_set_y = np.transpose(y_test.reshape(len(y_test), -1))

In [25]:
### Standardize dataset
# The flattened arrays hold one example per column, so reducing along axis 0
# centres and scales every example with its own mean and standard deviation.
train_set_x = (
    train_set_x_flatten - train_set_x_flatten.mean(axis=0)
) / train_set_x_flatten.std(axis=0)
test_set_x = (
    test_set_x_flatten - test_set_x_flatten.mean(axis=0)
) / test_set_x_flatten.std(axis=0)

## 3 - Building the parts of our algorithm ## 
### 3.1 - Helper functions

In [6]:
###Sigmoid function

def sigmoid(z):
    '''
        OBJECTIVE: Evaluate the logistic function 1 / (1 + exp(-z)) element-wise.
        INPUT PARAMETERS:
                        z : A scalar or numpy array of any size.
        RETURN VALUE:
                        The sigmoid of z, with the same shape as z.
    '''
    denominator = 1 + np.exp(-z)
    return 1 / denominator

In [7]:
###Change labels.
def changeLabels(class_number, y_train):
    '''
        OBJECTIVE: Build one-vs-rest labels: 1 where the label equals class_number, 0 everywhere else.
        INPUT PARAMETERS:
                        class_number : The target class to treat as the positive class.
                        y_train : numpy array of training labels (used as shape (1, m_train) in this notebook).
        RETURN VALUES:
                        A new array with the same shape as y_train holding 1 where
                        y_train == class_number and 0 otherwise. y_train is not modified.
    '''
    is_target_class = (y_train == class_number)
    return np.where(is_target_class, 1, 0)

### 3.2 - Initializing parameters

In [8]:
def initialize_with_zeros(dim):
    '''
        OBJECTIVE: Produce the starting parameters for gradient descent: an all-zero weight column and a zero bias.
        INPUT PARAMETERS:
                        dim : number of weights to create (one per input feature)
        RETURN VALUE:
                        w : numpy array of zeros with shape (dim, 1)
                        b : the integer 0 (the bias)
    '''
    b = 0
    w = np.zeros(shape=(dim, 1))

    # Sanity checks on the freshly created parameters.
    assert w.shape == (dim, 1)
    assert isinstance(b, (float, int))

    return w, b

### 3.3 - Forward and Backward propagation

In [9]:
def propagate(w, b, X, Y):
    '''
        OBJECTIVE: Implement the cost function and its gradient for the propagation.
        INPUT PARAMETERS:
                        w : weights, a numpy array of size (number of features, 1)
                        b : bias, a scalar (or an array that broadcasts against a (1, number of examples) row)
                        X : data of size (number of features, number of examples)
                        Y : true "label" vector (1, number of examples)
        RETURN VALUE:
                        grads : dictionary with
                                "dw" : gradient of the cost with respect to w, same shape as w
                                "db" : gradient of the cost with respect to b, a float array of
                                       shape (1, 1) because the sum keeps its dimensions
                        cost : negative log-likelihood cost for logistic regression (0-d numpy value)
    '''

    m = X.shape[1]

    # FORWARD PROPAGATION (from X to cost)
    A = sigmoid(np.dot(w.T, X) + b)

    # A saturated sigmoid returns exactly 0.0 or 1.0, which turns the cost into
    # NaN/inf through log(0). Clip the activations for the logarithms only; the
    # gradients below still use the unclipped activations.
    eps = 1e-15
    A_safe = np.clip(A, eps, 1 - eps)
    cost = (-1 / m) * np.sum(Y * np.log(A_safe) + (1 - Y) * np.log(1 - A_safe), axis=1, keepdims=True)

    # BACK PROPAGATION (to find the gradients)
    dw = (1 / m) * np.dot(X, (A - Y).T)
    db = (1 / m) * np.sum(A - Y, axis=1, keepdims=True)

    assert dw.shape == w.shape
    assert db.dtype == float
    cost = np.squeeze(cost)
    assert cost.shape == ()

    grads = {"dw": dw,
             "db": db}

    return grads, cost

### 3.4 - Optimization

In [31]:
def optimize(w, b, X, Y, num_iterations, learning_rate, print_cost = False):
    '''
        OBJECTIVE: This function optimizes w and b by running a gradient descent algorithm
        INPUT PARAMETERS:
                        w : weights, a numpy array of size (number of features, 1)
                        b : bias, a scalar
                        X : data of shape (number of features, number of examples)
                        Y : true "label" vector of shape (1, number of examples)
                        num_iterations : number of iterations of the optimization loop
                        learning_rate : learning rate of the gradient descent update rule
                        print_cost : True to print the cost every 100 steps

        RETURN VALUE:
                        params : dictionary containing the weights w and bias b after the last update
                        grads : dictionary containing the gradients "dw" and "db" computed in the final
                                iteration (i.e. before the final update was applied); both are None
                                when num_iterations is 0
                        costs : list of the costs sampled every 100 iterations (iteration 0, 100, 200, ...),
                                this can be used to plot the learning curve.
    '''

    costs = []

    # Bind the gradients up front so that a zero-iteration run returns cleanly
    # instead of failing on unbound names when the result dictionaries are built.
    dw, db = None, None

    for i in range(num_iterations):

        # Cost and gradient calculation
        grads, cost = propagate(w, b, X, Y)

        # Retrieve derivatives from grads
        dw = grads["dw"]
        db = grads["db"]

        # Update rule
        w = w - learning_rate * dw
        b = b - learning_rate * db

        # Record (and optionally print) the cost every 100 training iterations
        if i % 100 == 0:
            costs.append(cost)
            if print_cost:
                print ("Cost after iteration %i: %f" %(i, cost))

    params = {"w": w,
              "b": b}

    grads = {"dw": dw,
             "db": db}

    return params, grads, costs

In [11]:
def predict(models, X):
    '''
        OBJECTIVE: Predict the label of every example with the one-vs-rest logistic regression models.
        INPUT PARAMETERS:
                        models : dictionary mapping each class label to a dictionary holding that
                                 class's weights under "w" and bias under "b".
                        X : data of size (number of features, number of examples)

        RETURN VALUES:
                        Y_prediction : a numpy array of shape (1, number of examples) holding, for each
                                       example, the class label whose model gives the highest probability
        RAISES:
                        ValueError : if models is empty.
    '''

    if not models:
        raise ValueError("models must contain at least one classifier")

    m = X.shape[1]  # number of examples

    # Take the class set from the trained models rather than from a hard-coded
    # count or a module-level variable; sorting keeps the row order deterministic.
    class_labels = sorted(models)

    # One row of probabilities per class, one column per example.
    A = np.zeros((len(class_labels), m))

    for row, label in enumerate(class_labels):
        classifier = models[label]
        w = classifier["w"].reshape(X.shape[0], 1)
        b = classifier["b"]
        A[row] = sigmoid(np.dot(w.T, X) + b)

    # argmax yields row indices; map them back to the actual class labels.
    # On ties the first (smallest-label) row wins.
    winners = np.argmax(A, axis = 0)
    Y_prediction = np.asarray(class_labels)[winners].reshape(1, m)

    assert(Y_prediction.shape == (1, m))

    return Y_prediction

## 4 - Merge all functions into a model ##

In [32]:
def model(X_train, Y_train, X_test, Y_test, num_iterations = 2000, learning_rate = 0.5, print_cost = False):
    """
        OBJECTIVE: Trains one binary logistic regression classifier per class (one-vs-rest),
                   then prints the train and test accuracy of the combined classifier.
        INPUT PARAMETERS:
                        X_train : training set represented by a numpy array of shape (number of features, m_train)
                        Y_train : training labels represented by a numpy array (vector) of shape (1, m_train)
                        X_test : test set represented by a numpy array of shape (number of features, m_test)
                        Y_test : test labels represented by a numpy array (vector) of shape (1, m_test)
                        num_iterations : hyperparameter representing the number of iterations to optimize the parameters
                        learning_rate : hyperparameter representing the learning rate used in the update rule of optimize()
                        print_cost : Set to true to print the cost every 100 iterations

        RETURN VALUES:
                        models : dictionary mapping each class index (0 .. num_classes - 1) to a
                                 dictionary with that class's learned weights "w" and bias "b".

        NOTE: The number of classes is read from the module-level variable num_classes.
    """

    models = {}

    for class_id in range(num_classes):
        # Every classifier starts from zero-initialized parameters
        w, b = initialize_with_zeros(X_train.shape[0])

        # Relabel: 1 for the current class, 0 for all the others
        Y_train_changed_labels = changeLabels(class_id, Y_train)

        # Gradient descent; only the learned parameters are needed here
        parameters, _, _ = optimize(w, b, X_train, Y_train_changed_labels, num_iterations, learning_rate, print_cost)

        models[class_id] = {
            "w" : parameters["w"],
            "b" : parameters["b"],
            }

    Y_prediction_train = predict(models, X_train)
    train_accuracy = accuracy_score(np.squeeze(Y_train),np.squeeze(Y_prediction_train))*100

    Y_prediction_test = predict(models, X_test)
    test_accuracy = accuracy_score(np.squeeze(Y_test),np.squeeze(Y_prediction_test))*100

    print("Train accuracy: ",train_accuracy)
    print("Test accuracy", test_accuracy)

    return models

In [33]:
# Train the one-vs-rest classifiers on the standardized data and report accuracy.
d = model(
    train_set_x,
    train_set_y,
    test_set_x,
    test_set_y,
    num_iterations=1000,
    learning_rate=0.05,
    print_cost=False,
)

Train accuracy:  96.43652561247215
Test accuracy 95.55555555555556
