# Imports + Load Data

In [1]:
# !pip install scikit-learn

In [2]:
from __future__ import division, print_function
import sklearn.datasets as skl_datasets
import sklearn.model_selection as skl_model_selection

import numpy as np
import json

In [3]:
def toy_data_binary():
    """Generate a random n-class classification problem and split arrays or matrices into random train and test subsets using functions
    
    Returns:
    X_train, X_test, y_train, y_test -- np arrays

    """
    data = skl_datasets.make_classification(n_samples=500, 
                              n_features=2,
                              n_informative=1, 
                              n_redundant=0, 
                              n_repeated=0, 
                              n_classes=2, 
                              n_clusters_per_class=1, 
                              class_sep=1., 
                              random_state=42)
    
    X_train, X_test, y_train, y_test = skl_model_selection.train_test_split(data[0], data[1], train_size=0.7, random_state=42)
    
    return X_train, X_test, y_train, y_test

In [4]:
binary_toy_data = toy_data_binary()
# binary_toy_data

In [5]:
def moon_dataset():
    """Generate two interleaving half circles (per the documentation)
    
    Returns:
    X_train, X_test, y_train, y_test -- np arrays
    
    """
    data = skl_datasets.make_moons(n_samples=500, shuffle=True, noise=0.2, random_state=42)
    X_train, X_test, y_train, y_test = skl_model_selection.train_test_split(data[0], data[1], train_size=0.7, random_state=42)
    
    return X_train, X_test, y_train, y_test

In [6]:
moon_data = toy_data_binary()
# moon_data

In [7]:
# Hand-written digits data
def data_loader_mnist(dataset='mnist_subset.json'):
    # This function reads the MNIST data and separate it into train, val, and test set
    with open(dataset, 'r') as f:
        data_set = json.load(f)
    train_set, valid_set, test_set = data_set['train'], data_set['valid'], data_set['test']
    
    return np.asarray(train_set[0]), \
          np.asarray(test_set[0]), \
          np.asarray(train_set[1]), \
          np.asarray(test_set[1])


In [8]:
mnist_data = data_loader_mnist()

# TODOs

In [9]:
def accuracy_score(true, preds):
    return np.sum(true == preds).astype(float) / len(true)

In [10]:
def sigmoid(z):

    """
    Inputs:
    - z: a numpy array or a float number
    Returns:
    - value: a numpy array or a float number after applying the sigmoid function 1/(1+exp(-z)).
    """

    ############################################
    # TODO 3 : fill in the sigmoid function    #
    ############################################
    # print(z)
    sigm = 1 / (1 + np.exp(-z))
    # sigm = -1 * np.exp(-z) / (1 + np.exp(-z))
    # sigm = np.exp(z) / (np.exp(z) + 1)
    return sigm

In [11]:
def binary_predict(X, w, b):
    """
    Inputs:
    - X: testing features, a N-by-D numpy array, where N is the 
    number of training points and D is the dimensionality of features
    - w: D-dimensional vector, a numpy array which is the weight 
    vector of your learned model
    - b: scalar, which is the bias of your model
    
    Returns:
    - preds: N-dimensional vector of binary predictions (either 0 or 1)
    """
    N, D = X.shape
        
    #############################################################
    # TODO 4 : predict DETERMINISTICALLY (i.e. do not randomize)#
    #############################################################
    print()
    
    store_preds = []
    get_preds = np.add(np.dot(X, w.T), b)
    # print("get_preds : ", get_preds)
    
    for i in range(len(get_preds)):
        # print(i, get_preds[i])
        if (get_preds[i] > 0):
            store_preds.append(1)
            # print(preds)
            
        else:
            store_preds.append(0)
            
    # print(store_preds)
    preds = np.array(store_preds)
    assert preds.shape == (N,) 
    return preds


In [12]:
def threshold(logistic_loss):
    """Show where specific point in X is classified
    
    Parameters:
    X -- np array
    
    """
    
    for j in range(len(logistic_loss)):
        if logistic_loss[j] >= 0.5:
                print("Above")
        else:
            print("Below")
        

In [13]:
def binary_train(X, y, loss="perceptron", w0=None, b0=None, step_size=0.5, max_iterations=1000):
    """Find the optimal parameters w and b for inputs X and y. Use the *average* of the gradients for all training examples multiplied by the step_size to update parameters.
    
    Parameters:
    X -- np array (training features of size N-by-D, where N is the number of training points and D is the dimensionality of features)
    y -- np array (binary training labels of N dimensional)
    N -- int -- (#training points, indicating the labels of training data (either 0 or 1))
    loss -- str (loss type; either perceptron or logistic)
    w0 -- np array (initial weight vector)
    b0 -- scalar (initial bias term)
    step_size -- float (learning rate)
    max_iterations -- int (#iterations to perform gradient descent)
    
    Functions:
    sigmoid -- float (sigmoid value between or equal to 0 and 1)

    Returns:
    w -- np array (D-dimensional vector, the final trained weight vector)
    b -- scalar (the final trained bias term)
    """
    N, D = X.shape
    assert len(np.unique(y)) == 2
        
    w = np.zeros(D)
    if w0 is not None:
        w = w0
        
    b = 0
    if b0 is not None:
        b = b0

    if loss == "perceptron":
        ################################################
        # TODO 1 : perform "max_iterations" steps of   #
        # gradient descent with step size "step_size"  #
        # to minimize perceptron loss (use -1 as the   #
		# derivative of the perceptron loss at 0)      # 
        ################################################
        
        # the labels are either 0 or 1, instead of -1 or +1, so
        # change 0 to -1 for the misclassified points
        y = np.where(y == 0, -1, 1)
        
        for i in range(0, max_iterations):
            
            # make a prediction
            y_hat = np.add(np.dot(X, w), b)
            
            # classify the prediction
            y_times_y_hat = np.multiply(y, y_hat)
            
            # get the misclassified predictions
            misclassified_indicator = np.where(y_times_y_hat <= 0, -1, 1)
            
            # gradient
            gradient = np.multiply(X.T, y)        
            
            # update w and b
            w = np.add(w, (step_size * np.dot(gradient, misclassified_indicator) / N))
            b = b + (step_size * np.sum(misclassified_indicator * gradient) / N)
            
            # print()

    elif loss == "logistic":
        ################################################
        # TODO 2 : perform "max_iterations" steps of   #
        # gradient descent with step size "step_size"  #
        # to minimize logistic loss                    # 
        ################################################
        
        # replace 0 with -1 as both 0 and -1 are misclassified points
        # y = np.where(y == 0, -1, 1)
        
        for i in range (0, max_iterations):
            
            # make a prediction
            y_hat = np.add(np.dot(X, w), b)

            # predicted probabilities
            prob_of_y_hat = sigmoid(y_hat)
            
            difference_in_prod_pred_and_true = prob_of_y_hat - y
            
            # gradient
            gradient = np.dot(X.T, difference_in_prod_pred_and_true)
            gradient_w = 1 / N * gradient
            gradient_b = 1 / N * np.sum(difference_in_prod_pred_and_true)
            
            # update w and b
            w = w - step_size * gradient_w
            # print(w)
            b = b - step_size * gradient_b
            # print(b)
            
            # print()
            
    else:
        raise "Undefined loss function."

    assert w.shape == (D,)
    return w, b        

In [14]:
# X_train, X_test, y_train, y_test = binary_toy_data
# print(type(X_train))
# w, b = binary_train(X_train, y_train, loss="perceptron")
# print(w, b)
# # w, b = binary_train(X_train, y_train, loss="logistic")
# # print(w, b)

In [15]:
# train_preds = binary_predict(X_train, w, b)
# train_preds

In [16]:
# y_train

In [19]:
def run_binary(binary_toy_data, moon_data, mnist_data):
    
    datasets = [(binary_toy_data, 'Synthetic data'), 
                (moon_data, 'Two Moon data'),
                (mnist_data, 'Binarized MNIST data')]
    
    # datasets = [(binary_toy_data, 'Synthetic data'), 
    #             (moon_data, 'Two Moon data')]

    for data, name in datasets:
        print(name)
        X_train, X_test, y_train, y_test = data
        
        if name == 'Binarized MNIST data':
            y_train = [0 if yi < 5 else 1 for yi in y_train]
            y_test = [0 if yi < 5 else 1 for yi in y_test]
            y_train = np.asarray(y_train)
            y_test = np.asarray(y_test)
        
        for loss_type in ["perceptron", "logistic"]:
            # print(loss_type)
            w, b = binary_train(X_train, y_train, loss=loss_type)
            # print(w, b)
            train_preds = binary_predict(X_train, w, b)
            # print(train_preds)
            preds = binary_predict(X_test, w, b)
            # print(preds)
            print(loss_type + ' train acc: %f, test acc: %f' 
                        %(accuracy_score(y_train, train_preds), accuracy_score(y_test, preds)))
            

In [20]:
run_binary(binary_toy_data, moon_data, mnist_data)

Synthetic data


perceptron train acc: 0.397143, test acc: 0.440000


logistic train acc: 0.994286, test acc: 1.000000
Two Moon data


perceptron train acc: 0.397143, test acc: 0.440000


logistic train acc: 0.994286, test acc: 1.000000
Binarized MNIST data


perceptron train acc: 0.500000, test acc: 0.500000


logistic train acc: 0.871000, test acc: 0.834000


In [None]:
# https://stackoverflow.com/questions/42249982/systemexit-2-error-when-calling-parse-args-within-ipython
# if __name__ == '__main__':
    
# import argparse
# import sys

# parser = argparse.ArgumentParser()
# parser.add_argument("--type", )
# parser.add_argument("--output")
# parser.add_argument('-f')
# args = parser.parse_args()

# if args.output:
#     print(args)
#     sys.stdout = open(args.output, 'w')

# if not args.type or args.type == 'binary':
#     run_binary(binary_toy_data, moon_data, mnist_data)

# References

1. CLASS: [USC CSCI-567 Machine Learning](https://haipeng-luo.net/courses/CSCI567/2021_fall/index.html) by Haipeng Luo
2. WEBSITE: [make_classification](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_classification.html#sklearn-datasets-make-classification) scikit-learn Documentation
3. WEBSITE: [train_test_split](https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html#sklearn.model_selection.train_test_split) scikit-learn Documentation
4. PAPER: [Classification Notes](https://detraviousjbrinkley.notion.site/Classification-aka-Categorical-519984d18e3748c287b94d318e1fd0aa) by Detravious