In [1]:
# !pip install scikit-learn

In [2]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

import numpy as np

In [3]:
def toy_data_binary():
    """Generate a random n-class classification problem and split arrays or matrices into random train and test subsets using functions
    
    Functions:
    make_classification() -- imported
    train_test_split() -- imported
    
    Returns:
    X_train, X_test, y_train, y_test -- np arrays (.

    """
    data = make_classification(n_samples=500, 
                              n_features=2,
                              n_informative=1, 
                              n_redundant=0, 
                              n_repeated=0, 
                              n_classes=2, 
                              n_clusters_per_class=1, 
                              class_sep=1., 
                              random_state=42)
    
    X_train, X_test, y_train, y_test = train_test_split(data[0], data[1], train_size=0.7, random_state=42)
    
    return X_train, X_test, y_train, y_test

In [4]:
binary_toy_data = toy_data_binary()
# binary_toy_data

In [5]:
def accuracy_score(true, preds):
    return np.sum(true == preds).astype(float) / len(true)

In [19]:
def binary_train(X, y, loss="perceptron", w0=None, b0=None, step_size=0.5, max_iterations=1005):
    """Find the optimal parameters w and b for inputs X and y. Use the *average* of the gradients for all training examples multiplied by the step_size to update parameters.
    
    Parameters:
    X -- np array (training features of size N-by-D, where N is the number of training points and D is the dimensionality of features)
    y -- np array (binary training labels of N dimensional)
    N -- int -- (#training points, indicating the labels of training data (either 0 or 1))
    loss -- str (loss type; either perceptron or logistic)
    w0 -- np array (initial weight vector)
    b0 -- scalar (initial bias term)
    step_size -- float (learning rate)
    max_iterations -- int (#iterations to perform gradient descent)

    Returns:
    w -- np array (D-dimensional vector, the final trained weight vector)
    b -- scalar (the final trained bias term)
    """
    N, D = X.shape
    assert len(np.unique(y)) == 2
    # print(N, "training points of size: ",  D)
    
    
    w = np.zeros(D)
    if w0 is not None:
        w = w0
    # print(w)
    b = 0
    if b0 is not None:
        b = b0

    if loss == "perceptron":
        ################################################
        # TODO 1 : perform "max_iterations" steps of   #
        # gradient descent with step size "step_size"  #
        # to minimize perceptron loss (use -1 as the   #
		# derivative of the perceptron loss at 0)      # 
        ################################################
        # print(X.shape, X)
        # print(w.shape, w)
        
        # print("y : ", y)
        # change the misclassified points from 0 to -1 with loop
        # for i in range(len(y)):
        #     if y[i] == 0:
        #         y[i] = -1
        # print(y)
        
        # change the misclassified points from 0 to -1 with np
        y = np.where(y == 0, -1, 1)
        # print("y : ", y)
        
        for i in range(0, max_iterations):
            # print("epoch=", i)
            
            # prediction
            y_hat = np.add(np.dot(X, w), b)
            # print("y_hat : ", y_hat.shape, y_hat)
            
            # classification
            y_times_y_hat = np.multiply(y, y_hat)
            # print("y_times_y_hat : ", y_times_y_hat.shape, y_times_y_hat)
            
            # get misclassified
            misclassified_indicator = np.where(y_times_y_hat <= 0, -1, 1)
            # print("misclassified_indicator :", misclassified_indicator.shape, misclassified_indicator)
            
            # if (indicator_0_or_1 >= 1).any():
            #     print("CORRECTLY CLASSIFIED POINTS")
            #     print(y_times_y_hat)
            # else: 
            #     print("MISCLASSIFIED POINTS")
            #     print(y_times_y_hat)
            # misc_i = misclassified_indicator * y
            # print("misc_i : ", misc_i.shape, misc_i)
            
            # print(X.T.shape, y.shape)
            gradient = np.multiply(X.T, y)        
            # gradient = np.dot(X.T, y)
            # print("gradient :", gradient.shape, gradient)
            # print(X, "*", indicator_0_or_1, "* (", y_hat, "-", y, ") =", gradient)
            
            # print(w.shape, "+", step_size, "*", gradient.shape, "*", misclassified_indicator.shape)
            
            ins = np.dot(gradient, misclassified_indicator)
            # print("ins : ", ins.shape, ins)
            
            w = np.add(w, (step_size * (ins) / N))
            # print("w : ", w.shape)
            # print(b, "+", step_size, "*", y_hat, "*", y.shape)
            b = b + (step_size * np.sum(misclassified_indicator * gradient) / N)
            # print("b : ", b.shape)
           
            # print()

    elif loss == "logistic":
        ################################################
        # TODO 2 : perform "max_iterations" steps of   #
        # gradient descent with step size "step_size"  #
        # to minimize logistic loss                    # 
        ################################################

        
        pass
    else:
        raise "Undefined loss function."

    assert w.shape == (D,)
    return w, b        

In [41]:
def binary_predict(X, w, b):
    """
    Inputs:
    - X: testing features, a N-by-D numpy array, where N is the 
    number of training points and D is the dimensionality of features
    - w: D-dimensional vector, a numpy array which is the weight 
    vector of your learned model
    - b: scalar, which is the bias of your model
    
    Returns:
    - preds: N-dimensional vector of binary predictions (either 0 or 1)
    """
    N, D = X.shape
        
    #############################################################
    # TODO 4 : predict DETERMINISTICALLY (i.e. do not randomize)#
    #############################################################
    
    get_pred = np.add(np.dot(X, w.T), b)
    if (get_pred > 0).any:
        return 1
    else:
        return -1
    
    assert preds.shape == (N,) 
    return preds


In [42]:
X_train, X_test, y_train, y_test = binary_toy_data

In [43]:
# my_X_train = X_train[0:3]
# my_y_train = y_train[0:3]
# my_X_test = X_test[0:3]
# my_y_test = y_test[0:3]

# # print(my_X_train)
# # print(my_y_train)
# for loss_type in ["perceptron"]:
#     my_w, my_b = binary_train(my_X_train, my_y_train, loss=loss_type)
#     # print(my_w, my_b)
#     # my_train_preds = binary_predict(my_X_train, my_w, my_b)
#     # # print(my_train_preds)
#     # my_preds = binary_predict(my_X_test, my_w, my_b)
#     # # print(my_preds)
#     # print(loss_type + ' train acc: %f, test acc: %f' 
#     #             %(accuracy_score(my_y_train, my_train_preds), accuracy_score(my_y_test, my_preds)))


In [44]:
X_train, X_test, y_train, y_test = binary_toy_data
# print(X_train[0:5])
# print(y_train[0:5])
for loss_type in ["perceptron"]:
    # print(loss_type)
    w, b = binary_train(X_train, y_train, loss=loss_type)
    # print(w, b)
    train_preds = binary_predict(X_train, w, b)
    # print(train_preds)
    preds = binary_predict(X_test, w, b)
    # print(preds)
    print(loss_type + ' train acc: %f, test acc: %f' 
                %(accuracy_score(y_train, train_preds), accuracy_score(y_test, preds)))
    

perceptron train acc: 0.514286, test acc: 0.473333


In [24]:
# my_X_train = np.array([
#             [2.7810836,2.550537003,0],
#             [1.465489372,2.362125076,0],
#             [3.396561688,4.400293529,0],
#             [1.38807019,1.850220317,0],
#             [3.06407232,3.005305973,0],
#             [7.627531214,2.759262235,1],
#             [5.332441248,2.088626775,1],
#             [6.922596716,1.77106367,1],
#             [8.675418651,-0.242068655,1],
#             [7.673756466,3.508563011,1]
#             ])

# my_w_train = np.array([-0.1, 0.20653640140000007, -0.23418117710000003])

# my_y_train = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1])

# for loss_type in ["perceptron"]:
#     my_w, my_b = binary_train(my_X_train, my_y_train, loss=loss_type, w0=my_w_train)

#     print()

# References

1. USC CSCI-567 Machine Learning
2. [make_classification](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_classification.html#sklearn-datasets-make-classification) Documentation
3. [train_test_split](https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html#sklearn.model_selection.train_test_split) Documentation