In [2]:
!pip install scikit-learn

Collecting scikit-learn
  Using cached scikit_learn-1.0.2-cp37-cp37m-macosx_10_13_x86_64.whl (7.8 MB)
Collecting threadpoolctl>=2.0.0
  Using cached threadpoolctl-3.1.0-py3-none-any.whl (14 kB)
Collecting joblib>=0.11
  Downloading joblib-1.2.0-py3-none-any.whl (297 kB)
[K     |████████████████████████████████| 297 kB 2.5 MB/s eta 0:00:01
[?25hInstalling collected packages: threadpoolctl, joblib, scikit-learn
Successfully installed joblib-1.2.0 scikit-learn-1.0.2 threadpoolctl-3.1.0


In [3]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

import numpy as np

In [4]:
def toy_data_binary():
    """Generate a random n-class classification problem and split arrays or matrices into random train and test subsets using functions
    
    Functions:
    make_classification() -- imported
    train_test_split() -- imported
    
    Returns:
    X_train, X_test, y_train, y_test -- np arrays (.

    """
    data = make_classification(n_samples=500, 
                              n_features=2,
                              n_informative=1, 
                              n_redundant=0, 
                              n_repeated=0, 
                              n_classes=2, 
                              n_clusters_per_class=1, 
                              class_sep=1., 
                              random_state=42)
    
    X_train, X_test, y_train, y_test = train_test_split(data[0], data[1], train_size=0.7, random_state=42)
    
    return X_train, X_test, y_train, y_test

In [5]:
binary_toy_data = toy_data_binary()
# binary_toy_data

In [130]:
def binary_train(X, y, loss="perceptron", w0=None, b0=None, step_size=0.5, max_iterations=100):
    """Find the optimal parameters w and b for inputs X and y. Use the *average* of the gradients for all training examples multiplied by the step_size to update parameters.
    
    Parameters:
    X -- np array (training features of size N-by-D, where N is the number of training points and D is the dimensionality of features)
    y -- np array (binary training labels of N dimensional)
    N -- int -- (#training points, indicating the labels of training data (either 0 or 1))
    loss -- str (loss type; either perceptron or logistic)
    w0 -- np array (initial weight vector)
    b0 -- scalar (initial bias term)
    step_size -- float (learning rate)
    max_iterations -- int (#iterations to perform gradient descent)

    Returns:
    w -- np array (D-dimensional vector, the final trained weight vector)
    b -- scalar (the final trained bias term)
    """
    N, D = X.shape
    assert len(np.unique(y)) == 2
    print(N, "training points of size: ",  D)

    w = np.zeros(D)
    if w0 is not None:
        w = w0
    
    b = 0
    if b0 is not None:
        b = b0

    if loss == "perceptron":
        ################################################
        # TODO 1 : perform "max_iterations" steps of   #
        # gradient descent with step size "step_size"  #
        # to minimize perceptron loss (use -1 as the   #
		# derivative of the perceptron loss at 0)      # 
        ################################################
        # print(w)
        
        # set y values to -1s & 1s
        y = np.where(y == 0, -1, 1)
        # print("y : ", y)
        
        for i in range(0, max_iterations):
            # print(y[i].shape, w.shape, X[i].shape, b)
            # print(y[i], X[i])
            X_w_b = np.dot(X, w) + b
            y_X_w_b = np.dot(y, X_w_b)
            print(y_X_w_b)
            
            # if y_X_w_b <= 0:
            #     print("MISCLASSIFIED")
            # else:
            #     print("CORRECTLY CLASSIFIED")
            indicator = np.where(y_X_w_b <= 0, 1, 0)
            print("indicator : ", indicator.shape, indicator)
            i_y = np.multiply(indicator, y)
            print("i_y : ", i_y.shape)
            i_y_X = np.dot(i_y, X)
            print("i_y_X : ", i_y_X.shape, i_y_X)

            w = w + step_size * i_y_X / N
            print("w : ", w)
            b = b + np.sum(step_size * i_y / N)
            print("b : ", b)
           
            print()

    elif loss == "logistic":
        ################################################
        # TODO 2 : perform "max_iterations" steps of   #
        # gradient descent with step size "step_size"  #
        # to minimize logistic loss                    # 
        ################################################

        
        pass
    else:
        raise "Undefined loss function."

    assert w.shape == (D,)
    return w, b        

In [131]:

X_train, X_test, y_train, y_test = binary_toy_data
for loss_type in ["perceptron", "logistic"]:
    print(loss_type)
    w, b = binary_train(X_train, y_train, loss=loss_type)
    # train_preds = binary_predict(X_train, w, b)
    # preds = binary_predict(X_test, w, b)
    # print(loss_type + ' train acc: %f, test acc: %f' 
                # %(accuracy_score(y_train, train_preds), accuracy_score(y_test, preds)))
    print()

perceptron
350 training points of size:  2
0.0
indicator :  () 1
i_y :  (350,)
i_y_X :  (2,) [345.62693432  36.5572946 ]
w :  [0.49375276 0.05222471]
b :  0.014285714285714282

172.70630502223318
indicator :  () 0
i_y :  (350,)
i_y_X :  (2,) [0. 0.]
w :  [0.49375276 0.05222471]
b :  0.014285714285714282

172.70630502223318
indicator :  () 0
i_y :  (350,)
i_y_X :  (2,) [0. 0.]
w :  [0.49375276 0.05222471]
b :  0.014285714285714282

172.70630502223318
indicator :  () 0
i_y :  (350,)
i_y_X :  (2,) [0. 0.]
w :  [0.49375276 0.05222471]
b :  0.014285714285714282

172.70630502223318
indicator :  () 0
i_y :  (350,)
i_y_X :  (2,) [0. 0.]
w :  [0.49375276 0.05222471]
b :  0.014285714285714282

172.70630502223318
indicator :  () 0
i_y :  (350,)
i_y_X :  (2,) [0. 0.]
w :  [0.49375276 0.05222471]
b :  0.014285714285714282

172.70630502223318
indicator :  () 0
i_y :  (350,)
i_y_X :  (2,) [0. 0.]
w :  [0.49375276 0.05222471]
b :  0.014285714285714282

172.70630502223318
indicator :  () 0
i_y :  (350,

# References

1. USC CSCI-567 Machine Learning
2. [make_classification](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_classification.html#sklearn-datasets-make-classification) Documentation
3. [train_test_split](https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html#sklearn.model_selection.train_test_split) Documentation