### Batch Gradient Descent with early stopping for Softmax Regression

In [86]:
# import numpy and the function to load the dataset
import numpy as np
from sklearn.datasets import load_iris

In [87]:
# load the iris dataset and copy it into a plain dict; X holds the
# feature matrix and y the integer class labels
iris = dict(load_iris())
X = iris["data"]
y = iris["target"]
# display the feature names (one per column of X)
iris["feature_names"]

['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']

In [88]:
# add an intercept (bias) term: prepend a column of ones to X and
# record "intercept" as the first feature name so names and columns
# stay aligned
# NOTE(review): insert() mutates the list in place and X is rebuilt from
# itself, so re-running this cell adds another name/column each time
iris["feature_names"].insert(0, "intercept")
intercept = np.ones((len(y), 1))
X = np.concatenate((intercept, X), axis=1)
X.shape

(150, 5)

In [89]:
# function to split the dataset
def split(X, y, test_ratio, val_ratio, seed = 42):
    '''
    Shuffles the instances and returns X and y arrays for train,
    test and validation in that order

    Parameters:
        X: array indexed by instance along its first axis
        y: array of labels, one per instance (len(y) sets the
           number of instances)
        test_ratio: fraction of instances for the test set
        val_ratio: fraction of instances for the validation set
        seed: value passed to np.random.seed so that the shuffle
              is reproducible (this resets NumPy's global random
              state)

    Returns:
        (X_train, y_train, X_test, y_test, X_val, y_val)

    Raises:
        ValueError: if a ratio is negative or the two ratios leave
                    a negative number of training instances
    '''
    if test_ratio < 0 or val_ratio < 0:
        raise ValueError("test_ratio and val_ratio must be non-negative")

    # get sizes for each set (test/val sizes are truncated, the
    # training set receives whatever is left)
    n = len(y)
    test_size = int(n * test_ratio)
    val_size = int(n * val_ratio)
    train_size = n - test_size - val_size
    if train_size < 0:
        raise ValueError("test_ratio + val_ratio must not exceed 1")

    # set random seed
    np.random.seed(seed)

    # select the indices for each set; explicit end positions are used
    # instead of negative slicing because indices[-0:] would select
    # every instance when val_size is 0
    indices = np.random.permutation(n)
    test_end = train_size + test_size
    train_indices = indices[:train_size]
    test_indices = indices[train_size:test_end]
    val_indices = indices[test_end:]

    # returns selected instances
    return (X[train_indices], y[train_indices], 
            X[test_indices], y[test_indices],
            X[val_indices], y[val_indices])

In [90]:
# split the dataset: 20% for test, 20% for validation and the
# remaining instances for training
test_ratio = 0.2
val_ratio = 0.2
X_train, y_train, X_test, y_test, X_val, y_val = \
    split(X, y, test_ratio, val_ratio)

In [None]:
# one hot encode the labels
def one_hot(y, n_classes=None):
    '''
    Returns one hot encoded matrix

    Parameters:
        y: array-like of non-negative integer class labels
        n_classes: number of columns of the result; when None it
                   is taken as y.max() + 1. Pass it explicitly when
                   encoding a subset that may not contain the
                   highest class, so every subset gets the same
                   width.

    Returns:
        float array with one row per label, holding a 1 in the
        column given by the label and 0 elsewhere
    '''
    y = np.asarray(y)
    if n_classes is None:
        n_classes = y.max() + 1

    # identity matrix
    identity = np.eye(n_classes)
    
    # select a different row for each class
    return identity[y]

In [102]:
# quick check: the one-hot expression applied inline to the full
# label vector y (row i of the identity matrix encodes class i)
np.eye(y.max() + 1)[y]

array([[1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0

In [100]:
# largest class label in y; the one-hot width is this value plus one
y.max()

2