In [38]:
import numpy as np
from sklearn import datasets
import matplotlib.pyplot as plt

In [39]:
# Load the Iris dataset via sklearn.datasets and display the keys of the
# returned container (the last expression is rendered as the cell output).
iris = datasets.load_iris()
iris.keys()

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename'])

In [40]:
# Keep only feature columns 2 and 3 as inputs; the class labels are the target.
feature_columns = [2, 3]
X = iris["data"][:, feature_columns]
y = iris["target"]

# Preview the first five rows of each array.
print("Data: ", X[:5], sep="\n")
print("Target: ", y[:5], sep="\n")

Data: 
[[1.4 0.2]
 [1.4 0.2]
 [1.3 0.2]
 [1.5 0.2]
 [1.4 0.2]]
Target: 
[0 0 0 0 0]


In [41]:
# Prepend a constant column of ones so the first row of the weight matrix
# acts as the bias term.
X_with_bias = np.ones((len(X), X.shape[1] + 1))
X_with_bias[:, 1:] = X
print("Data with bias: ", X_with_bias[:5], sep="\n")

Data with bias: 
[[1.  1.4 0.2]
 [1.  1.4 0.2]
 [1.  1.3 0.2]
 [1.  1.5 0.2]
 [1.  1.4 0.2]]


In [42]:
# Seed NumPy's global random generator so the shuffle and the random weight
# initializations below are repeatable from a fresh kernel.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

In [43]:
# Create train, validation and test sets from one random permutation of the rows.
test_ratio = 0.2
validation_ratio = 0.2
total_size = len(X_with_bias)

test_size = int(total_size * test_ratio)
validation_size = int(total_size * validation_ratio)
train_size = total_size - test_size - validation_size

random_indexes = np.random.permutation(total_size)

# Use explicit, contiguous boundaries instead of negative offsets:
#   * the validation slice must end where the test slice begins, which is only
#     equal to "-validation_size" when both ratios happen to match;
#   * a negative offset of zero ("[-0:]") would select every row rather than none.
validation_end = train_size + validation_size
train_indexes = random_indexes[:train_size]
validation_indexes = random_indexes[train_size:validation_end]
test_indexes = random_indexes[validation_end:]

# The three subsets must partition the data set exactly.
assert len(train_indexes) + len(validation_indexes) + len(test_indexes) == total_size

X_train = X_with_bias[train_indexes]
y_train = y[train_indexes]
X_validation = X_with_bias[validation_indexes]
y_validation = y[validation_indexes]
X_test = X_with_bias[test_indexes]
y_test = y[test_indexes]

In [44]:
# Implementation of one-hot encoding
def to_one_hot(y, n_classes=None):
    """Convert a vector of integer class labels into a one-hot matrix.

    Parameters
    ----------
    y : array-like of int, shape (m,)
        Class indices, expected to be non-negative.
    n_classes : int, optional
        Number of columns in the result. When omitted it is inferred as
        ``y.max() + 1``. Pass it explicitly when encoding several splits of
        one data set, so a split that lacks the highest class still gets the
        same width as the others.

    Returns
    -------
    numpy.ndarray of float, shape (m, n_classes)
        Row ``i`` is all zeros except for a 1 in column ``y[i]``. An empty
        ``y`` yields an array with zero rows.
    """
    y = np.asarray(y)
    m = len(y)
    if n_classes is None:
        # Infer the width from the largest label; an empty input has no labels.
        n_classes = int(y.max()) + 1 if m else 0
    Y_one_hot = np.zeros((m, n_classes))
    if m:
        # Fancy indexing sets one cell per row: (row i, column y[i]).
        Y_one_hot[np.arange(m), y] = 1
    return Y_one_hot

In [45]:
# One-hot encode the labels of each split (train, test, validation).
Y_train_one_hot, Y_test_one_hot, Y_validation_one_hot = (
    to_one_hot(labels) for labels in (y_train, y_test, y_validation)
)

In [46]:
# Softmax function
def softmax(logits):
    """Row-wise softmax of a 2-D array of scores.

    Parameters
    ----------
    logits : array-like, shape (m, k)
        One row of raw class scores per sample.

    Returns
    -------
    numpy.ndarray, shape (m, k)
        Non-negative values; each row sums to 1.
    """
    logits = np.asarray(logits)
    # Subtracting the row maximum leaves the result mathematically unchanged
    # but keeps np.exp from overflowing to inf (and the ratio from becoming
    # NaN) when the scores are large.
    shifted = logits - np.max(logits, axis=1, keepdims=True)
    exps = np.exp(shifted)
    exp_sums = np.sum(exps, axis=1, keepdims=True)
    return exps / exp_sums

In [47]:
# Model dimensions: one input per column of X_train (bias column included),
# one output per distinct class label seen in the training set.
n_inputs = X_train.shape[1]
distinct_labels = np.unique(y_train)
n_outputs = len(distinct_labels)
print("Number of input:", n_inputs)
print("Number of outputs:", n_outputs)

Number of input: 3
Number of outputs: 3


In [48]:
# Softmax regression trained with batch gradient descent (no regularization).
eta = 0.05            # learning rate
n_iteration = 5001
m = len(X_train)      # number of training samples
epsilon = 1e-7        # added inside the log so log(0) is never evaluated

Theta = np.random.randn(n_inputs, n_outputs)

for iteration in range(n_iteration):
    logits = X_train @ Theta
    Y_proba = softmax(logits)
    # Mean cross-entropy between predicted probabilities and one-hot targets.
    loss = -(Y_train_one_hot * np.log(Y_proba + epsilon)).sum(axis=1).mean()
    if not iteration % 500:
        print(iteration, loss)
    error = Y_proba - Y_train_one_hot
    gradients = (1/m) * (X_train.T @ error)
    Theta -= eta * gradients

0 3.5356045081790177
500 0.4711387636214726
1000 0.36581083713176793
1500 0.31328647878313015
2000 0.2797297275966953
2500 0.25570597398940204
3000 0.23734231165431838
3500 0.22269576109780229
4000 0.210660238747567
4500 0.20054860276937572
5000 0.19190588292458005


In [49]:
# Show the learned weight matrix.
print("Model parameters: ", Theta, sep="\n")

Model parameters: 
[[ 5.60687481 -1.02439696 -7.40120651]
 [-1.2010688   0.87371521  0.43203466]
 [-2.07975966 -0.56278888  4.5593004 ]]


In [50]:
# Score the validation samples with the current weights.
logits = X_validation @ Theta
Y_proba = softmax(logits)
print("Calculation of class membership: ", Y_proba[:5], sep="\n")

# The predicted class is the column with the highest probability.
y_predict = Y_proba.argmax(axis=1)
print("Selecting the class to which the example belongs: ", y_predict[:5], sep="\n")

# Fraction of validation samples whose prediction matches the label.
accuracy_score = (y_predict == y_validation).mean()
print("Accuracy_score on validation set:", accuracy_score)

Calculation of class membership: 
[[2.00871059e-01 7.09394171e-01 8.97347699e-02]
 [8.94217927e-06 5.28286645e-02 9.47162393e-01]
 [1.03040300e-03 6.06212094e-01 3.92757503e-01]
 [6.87136627e-04 8.42232117e-01 1.57080746e-01]
 [4.60611763e-06 6.24005163e-02 9.37594878e-01]]
Selecting the class to which the example belongs: 
[1 2 1 1 2]
Accuracy_score on validation set: 0.9333333333333333


In [51]:
# Softmax regression with an L2 penalty, trained with batch gradient descent.
eta = 0.05            # learning rate
n_iteration = 5001
m = len(X_train)      # number of training samples
epsilon = 1e-7        # added inside the log so log(0) is never evaluated
alpha = 0.1 # regularization hyperparameter
Theta = np.random.randn(n_inputs, n_outputs)

for iteration in range(n_iteration):
    logits = X_train @ Theta
    Y_proba = softmax(logits)
    cross_entropy_loss = -(Y_train_one_hot * np.log(Y_proba + epsilon)).sum(axis=1).mean()
    # Row 0 of Theta holds the bias weights and is excluded from the penalty.
    l2_loss = 0.5 * np.sum(Theta[1:] ** 2)
    loss = cross_entropy_loss + alpha * l2_loss
    if not iteration % 500:
        print(iteration, loss)
    error = Y_proba - Y_train_one_hot
    # Penalty gradient: alpha * Theta everywhere except the (unpenalized) bias row.
    penalty_gradients = alpha * Theta
    penalty_gradients[0] = 0
    gradients = (1/m) * (X_train.T @ error) + penalty_gradients
    Theta -= eta * gradients

0 4.074160805836161
500 0.5688580833116054
1000 0.516013536950202
1500 0.49922398558649905
2000 0.4912573069611827
2500 0.48686925940258124
3000 0.484287461452685
3500 0.48271305096298733
4000 0.4817310402264586
4500 0.48110891247376814
5000 0.48071029299693224


In [52]:
# Show the weight matrix learned with regularization.
print("Model parameter: ", Theta, sep="\n")

Model parameter: 
[[ 4.84714903  0.7652686  -3.7504893 ]
 [-1.07834979  0.2003082   0.87804159]
 [-0.42858153 -0.14665727  0.5752388 ]]


In [53]:
# Class probabilities for the validation samples under the current weights.
logits = X_validation @ Theta
Y_proba = softmax(logits)
print("Calculation of class membership: ")
print(Y_proba[:5])

# Pick the most probable class for every sample.
y_predict = Y_proba.argmax(axis=1)
print("Selecting the class to which the example belongs: ")
print(y_predict[:5])

# Share of validation samples classified correctly.
is_correct = y_predict == y_validation
accuracy_score = is_correct.mean()
print("Accuracy_score on validation set:", accuracy_score)

Calculation of class membership: 
[[0.44181835 0.47111822 0.08706344]
 [0.00563123 0.26592748 0.72844129]
 [0.0257683  0.46382122 0.51041048]
 [0.01309895 0.42234828 0.56455277]
 [0.00275734 0.21715809 0.78008457]]
Selecting the class to which the example belongs: 
[1 2 2 2 2]
Accuracy_score on validation set: 0.9333333333333333


In [69]:
# Train a softmax model with regularization and early stopping
eta = 0.05
n_iteration = 100001
m = len(X_train)
epsilon = 1e-7
alpha = 0.1 # regularization hyperparameter
best_loss = np.infty
Theta = np.random.randn(n_inputs, n_outputs)

for iteration in range(n_iteration):
    logits = X_train.dot(Theta)
    Y_proba = softmax(logits)
    cross_entropy_loss = -np.mean(np.sum(Y_train_one_hot * np.log(Y_proba + epsilon), axis=1))
    l2_loss = 1/2*np.sum(np.square(Theta[1:]))
    loss = cross_entropy_loss + l2_loss * alpha
    error = Y_proba - Y_train_one_hot
    gradients = 1/m * X_train.T.dot(error) + np.r_[np.zeros([1, n_outputs]), alpha * Theta[1:]]
    Theta = Theta - eta * gradients
    
    logits = X_validation.dot(Theta)
    Y_proba = softmax(logits)
    cross_entropy_loss = -np.mean(np.sum(Y_validation_one_hot * np.log(Y_proba + epsilon), axis=1))
    l2_loss = 1/2*np.sum(np.square(Theta[1:]))
    loss = cross_entropy_loss + l2_loss * alpha
    if iteration%500 == 0:
        print(iteration, loss)
    if loss <= best_loss:
        best_loss = loss
    else:
        print(iteration - 1, best_loss)
        print(iteration, loss, "Early stopping")
        break

0 4.7755668264369415
500 0.5936428632121784
1000 0.5612631722586426
1500 0.5496906967143853
2000 0.5433929119602923
2500 0.5395592349996164
3000 0.5371294279788861
3500 0.5355559777885432
4000 0.5345206964438962
4500 0.5338298338530085
5000 0.5333625796433861
5500 0.5330423817103542
6000 0.5328200858591043
6500 0.5326637502554392
7000 0.5325523827470584
7500 0.5324720364526762
8000 0.5324133459629801
8500 0.532369955359362
9000 0.5323375049178377
9500 0.5323129716458026
10000 0.5322942361856757
10500 0.5322797958994419
11000 0.5322685731776343
11500 0.5322597862875975
12000 0.5322528616151301
12500 0.53224737351044
13000 0.5322430026803494
13500 0.53223950713507
14000 0.5322367016996726
14500 0.5322344434158482
15000 0.5322326210293855
15500 0.5322311473373667
16000 0.5322299535561513
16500 0.5322289851316935
17000 0.5322281985900724
17500 0.5322275591462519
18000 0.5322270388714817
18500 0.5322266152766842
19000 0.5322262702088132
19500 0.5322259889850163
20000 0.5322257597091614
2050

In [67]:
# Evaluate the current weights on the validation set.
logits = X_validation @ Theta
Y_proba = softmax(logits)
print("Calculation of class membership: ")
print(Y_proba[:5])

# Highest-probability column per row is the predicted class.
y_predict = np.argmax(Y_proba, axis=1)
print("Selecting the class to which the example belongs: ")
print(y_predict[:5])

# Accuracy = fraction of predictions equal to the true labels.
n_correct = np.sum(y_predict == y_validation)
accuracy_score = n_correct / len(y_validation)
print("Accuracy_score on validation set:", accuracy_score)

Calculation of class membership: 
[[0.44923503 0.47847329 0.07229168]
 [0.0052255  0.25362713 0.74114736]
 [0.02530796 0.46442003 0.51027201]
 [0.01251189 0.41276492 0.57472319]
 [0.00246856 0.20052266 0.79700878]]
Selecting the class to which the example belongs: 
[1 2 2 2 2]
Accuracy_score on validation set: 0.9333333333333333
