In [2]:
import numpy as np
from sklearn import datasets
import matplotlib.pyplot as plt

In [3]:
# Load scikit-learn's bundled Iris dataset; the returned object is dict-like.
iris = datasets.load_iris()
# Last expression of the cell, so the notebook displays the available keys.
iris.keys()

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename'])

In [4]:
# Keep only feature columns 2 and 3 of the data matrix; labels are used as-is.
X = iris["data"][:, [2, 3]]
y = iris["target"]
# Preview the first five rows of the features and of the labels.
print("Data: ", X[:5], sep="\n")
print("Target: ", y[:5], sep="\n")

Data: 
[[1.4 0.2]
 [1.4 0.2]
 [1.3 0.2]
 [1.5 0.2]
 [1.4 0.2]]
Target: 
[0 0 0 0 0]


In [5]:
# Prepend a constant column of ones to X, so that the first row of the weight
# matrix plays the role of the bias term.
X_with_bias = np.hstack([np.ones((X.shape[0], 1)), X])
print("Data with bias: ", X_with_bias[:5], sep="\n")

Data with bias: 
[[1.  1.4 0.2]
 [1.  1.4 0.2]
 [1.  1.3 0.2]
 [1.  1.5 0.2]
 [1.  1.4 0.2]]


In [6]:
# Seed NumPy's global random generator so the shuffling and the random weight
# initialisation further down are repeatable when the cells are run in order.
np.random.seed(42)

In [7]:
# Create the train, validation and test sets from one random permutation.
test_ratio = 0.2
validation_ratio = 0.2
total_size = len(X_with_bias)

test_size = int(total_size * test_ratio)
validation_size = int(total_size * validation_ratio)
train_size = total_size - test_size - validation_size

random_indexes = np.random.permutation(total_size)

# Use explicit, contiguous boundaries: [train | validation | test].
# Negative stops/starts (e.g. `[train_size:-validation_size]`, `[-test_size:]`)
# are only correct when both ratios are equal and non-zero; `[-0:]` would
# select the whole array.
validation_end = train_size + validation_size
train_indexes = random_indexes[:train_size]
validation_indexes = random_indexes[train_size:validation_end]
test_indexes = random_indexes[validation_end:]

# Sanity check: the three splits must partition the dataset exactly.
assert len(train_indexes) + len(validation_indexes) + len(test_indexes) == total_size

X_train = X_with_bias[train_indexes]
y_train = y[train_indexes]
X_validation = X_with_bias[validation_indexes]
y_validation = y[validation_indexes]
X_test = X_with_bias[test_indexes]
y_test = y[test_indexes]

In [8]:
# Implementation of one-hot encoding
def to_one_hot(y, number_of_classes=None):
    """Convert a 1-D array of integer class labels into a one-hot matrix.

    Parameters
    ----------
    y : array-like of int
        Class labels, one per example.
    number_of_classes : int, optional
        Number of columns of the result. When omitted it is inferred as
        ``y.max() + 1`` (the original behaviour). Pass it explicitly when
        encoding several splits, so that a split which happens to lack the
        highest class still gets the same number of columns as the others.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(len(y), number_of_classes)`` with a single 1
        per row, in the column given by that row's label.

    Raises
    ------
    ValueError
        If ``y`` is empty and ``number_of_classes`` is not given.
    IndexError
        If a label is not smaller than ``number_of_classes``.
    """
    y = np.asarray(y)
    if number_of_classes is None:
        number_of_classes = y.max() + 1
    m = len(y)
    Y_one_hot = np.zeros((m, number_of_classes))
    # Fancy indexing: for every row i, set column y[i] to 1.
    Y_one_hot[np.arange(m), y] = 1
    return Y_one_hot

In [9]:
# One-hot encode the labels of each split (order: train, test, validation).
# Each call infers the number of classes from that split's own labels.
Y_train_one_hot, Y_test_one_hot, Y_validation_one_hot = map(
    to_one_hot, (y_train, y_test, y_validation)
)

In [10]:
# Softmax function
def softmax(logits):
    """Row-wise softmax of a 2-D array of scores.

    Parameters
    ----------
    logits : numpy.ndarray
        Array of shape ``(n_examples, n_classes)`` holding unnormalised scores.

    Returns
    -------
    numpy.ndarray
        Array of the same shape whose rows are non-negative and sum to 1.
    """
    # Softmax is invariant to adding a constant to every score of a row.
    # Subtracting the row maximum keeps every exponent <= 0, so np.exp cannot
    # overflow to inf (which would otherwise produce inf/inf = nan).
    shifted = logits - np.max(logits, axis=1, keepdims=True)
    exps = np.exp(shifted)
    exp_sums = np.sum(exps, axis=1, keepdims=True)
    return exps / exp_sums

In [11]:
# Model dimensions: one input per column of X_train (the bias column included)
# and one output per distinct label present in y_train.
n_inputs = X_train.shape[-1]
n_outputs = np.unique(y_train).size
print("Number of input:", n_inputs)
print("Number of outputs:", n_outputs)

Number of input: 3
Number of outputs: 3


In [29]:
# Train a softmax regression model with batch gradient descent
eta = 0.05          # learning rate (step size of each update)
n_iteration = 5001  # number of gradient steps
m = len(X_train)    # number of training examples
epsilon = 1e-7      # added inside the log so that log(0) is never evaluated

# NOTE(review): the initial weights come from NumPy's global RNG, so the exact
# numbers printed below depend on how many random draws happened before this
# cell ran. Restart the kernel and run all cells in order to reproduce a run.
Theta = np.random.randn(n_inputs, n_outputs)

for iteration in range(n_iteration):
    logits = X_train.dot(Theta)
    Y_proba = softmax(logits)
    if iteration % 500 == 0:
        # The loss is only reported, never used by the update, so it is
        # evaluated on logging iterations only instead of on every step.
        loss = -np.mean(np.sum(Y_train_one_hot * np.log(Y_proba + epsilon), axis=1))
        print(iteration, loss)
    # Gradient of the mean cross-entropy with respect to Theta.
    error = Y_proba - Y_train_one_hot
    gradients = 1/m * X_train.T.dot(error)
    Theta = Theta - eta * gradients

0 2.1558670968097458
500 0.5037136804073767
1000 0.37290732092550893
1500 0.3181648078229816
2000 0.2840738175795791
2500 0.25966838973587675
3000 0.24093253554723407
3500 0.2259276257173829
4000 0.21356013770955873
4500 0.20314899741286405
5000 0.19424017267038912


In [13]:
# Show the weight matrix learned by the training cell.
print("Model parameters: ", Theta, sep="\n")

Model parameters: 
[[ 5.60687481 -1.02439696 -7.40120651]
 [-1.2010688   0.87371521  0.43203466]
 [-2.07975966 -0.56278888  4.5593004 ]]


In [27]:
# Class probabilities of the validation examples under the current Theta.
logits = X_validation.dot(Theta)
Y_proba = softmax(logits)
print("Calculation of class membership: ", Y_proba[:5], sep="\n")

# The predicted class of an example is the column with the highest probability.
y_predict = Y_proba.argmax(axis=1)
print("Selecting the class to which the example belongs: ", y_predict[:5], sep="\n")

# Accuracy: fraction of validation examples whose prediction equals the label.
accuracy_score = (y_predict == y_validation).mean()
print("Accuracy_score on validation set:", accuracy_score)

Calculation of class membership: 
[[2.00871059e-01 7.09394171e-01 8.97347699e-02]
 [8.94217927e-06 5.28286645e-02 9.47162393e-01]
 [1.03040300e-03 6.06212094e-01 3.92757503e-01]
 [6.87136627e-04 8.42232117e-01 1.57080746e-01]
 [4.60611763e-06 6.24005163e-02 9.37594878e-01]]
Selecting the class to which the example belongs: 
[1 2 1 1 2]
Accuracy_score on validation set: 0.9333333333333333


In [36]:
# Train a softmax regression model with L2 regularization (batch gradient descent)
eta = 0.05          # learning rate (step size of each update)
n_iteration = 5001  # number of gradient steps
m = len(X_train)    # number of training examples
epsilon = 1e-7      # added inside the log so that log(0) is never evaluated
alpha = 0.1 # regularization parameter

# NOTE(review): the initial weights come from NumPy's global RNG, so the exact
# numbers printed below depend on how many random draws happened before this
# cell ran. Restart the kernel and run all cells in order to reproduce a run.
Theta = np.random.randn(n_inputs, n_outputs)

for iteration in range(n_iteration):
    logits = X_train.dot(Theta)
    Y_proba = softmax(logits)
    if iteration % 500 == 0:
        # The loss is only reported, never used by the update, so both of its
        # terms are evaluated on logging iterations only.
        cross_entropy_loss = -np.mean(np.sum(Y_train_one_hot * np.log(Y_proba + epsilon), axis=1))
        # Theta[0] multiplies the constant bias column and is left unpenalised.
        l2_loss = 1/2*np.sum(np.square(Theta[1:]))
        loss = cross_entropy_loss + l2_loss * alpha
        print(iteration, loss)
    error = Y_proba - Y_train_one_hot
    # Cross-entropy gradient plus the gradient of the L2 penalty; the row of
    # zeros keeps the bias row free of regularization.
    gradients = 1/m * X_train.T.dot(error) + np.r_[np.zeros([1, n_outputs]), alpha * Theta[1:]]
    Theta = Theta - eta * gradients

0 2.3294787281046965
500 0.545389888343696
1000 0.5090543454418135
1500 0.4961107501240011
2000 0.48958713637665996
2500 0.4858999583285211
3000 0.4837012890713871
3500 0.4823494945453566
4000 0.48150164899360437
4500 0.4809623748550879
5000 0.4806158175804728


In [38]:
# Show the weight matrix learned by the regularized training cell.
print("Model parameter: ", Theta, sep="\n")

Model parameter: 
[[ 4.17368981  0.08964528 -4.45795554]
 [-1.08072598  0.19854501  0.88218098]
 [-0.42909805 -0.14691223  0.57601028]]


In [37]:
# Class probabilities of the validation examples under the current Theta.
logits = X_validation.dot(Theta)
Y_proba = softmax(logits)
print("Calculation of class membership: ", Y_proba[:5], sep="\n")

# The predicted class of an example is the column with the highest probability.
y_predict = Y_proba.argmax(axis=1)
print("Selecting the class to which the example belongs: ", y_predict[:5], sep="\n")

# Accuracy: fraction of validation examples whose prediction equals the label.
accuracy_score = (y_predict == y_validation).mean()
print("Accuracy_score on validation set:", accuracy_score)

Calculation of class membership: 
[[0.44232519 0.47164103 0.08603378]
 [0.0056034  0.26512477 0.72927183]
 [0.02573494 0.46386078 0.51040428]
 [0.01305828 0.42172588 0.56521584]
 [0.00273738 0.21605628 0.78120634]]
Selecting the class to which the example belongs: 
[1 2 2 2 2]
Accuracy_score on validation set: 0.9333333333333333
