In [1]:
import numpy as np
from sklearn import datasets
import matplotlib.pyplot as plt

In [2]:
# Load the iris dataset that ships with scikit-learn. The bare last expression
# makes the notebook display which fields the returned object exposes.
iris = datasets.load_iris()
iris.keys()

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename'])

In [3]:
# Use only feature columns 2 and 3 as model inputs; the class ids are the target.
# Preview the first five rows of each to sanity-check the selection.
X = iris["data"][:, [2, 3]]
y = iris["target"]
print("Data: ", X[:5], sep="\n")
print("Target: ", y[:5], sep="\n")

Data: 
[[1.4 0.2]
 [1.4 0.2]
 [1.3 0.2]
 [1.5 0.2]
 [1.4 0.2]]
Target: 
[0 0 0 0 0]


In [4]:
# Prepend a constant column of ones so the first row of the weight matrix
# plays the role of the bias term.
X_with_bias = np.hstack([np.ones((X.shape[0], 1)), X])
print("Data with bias: ", X_with_bias[:5], sep="\n")

Data with bias: 
[[1.  1.4 0.2]
 [1.  1.4 0.2]
 [1.  1.3 0.2]
 [1.  1.5 0.2]
 [1.  1.4 0.2]]


In [5]:
# Seed NumPy's global random number generator so the shuffle and the random
# weight initialisation further down are reproducible.
np.random.seed(seed=42)

In [6]:
# Create disjoint train, validation and test sets from a random shuffle.
test_ratio = 0.2
validation_ratio = 0.2
total_size = len(X_with_bias)

test_size = int(total_size * test_ratio)
validation_size = int(total_size * validation_ratio)
train_size = total_size - test_size - validation_size

random_indexes = np.random.permutation(total_size)

# Slice with explicit forward boundaries. Negative end offsets only line up
# when validation_size == test_size, and a size of 0 would turn `[-0:]` into
# "take every sample".
validation_end = train_size + validation_size
train_indexes = random_indexes[:train_size]
validation_indexes = random_indexes[train_size:validation_end]
test_indexes = random_indexes[validation_end:]

# Every sample must land in exactly one of the three sets.
assert len(train_indexes) + len(validation_indexes) + len(test_indexes) == total_size

X_train = X_with_bias[train_indexes]
y_train = y[train_indexes]
X_validation = X_with_bias[validation_indexes]
y_validation = y[validation_indexes]
X_test = X_with_bias[test_indexes]
y_test = y[test_indexes]

In [7]:
# Implementation of one-hot encoding
def to_one_hot(y, number_of_classes=None):
    """Convert a 1-D array of integer class ids into a one-hot matrix.

    Parameters
    ----------
    y : array-like of int, shape (m,)
        Class id of each sample, used directly as a column index.
    number_of_classes : int, optional
        Width of the returned matrix. When omitted it is inferred as
        ``y.max() + 1``, which is only correct if ``y`` contains the highest
        class id. Pass it explicitly when encoding a subset (for example a
        test split) that may not contain every class.

    Returns
    -------
    numpy.ndarray, shape (m, number_of_classes)
        Float matrix with a single 1 per row, in the column given by ``y``.

    Raises
    ------
    ValueError
        If ``y`` is empty and ``number_of_classes`` is not given, because the
        width cannot be inferred.
    """
    y = np.asarray(y)
    if number_of_classes is None:
        if y.size == 0:
            raise ValueError(
                "cannot infer number_of_classes from an empty label array"
            )
        number_of_classes = int(y.max()) + 1
    m = len(y)
    Y_one_hot = np.zeros((m, number_of_classes))
    # Fancy indexing: row i gets its 1 in column y[i].
    Y_one_hot[np.arange(m), y] = 1
    return Y_one_hot

In [8]:
# One-hot encode the labels of each split.
Y_train_one_hot, Y_test_one_hot, Y_validation_one_hot = (
    to_one_hot(labels) for labels in (y_train, y_test, y_validation)
)

In [9]:
# Softmax function
def softmax(logits):
    """Row-wise softmax of a 2-D array of scores.

    Parameters
    ----------
    logits : array-like, shape (m, k)
        One row of class scores per sample.

    Returns
    -------
    numpy.ndarray, shape (m, k)
        Each row holds non-negative values that sum to 1.
    """
    # Softmax is invariant to adding a constant to a row, so subtract the row
    # maximum first: the largest exponent becomes exp(0) = 1 and np.exp can no
    # longer overflow to inf (which would turn the division into NaN).
    shifted = logits - np.max(logits, axis=1, keepdims=True)
    exps = np.exp(shifted)
    exp_sums = np.sum(exps, axis=1, keepdims=True)
    return exps / exp_sums

In [10]:
# Model dimensions: one input per column of X_train (bias column included)
# and one output per distinct class label seen in the training set.
n_inputs = X_train.shape[-1]
n_outputs = np.unique(y_train).size
print("Number of input:", n_inputs)
print("Number of outputs:", n_outputs)

Number of input: 3
Number of outputs: 3


In [11]:
# Fit the softmax regression weights with batch gradient descent on the
# cross-entropy loss, using the whole training set at every step.
eta = 0.05  # learning rate
n_iteration = 5001
m = len(X_train)
epsilon = 1e-7  # added inside the log so log(0) can never occur

# Random starting weights: one row per input (bias included), one column per class.
Theta = np.random.randn(n_inputs, n_outputs)

for iteration in range(n_iteration):
    logits = np.dot(X_train, Theta)
    Y_proba = softmax(logits)
    error = Y_proba - Y_train_one_hot
    loss = -np.mean((Y_train_one_hot * np.log(Y_proba + epsilon)).sum(axis=1))
    # Report progress every 500 iterations.
    if not iteration % 500:
        print(iteration, loss)
    gradients = (1 / m) * np.dot(X_train.T, error)
    Theta = Theta - eta * gradients

0 3.5356045081790177
500 0.4711387636214726
1000 0.36581083713176793
1500 0.31328647878313015
2000 0.2797297275966953
2500 0.25570597398940204
3000 0.23734231165431838
3500 0.22269576109780229
4000 0.210660238747567
4500 0.20054860276937572
5000 0.19190588292458005


In [13]:
# Show the learned weights (rows: bias + input features, columns: classes).
print("Model parameters: ", Theta, sep="\n")

Model parameters: 
[[ 5.60687481 -1.02439696 -7.40120651]
 [-1.2010688   0.87371521  0.43203466]
 [-2.07975966 -0.56278888  4.5593004 ]]
