In [5]:
import numpy as np
import matplotlib.pyplot as plt

In [6]:
from sklearn.datasets import load_iris

X, y = load_iris(return_X_y=True)

In [7]:
from sklearn.model_selection import train_test_split

X_with_bias = np.c_[np.ones([len(X), 1]), X]
train, test, y_train, y_test = train_test_split(X_with_bias, y, test_size = 0.2)

In [8]:
def to_one_hot(y):
    cols = y.max()+1
    rows = len(y)
    new = np.zeros((rows, cols))
    new[np.arange(rows), y] = 1 #wow
    return new

$\sigma\left(\mathbf{s}(\mathbf{x})\right)_k = \dfrac{\exp\left(s_k(\mathbf{x})\right)}{\sum\limits_{j=1}^{K}{\exp\left(s_j(\mathbf{x})\right)}}$

In [9]:
def softmax(logits):
    exps = np.exp(logits)
    exp_sums = np.sum(exps, axis=1, keepdims=True)
    return exps / exp_sums

In [10]:
print('Data with bias as index 0:')
print(train[:5])
print('Labels:')
print(to_one_hot(y_train[:5]))

Data with bias as index 0:
[[ 1.   5.   3.5  1.6  0.6]
 [ 1.   7.7  2.6  6.9  2.3]
 [ 1.   6.3  2.5  4.9  1.5]
 [ 1.   5.8  2.7  4.1  1. ]
 [ 1.   6.4  2.7  5.3  1.9]]
Labels:
[[ 1.  0.  0.]
 [ 0.  0.  1.]
 [ 0.  1.  0.]
 [ 0.  1.  0.]
 [ 0.  0.  1.]]


In [247]:
class Softmax_reg():
    def __init__(self, lr = 0.01, eps = 1e-7, iterations = 5001):
        self.lr = lr
        self.eps = eps
        self.iterations = iterations
        
    def fit(self, train, y):
        self.theta = np.random.randn(train.shape[1], len(np.unique(y)))
        one_hot_y = to_one_hot(y)
        m = len(train)
        for i in range(self.iterations):
            Y_proba = softmax(train.dot(self.theta))
            loss = -np.mean(np.sum(one_hot_y*np.log(Y_proba+ self.eps)))
            if i %5000 == 0:
                print(i, loss)
            error = Y_proba - one_hot_y
            gradients = 1/m * train.T.dot(error)
            self.theta -= self.lr * gradients
            
    def predict(self, data):
        data = np.array(data)
        return [np.argmax(i) for i in softmax(data.dot(self.theta)).reshape(len(data),3)]
    
    def score(self, data, labels):
        data = np.array(data)
        score = np.sum(self.predict(data) == labels) / len(data)
        return score
        

In [248]:
softi = Softmax_reg()

In [249]:
softi.fit(train, y_train)

0 847.262788193
5000 23.8131370003


In [250]:
softi.score(train, y_train)

0.94999999999999996