In [1]:
import numpy as np

class SoftmaxRegression(object):
    
    def __init__(self, learning_rate=0.1, num_steps=1000,
                 regulariser=None,
                 lamda=0,
                 n_classes=None,
                 initial_wts=None):

        self.learning_rate = learning_rate
        self.num_steps = num_steps
        self.regulariser = regulariser
        self.n_classes = n_classes
        self.lamda = lamda
        self.initial_wts = initial_wts

    def _fit(self, X, y, init_params=True):
        if init_params:
            if self.n_classes is None:
                self.n_classes = np.max(y) + 1
            self._n_features = X.shape[1]

            
            self.b_, self.w_ = self._init_params(
                weights_shape=(self._n_features, self.n_classes),
                bias_shape=(self.n_classes,))
            
            if self.initial_wts:
                self.b_, self.w_ = np.split(self.initial_wts, [1])
                
            self.cost_ = []

        y_enc = self._one_hot(y=y, n_labels=self.n_classes, dtype=np.float)

        for i in range(self.num_steps):
            for idx in self._yield_batch_idx(
                    data=y):
                net = self._net_input(X[idx], self.w_, self.b_)
                softm = self._softmax(net)
                diff = softm - y_enc[idx]
                mse = np.mean(diff, axis=0)

                grad = np.dot(X[idx].T, diff)
                
                self.w_ -= (self.learning_rate * grad +
                            self.learning_rate * self.lamda * self.w_)
                self.b_ -= (self.learning_rate * np.sum(diff, axis=0))

            net = self._net_input(X, self.w_, self.b_)
            softm = self._softmax(net)
            cross_ent = self._cross_entropy(output=softm, y_target=y_enc)
            cost = self._cost(cross_ent)
            self.cost_.append(cost)
        return self

    def fit(self, X, y, init_params=True):
        self._fit(X=X, y=y, init_params=init_params)
        self._is_fitted = True
        return self
    
    def _predict(self, X):
        probas = self.predict_proba(X)
        return self._to_classlabels(probas)
 
    def predict(self, X):
        return self._predict(X)

    def predict_proba(self, X):
        net = self._net_input(X, self.w_, self.b_)
        softm = self._softmax(net)
        return softm

    def _net_input(self, X, W, b):
        return (X.dot(W) + b)

    def _softmax(self, z):
        return (np.exp(z.T) / np.sum(np.exp(z), axis=1)).T

    def _cross_entropy(self, output, y_target):
        return - np.sum(np.log(output) * (y_target), axis=1)

    def _cost(self, cross_entropy):
        L1_term = self.lamda * np.sum(np.abs(self.w_))
        L2_term = self.lamda * np.sum(self.w_ ** 2)
        if self.regulariser == 'l1':
            cross_entropy = cross_entropy + L1_term
        if self.regulariser == 'l2':
            cross_entropy = cross_entropy + L2_term
        return 0.5 * np.mean(cross_entropy)

    def _to_classlabels(self, z):
        return z.argmax(axis=1)
    
    def _init_params(self, weights_shape, bias_shape=(1,), dtype='float64',
                     scale=1):
        w = np.random.normal(loc=0.0, scale=scale, size=weights_shape)
        b = np.zeros(shape=bias_shape)
        return b.astype(dtype), w.astype(dtype)
    
    def _one_hot(self, y, n_labels, dtype):
        mat = np.zeros((len(y), n_labels))
        for i, val in enumerate(y):
            mat[i, val] = 1
        return mat.astype(dtype)    
    
    def _yield_batch_idx(self, data):
        indices = np.arange(data.shape[0])
        yield indices

In [2]:
from sklearn import datasets

In [3]:
from sklearn.model_selection import train_test_split

iris = datasets.load_iris()
X_train, X_temp, y_train, y_temp = train_test_split(iris.data, iris.target, test_size=.4)

logi = SoftmaxRegression().fit(X_train, y_train)




In [4]:
logi.predict(X_train)

array([0, 2, 2, 0, 0, 1, 1, 1, 1, 2, 2, 2, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0,
       1, 2, 2, 0, 1, 1, 1, 0, 2, 1, 2, 1, 2, 2, 0, 0, 2, 0, 0, 0, 1, 1,
       2, 2, 1, 0, 0, 1, 2, 2, 2, 0, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 0,
       0, 1, 0, 0, 1, 0, 1, 2, 1, 0, 2, 1, 0, 0, 0, 2, 2, 1, 1, 0, 1, 2,
       1, 2])

In [5]:
y_train

array([0, 2, 2, 0, 0, 1, 1, 1, 1, 2, 2, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0,
       1, 2, 2, 0, 1, 1, 1, 0, 2, 1, 2, 1, 2, 2, 0, 0, 2, 0, 0, 0, 2, 1,
       2, 2, 1, 0, 0, 1, 2, 2, 2, 0, 0, 1, 0, 1, 2, 0, 2, 0, 1, 1, 1, 0,
       0, 1, 0, 0, 1, 0, 1, 2, 1, 0, 2, 1, 0, 0, 0, 2, 2, 1, 1, 0, 1, 2,
       1, 2])