# Implementing Softmax regression from scratch with NumPy (scikit-learn is used only for loading the data, binarizing labels, splitting and scoring)

In [9]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.preprocessing import label_binarize
# Load the iris dataset and turn the integer class labels into an
# indicator matrix with one column per class (0, 1, 2).
iris = load_iris()
X = iris.data
y = label_binarize(iris.target, classes=[0, 1, 2])

In [273]:
class SoftmaxRegression:
    """Softmax (multinomial logistic) regression trained by mini-batch gradient descent.

    Targets are given as an indicator matrix of shape (n_samples, n_classes).
    A column of ones is appended to the inputs internally to act as the bias,
    so ``W`` has shape (n_features + 1, n_classes).
    """

    def __init__(self):
        # Weight matrix, shape = (n_features + 1, n_classes); None until fit().
        self.W = None

    def softmax(self, scores):
        """Return the row-wise softmax of a 2-D array of scores.

        The per-row maximum is subtracted before exponentiating. This leaves
        the result mathematically unchanged but prevents ``exp`` from
        overflowing (which would yield ``nan``) for large scores.
        """
        scores = np.asarray(scores)
        shifted = scores - np.max(scores, axis=1, keepdims=True)
        exp_scores = np.exp(shifted)
        return exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

    def learning_schedule(self, eta_):
        """Return the learning rate for the next epoch (5% decay)."""
        return eta_ * 0.95

    def fit(self, X, y, warm_start=False, epochs=1000, eta=0.01, batch_size=4):
        """Fit the weights with mini-batch gradient descent on the cross-entropy loss.

        Args:
            X: 2-D array of inputs, shape (n_samples, n_features).
            y: 2-D indicator matrix of targets, shape (n_samples, n_classes).
            warm_start: If True, continue from the existing ``self.W`` instead
                of drawing new random weights.
            epochs: Number of passes over the data.
            eta: Initial learning rate; decayed by ``learning_schedule`` after
                every epoch.
            batch_size: Number of samples per gradient step (the final batch
                of an epoch may be smaller).

        Raises:
            ValueError: If ``X`` or ``y`` is not 2-D, their sample counts
                differ, ``batch_size`` is not positive, or ``warm_start`` is
                requested before any weights exist.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if X.ndim < 2 or y.ndim < 2:
            raise ValueError('Check the shapes of the input data!')
        if X.shape[0] != y.shape[0]:
            raise ValueError(
                'You have entered the wrong number of samples for the data and targets!'
            )
        if batch_size < 1:
            raise ValueError('batch_size must be a positive integer!')

        # Append a constant feature so the bias is learned as a row of W.
        X = np.c_[X, np.ones(shape=(X.shape[0], 1))]

        if not warm_start:
            self.W = np.random.randn(X.shape[1], y.shape[1])
        elif self.W is None:
            # Fail immediately with a clear message rather than crashing
            # later on a matrix product with None.
            raise ValueError(
                'Cannot use warm start! '
                'Pass some data so that the model can initialize the weights!'
            )

        m = X.shape[0]

        for _ in range(epochs):
            # Half-open slices [start, start + batch_size) cover every sample,
            # including the last one; slicing clips the final, shorter batch.
            for start in range(0, m, batch_size):
                stop = start + batch_size
                X_batch = X[start:stop]
                y_batch = y[start:stop]

                probability = self.softmax(X_batch.dot(self.W))

                # Mean cross-entropy gradient: X^T (P - Y) / batch size.
                gradient = X_batch.T.dot(probability - y_batch) / X_batch.shape[0]
                self.W = self.W - eta * gradient

            eta = self.learning_schedule(eta)

    def predict(self, X):
        """Return class probabilities, shape (n_samples, n_classes).

        A 1-D input is treated as a single sample.
        """
        X = np.asarray(X)
        if X.ndim < 2:
            X = X.reshape(1, -1)
        X = np.c_[X, np.ones(shape=(X.shape[0], 1))]
        scores = X.dot(self.W)
        return self.softmax(scores)



In [274]:
from sklearn.model_selection import train_test_split
# Create an unfitted model; its weight matrix W is None until fit() is called.
softmax_reg = SoftmaxRegression()
# Hold out 20% of the samples for testing. No random_state is passed, so the
# split is not fixed between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)
# Show the type of the weights before any training has happened.
type(softmax_reg.W)

NoneType

In [311]:
# Train on the training split: 1000 epochs with mini-batches of 4 samples.
softmax_reg.fit(X_train, y_train, epochs = 1000, batch_size=4)

In [312]:
# predict() returns the softmax probabilities, one row per test sample.
y_pred = softmax_reg.predict(X_test)

In [313]:
from sklearn.metrics import accuracy_score
# Reduce the indicator targets and the predicted probabilities to class
# indices (position of the largest entry per row), then score them.
y_test_sparse = y_test.argmax(axis=1)
y_pred_sparse = y_pred.argmax(axis=1)
accuracy_score(y_test_sparse, y_pred_sparse)

1.0