Implement and train Softmax Regression with mini-batch SGD and early stopping.

The expected outcome.
* Implement Softmax Regression Model.
* Implement mini-batch SGD.
* The training should support early stopping.
* Train and evaluate the model with cross-validation. The evaluation metric is the *accuracy*.
* Retrain the model with early stopping.


**DO NOT USE SKLEARN**

In [2]:
import numpy as np
import pandas as pd 

from sklearn import datasets
from sklearn.model_selection import StratifiedShuffleSplit

np.random.seed(42)

In [3]:

iris = datasets.load_iris()
X = iris["data"]
y = iris["target"]
df = pd.DataFrame({fname: values for fname, values in zip(iris["feature_names"], X.T)})
df["target"] = y

df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


## Your Code
You can start writing your code from here. Please don't modify any of the previous code.

In [8]:
class softmaxRegressor():
    def __init__(self, lr=0.01, epochs=100, early_stopping=True):
        import numpy as np
        self.epochs = epochs
        self.lr = lr
        self.early_stopping = early_stopping
        self.w = None
        self.num_features = None     #Holds the number of features of training data to check the dimensions at prediction.
        self.is_trained = False      #Flag to denote if the model has been trained
    
    def softmax(self, z):
        exp = np.exp(z)  
        for i in range(len(z)):
            exp[i] /= np.sum(exp[i])
        return exp
    
    def fit(self, X, y, batch_size=None):
        if type(X) != np.ndarray:
            X = X.to_numpy()
            
        m, n = X.shape    
        x0 = np.ones((m, 1))        #Bias term.
        X = np.hstack((x0, X))
        
        y_hot = pd.get_dummies(y)   #One hot encoder.
        c = y_hot.shape[1]          #Number of classes.
        
        w = np.zeros((n+1, c))      #Weights initialization.
        
        loss_lst = []
        #If batch size is not specified: operate on all the data
        if batch_size == None: batch_size = m
        
        
        for i in range(self.epochs):
            for j in range(0, m, batch_size):
                #Load next batch:
                x_b = X[j:j+batch_size, :]
                y_b = y.iloc[j:j+batch_size]
                y_b_hot = y_hot.iloc[j:j+batch_size, :]
                m = x_b.shape[0]
                    
                z = x_b @ w
                y_hat = self.softmax(z)
                
                loss = -np.mean(np.log(y_hat[np.arange(len(y_b)), y_b]))
                loss_lst.append(loss)
                
                dw = np.dot(x_b.T, (y_hat-y_b_hot))/m  #weights Gradient
                w -= self.lr*dw                        #Weights update
                
            #Early stopping Condition:
            if (self.early_stopping == True) and (abs(loss_lst[-1]-loss_lst[-2]) <=0.001):
                print(f"Training Stopped at the {i}th epoch")
                break
        
        
        self.w = w
        self.num_features = n
        self.is_trained = True
        return self
        
    def predict(self, X):
        m, n = X.shape
        #Check if the model is trained:
        assert self.is_trained, "Model is not trained"
        #Check input size:
        assert n == self.num_features, f"Input shape mismatch: expected {self.num_features} features but got {n}"

        x0 = np.ones((m, 1))    #Bias term.
        X = np.hstack((x0, X))

        z = X @ self.w
        y_hat = self.softmax(z)

        # Returning the class with highest probability.
        return np.argmax(y_hat, axis=1)
    
    #Predict and Calculate accuracy
    def score(self, X, y):
        y_hat = self.predict(X)
        return np.sum(y==y_hat)/len(y)
        
        

Using the following cell to train and evaluate your model.

In [17]:
split = StratifiedShuffleSplit(n_splits=3, test_size=0.2, random_state=42)
for train_index, test_index in split.split(df, df["target"]):
    strat_train_set = df.loc[train_index]
    strat_test_set = df.loc[test_index]
    
    
    # Use strat_train_set and strat_test_set to train and evaluate your model
    model = softmaxRegressor(lr=0.07, 
                             epochs=500, 
                             early_stopping=True)
    
    model.fit(strat_train_set.drop(columns=["target"]),
              strat_train_set["target"],
              batch_size=25)
    
    print("Test set accuracy:")
    print(model.score(strat_test_set.drop(columns=["target"]),
                      strat_test_set["target"]))
    
    

Training Stopped at the 136th epoch
Test set accuracy:
0.9333333333333333
Training Stopped at the 112th epoch
Test set accuracy:
0.9666666666666667
Training Stopped at the 144th epoch
Test set accuracy:
0.9333333333333333
