### === Task ===

1. With the iris data given in class, implement train_test_split from scratch.

2. Put everything into a class called LogisticRegression, this class should allow you choose any of the training methods you'd like including "batch", "minibatch" and "sto". However, if the input method is not one of the three, it should "raise ValueError".

3. Calculate time taken to fit your models using different training methods.

4. Perform a classification on the dataset using all 3 methods and also show what happens if your defined training method is not either "batch", "minibatch" or "sto". Make sure to plot the training losses.

5. Simply, use classification_report from sklearn.metrics to evaluate your models.

6. Discuss your results ie. training losses of the three methods and time taken to fit models.

In [5]:
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt
from time import time

# import some data
iris = datasets.load_iris()
X = iris.data[:, 2:]
y = iris.target

# feature scaling
scaler = StandardScaler()
X = scaler.fit_transform(X)

# data split
train_size = round(0.7 * X.shape[0])
X_train = X[:train_size, :]
Y_train = y[:train_size]
X_test = X[train_size:,:]
Y_test = y[train_size:]

print(X_train.shape)
print(Y_train.shape)
print(X_test.shape)
print(Y_test.shape)

# add intercept to our X
intercept = np.ones((X_train.shape[0], 1))
X_train   = np.concatenate((intercept, X_train), axis=1)
intercept = np.ones((X_test.shape[0], 1))
X_test    = np.concatenate((intercept, X_test), axis=1)

k = len(set(y))
m = X_train.shape[0]
n = X_train.shape[1]
Y_train_encoded = np.zeros((m, k))
for each_class in range(k):
    cond = Y_train==each_class
    Y_train_encoded[np.where(cond), each_class] = 1

(105, 2)
(105,)
(45, 2)
(45,)


In [49]:
class LogisticRegression:
    def __init__(self, method="minibatch", max_iter=1000, l_rate=0.01, batch_size_ratio=0.1):
        if (method != "minibatch") & (method != "batch") & (method != "sto"):
            raise ValueError("Method is not match")
        else:
            self.method = method
            self.max_iter = max_iter
            self.l_rate = l_rate
            self.batch_size_ratio = batch_size_ratio

    def fit(self, X, Y):
        m = X.shape[0]
        n = X.shape[1]
        k = Y.shape[1]
        start = time()
        self.W = np.random.rand(n, k)
        batch_size = round(self.batch_size_ratio*m)
        list_of_used_ix = []
        for i in range(self.max_iter):
            if self.method == "minibatch":
                idx = np.random.randint(0, m-batch_size)
                X_batch = X[idx:idx+batch_size]
                Y_batch = Y[idx:idx+batch_size]
            elif self.method == "batch":
                X_batch = X
                Y_batch = Y
            elif self.method == "sto":
                idx = np.random.randint(X_train.shape[0])
                while idx in list_of_used_ix:
                    idx = np.random.randint(X_train.shape[0])
                X_batch = X[idx, :].reshape(1, -1)
                Y_batch = Y_train_encoded[idx]                
                list_of_used_ix.append(idx)
                if len(list_of_used_ix) == X_train.shape[0]:
                    list_of_used_ix = []

            cost, grad =  self.gradient(X_batch, Y_batch, self.W)
            if i % 500 == 0:
                print(f"Cost at iteration {i}", cost)
            self.W = self.W - self.l_rate * grad
        self.runtime = time()-start

    def gradient(self, X, Y, W):
        m = X.shape[0]
        h = self.h_theta(X, W)
        cost = - np.sum(Y * np.log(h)) / m
        error = h - Y
        grad = self.softmax_grad(X, error)
        return cost, grad

    def softmax_grad(self, X, error):
        return  X.T @ error
            
    def softmax(self, theta_t_x):
        return np.exp(theta_t_x) / np.sum(np.exp(theta_t_x), axis=1, keepdims=True)

    def h_theta(self, X, W):
        return self.softmax(X @ W)

    def plot_losses(self):
        pass

In [52]:
model_mini = LogisticRegression(method="minibatch")
model_mini.fit(X_train, Y_train_encoded)

Cost at iteration 0 0.682254458275567
Cost at iteration 500 0.020909133773342584


In [55]:
model_batch = LogisticRegression(method="batch")
model_batch.fit(X_train, Y_train_encoded)

Cost at iteration 0 0.9020057063563102
Cost at iteration 500 0.029143148012840923


In [56]:
model_sto = LogisticRegression(method="sto")
model_sto.fit(X_train, Y_train_encoded)

Cost at iteration 0 0.7962857007453462
Cost at iteration 500 0.2585207486279982


In [57]:
print(model_sto.runtime)
print(model_mini.runtime)
print(model_batch.runtime)

0.04671430587768555
0.0329127311706543
0.03390955924987793
