### combination of adagrad with mini batch then stochastic in multivariable

In [2]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import r2_score

#### read data

In [3]:
data = np.loadtxt("MultipleLR.csv", delimiter=",")
X = data[:, 0:3]
ones = np.ones(len(X))
X = np.insert(X, 0, ones, axis=1)
y = data[:, -1]

#### Adagrad in mini batch 

In [4]:
def adagrad_mini_batch(X, y, alpha, epsilon, iterations, batch_size):
    m, n = X.shape
    thetas = np.zeros((n, 1))
    v = 0
    J = []
    old_cost = 0
    counter = 0
    for _ in range(iterations):
        for i in range(0, m, batch_size):

            x_batched = X[i : i + batch_size]
            y_batched = y[i : i + batch_size]

            hyp = np.dot(x_batched, thetas)
            
            error = hyp - y_batched.reshape(len(y_batched), 1)
            cost = np.sum((error)**2) / (2*len(y_batched))
            J.append(cost)
            gradient = np.sum(x_batched.T @ error) / len(y_batched)

            v = v + gradient**2
            thetas = thetas - ((alpha * gradient) / (np.sqrt(v) + epsilon))
        if abs(old_cost - cost) < 1e-3:
            break
        old_cost = cost
        counter += 1
    y_predicted = X @ thetas
    return J, counter, y_predicted

In [5]:
J, counter, y_predicted = adagrad_mini_batch(X, y, 0.1, 1e-8, 1000, 5)
print(f"iteration = {counter}")
print(f"r2_score = {r2_score(y_predicted, y)}")

iteration = 35
r2_score = 0.9773599102436411


#### Adagrad in stochastic

In [6]:
def adagrad_stochastic(X, y, alpha, epsilon, iterations):
    m, n = X.shape
    thetas = np.zeros((n, 1))
    v = 0
    J = []
    old_cost = 0
    counter = 0
    for _ in range(iterations):
        for i in range(0, m):
            hyp = np.dot(X[i].reshape(1, n), thetas)
            
            error = hyp - y[i]
            cost = error**2
            J.append(cost)
            
            gradient = (X[i].reshape(1, n).T @ error) * 2

            v = v + gradient**2
            thetas = thetas - ((alpha * gradient) / (np.sqrt(v) + epsilon))
        if abs(old_cost - cost) < 1e-3:
            break
        old_cost = cost
        counter += 1
    y_predicted = X @ thetas
    return J, counter, y_predicted

In [7]:
J, counter, y_predicted = adagrad_stochastic(X, y, 0.1, 1e-8, 1000)
print(f"iteration = {counter}")
print(f"r2_score = {r2_score(y_predicted, y)}")

iteration = 7
r2_score = 0.9777833578571621
