In [2]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

In [1]:
def _r2_score(y_true, y_pred):
    """Return the coefficient of determination R^2 of ``y_pred`` w.r.t. ``y_true``.

    Both arguments are column vectors of identical shape. Degenerate cases
    are handled the way scikit-learn's ``r2_score`` handles them by default:
    fewer than two samples gives ``nan``; a constant target gives ``1.0``
    for a perfect fit and ``0.0`` otherwise.
    """
    if y_true.shape[0] < 2:
        return float("nan")
    ss_res = float(np.sum((y_true - y_pred) ** 2))
    ss_tot = float(np.sum((y_true - y_true.mean()) ** 2))
    if ss_tot == 0.0:
        return 1.0 if ss_res == 0.0 else 0.0
    return 1.0 - ss_res / ss_tot


def ADAM_BGD_LR(x, y, alpha=0.05, itirations=1000, momentum_beta=.99, adaptive_beta=.9, gradient_norm=0.001, loss_condition=0.001):
    """Fit a linear regression with full-batch gradient descent and Adam updates.

    A bias column of ones is prepended to ``x``, so the first entry of the
    returned weight vector is the intercept.

    Parameters
    ----------
    x : array-like, shape (m, n) or (m,)
        Feature matrix. A 1-D input is treated as a single feature column.
    y : array-like with m elements
        Targets; reshaped internally to a column vector.
    alpha : float
        Learning rate.
    itirations : int
        Maximum number of epochs (the parameter name is kept as-is so that
        existing keyword callers continue to work).
    momentum_beta : float in [0, 1)
        Decay rate of the first-moment (mean of gradients) estimate.
    adaptive_beta : float in [0, 1)
        Decay rate of the second-moment (mean of squared gradients) estimate.
    gradient_norm : float
        Training stops when the L2 norm of the gradient is <= this value.
    loss_condition : float
        From the fourth epoch on (``i > 2``), training stops when the
        absolute change in loss between consecutive epochs is below this value.

    Returns
    -------
    w : ndarray, shape (n + 1, 1)
        Final weights (bias first).
    hx : ndarray, shape (m, 1)
        Predictions computed with the final weights.
    loss : ndarray, shape (epochs_run, 1, 1)
        Cost ``e.T @ e / (2 m)`` recorded at the start of each epoch.
    thetas : ndarray, shape (epochs_run, n + 1, 1)
        Weights in effect at the start of each epoch (before that epoch's update).

    Raises
    ------
    ValueError
        If ``x`` is not 1-D/2-D, is empty, has a different number of samples
        than ``y``, or if a beta lies outside ``[0, 1)``.

    Side effects
    ------------
    Prints which stopping rule fired (if any), a completion message and the
    R^2 score of the final fit.
    """
    # --- Input normalisation and validation --------------------------------
    # Convert first so that plain lists / DataFrames all expose ``.shape``.
    x = np.array(x, dtype=float)
    if x.ndim == 1:
        x = x.reshape(-1, 1)
    if x.ndim != 2:
        raise ValueError(f"x must be 1-D or 2-D, got an array with {x.ndim} dimensions")
    y = np.array(y, dtype=float).reshape(-1, 1)

    m = x.shape[0]
    if m == 0:
        raise ValueError("x must contain at least one sample")
    if y.shape[0] != m:
        # Without this check a length-1 y would silently broadcast over all rows.
        raise ValueError(f"x has {m} samples but y has {y.shape[0]}")
    for beta_name, beta in (("momentum_beta", momentum_beta), ("adaptive_beta", adaptive_beta)):
        # beta == 1 would make the bias-correction denominator exactly zero.
        if not 0 <= beta < 1:
            raise ValueError(f"{beta_name} must be in [0, 1), got {beta}")

    # Add bias term (column of ones)
    x = np.concatenate((np.ones((m, 1)), x), axis=1)
    n_params = x.shape[1]

    # Weights and both Adam moment estimates start at zero.
    w = np.zeros((n_params, 1))
    momentum = np.zeros((n_params, 1))   # first moment
    adaptive = np.zeros((n_params, 1))   # second moment
    loss = []
    thetas = []

    for i in range(itirations):
        # Hypothesis and error
        hx = x @ w
        e = hx - y

        # Cost (half mean squared error); kept as a (1, 1) array so the
        # returned ``loss`` keeps its historical (epochs, 1, 1) shape.
        j = e.T @ e / (2 * m)
        loss.append(j)

        # Gradient of the cost w.r.t. the weights, and its norm (computed once).
        G = (x.T @ e) / m
        grad_norm = np.linalg.norm(G)

        # Record the weights this epoch was evaluated with.
        thetas.append(w.copy())

        # Check convergence based on gradient norm
        if grad_norm <= (gradient_norm):
            print(f''' Gradient Norm condition
            stop in epoch number {i} 
            gradient norm = {grad_norm}''')
            break

        # Check convergence based on the absolute change in loss
        if i > 2 and np.absolute(loss[i-1] - loss[i]) < loss_condition:
            print(f'''loss condition
            stop in epoch number {i} 
            gradient norm = {grad_norm}
            loss[i]-loss[i-1]={loss[i]-loss[i-1]}
            ''')
            break

        # Exponential moving averages of the gradient and the squared gradient.
        momentum = momentum_beta * momentum + (1 - momentum_beta) * G
        adaptive = adaptive_beta * adaptive + (1 - adaptive_beta) * G**2

        # Bias-correct the zero-initialised averages, then take the Adam step.
        m_hat = momentum / (1 - momentum_beta**(i + 1))
        v_hat = adaptive / (1 - adaptive_beta**(i + 1))
        w = w - (alpha / (np.sqrt(v_hat) + 1e-8)) * m_hat

    # Final prediction and R2 score evaluation
    hx = x @ w
    print('you finished your iterations')
    print(f'R2 score ={_r2_score(y, hx)}')

    return np.array(w), hx, np.array(loss), np.array(thetas)
