In [None]:
import math
import numpy as np
import matplotlib as mpl
import pandas as pd
# Seed NumPy's legacy global RNG so that later np.random.* calls are reproducible.
# Generators built with np.random.default_rng(...) carry their own seed and are
# not affected by this call.
np.random.seed(42)

In [None]:
import pandas as pd
import numpy as np

def extract_traindata(csv_path):
    """Load a training CSV, impute missing values, and split features from target.

    Numeric columns have missing entries replaced by the column median; every
    other column (e.g. text categories) is filled with its most frequent value.
    The last column of the file is treated as the target.

    Args:
        csv_path: Path or file-like object accepted by ``pd.read_csv``.

    Returns:
        tuple: ``(X, y)`` where ``X`` is a NumPy array holding every column
        except the last and ``y`` is a NumPy array holding the last column.
    """
    df = pd.read_csv(csv_path)

    for col in df.columns:
        # Decide by "is it numeric?" rather than "is it object?" so that any
        # non-object text dtype is never sent to median().
        if pd.api.types.is_numeric_dtype(df[col]):
            # The median of an all-NaN column is NaN, making fillna a no-op.
            df[col] = df[col].fillna(df[col].median())
        else:
            modes = df[col].mode()
            # mode() is empty when the column has no non-missing values
            # (e.g. a header-only CSV); indexing it would raise, so skip.
            if not modes.empty:
                df[col] = df[col].fillna(modes.iloc[0])

    X = df.iloc[:, :-1].values
    y = df.iloc[:, -1].values

    return X, y

def append_numpy_to_csv(array_data, file_name, delimiter=',', include_header=False):
    """Append array-like data as new rows at the end of a CSV file.

    The data is wrapped in a DataFrame and written in append mode without the
    index column. Any exception raised along the way is caught and reported on
    stdout instead of being propagated to the caller.

    Args:
        array_data: Data accepted by the ``pd.DataFrame`` constructor.
        file_name: Destination path handed to ``DataFrame.to_csv``.
        delimiter: Field separator written between values.
        include_header: Passed as ``header`` to ``to_csv``.
    """
    write_options = {
        "mode": "a",               # append instead of overwriting
        "sep": delimiter,
        "header": include_header,
        "index": False,            # never write the row index
    }
    try:
        frame = pd.DataFrame(array_data)
        frame.to_csv(file_name, **write_options)
        print(f"Successfully appended NumPy array data to '{file_name}'.")
    except Exception as e:
        # Best-effort helper: report the failure and carry on.
        print(f"An error occurred while appending to CSV: {e}")

In [None]:
def extract_testdata(csv_path):
    """Load a test CSV, impute missing values, and return all columns as features.

    Imputation mirrors the training loader in this notebook: numeric columns
    are filled with the column median and all other columns with their most
    frequent value. The statistics are computed from this file itself.

    Args:
        csv_path: Path or file-like object accepted by ``pd.read_csv``.

    Returns:
        numpy.ndarray: Every column of the file, with missing values filled.
    """
    df = pd.read_csv(csv_path)

    for col in df.columns:
        if pd.api.types.is_numeric_dtype(df[col]):
            # Without this branch numeric NaNs would flow straight into the
            # feature matrix and turn every downstream prediction into NaN.
            df[col] = df[col].fillna(df[col].median())
        else:
            modes = df[col].mode()
            # mode() is empty when the column has no non-missing values;
            # indexing it would raise, so leave such a column untouched.
            if not modes.empty:
                df[col] = df[col].fillna(modes.iloc[0])

    X = df.iloc[:, :].values

    return X

In [None]:
def comp_cost(X,y,w,b,lamda):
    """Return the L2-regularised half mean-squared-error of a linear model.

    The prediction is ``np.dot(X, w) + b``. The data term is the sum of squared
    residuals divided by ``2 * m`` and the penalty is
    ``lamda / (2 * m) * sum(w ** 2)``, where ``m`` is the number of rows of
    ``X``. The bias ``b`` is not penalised.

    Args:
        X: 2-D feature matrix.
        y: Target values.
        w: Weight vector.
        b: Bias term.
        lamda: Regularisation strength.

    Returns:
        The data term plus the penalty term.
    """
    num_rows, _ = X.shape
    residuals = (np.dot(X, w) + b) - y

    data_term = np.sum(residuals ** 2) / (2 * num_rows)
    penalty_term = (lamda / (2 * num_rows)) * np.sum(w ** 2)

    return data_term + penalty_term

In [None]:
def comp_gradient(X,y,w,b,lamda):
    """Return the gradients of the regularised squared-error cost.

    The residual is ``np.dot(X, w) + b - y``. The weight gradient is
    ``X.T @ residual / m`` plus the L2 term ``(lamda / m) * w``; the bias
    gradient is the mean residual. ``m`` is the number of rows of ``X``.

    Args:
        X: 2-D feature matrix.
        y: Target values.
        w: Weight vector.
        b: Bias term.
        lamda: Regularisation strength.

    Returns:
        tuple: ``(dj_dw, dj_db)``, the gradient with respect to the weights
        and with respect to the bias.
    """
    num_rows, _ = X.shape
    residuals = (np.dot(X, w) + b) - y

    # Data-fit gradient first, then the L2 penalty contribution for w only.
    grad_w = np.dot(X.T, residuals) / num_rows + (lamda / num_rows) * w
    grad_b = np.sum(residuals) / num_rows

    return grad_w, grad_b

In [None]:
def gradient_descent(X,y,w_in,b_in,alpha=0.001,iters=1000,lamda=0.0,batch_size=128,optimizer="adam",beta1=0.9,beta2=0.999,eps=1e-8,tol=1e-6,patience=20,shuffle=True):
    """Fit linear-model parameters with mini-batch gradient descent.

    Each epoch optionally reshuffles the rows, walks over the data in
    mini-batches, and updates ``w`` and ``b`` from the gradients returned by
    ``comp_gradient``. After every epoch the cost on the full data set is
    computed with ``comp_cost`` and recorded.

    Args:
        X: 2-D feature matrix (rows are samples).
        y: Targets, indexed along the same first axis as ``X``.
        w_in: Initial weights; copied, never modified.
        b_in: Initial bias; copied when it is a NumPy array, so the caller's
            value is never modified.
        alpha: Step size.
        iters: Maximum number of epochs.
        lamda: L2 regularisation strength forwarded to the cost and gradient.
        batch_size: Number of rows per mini-batch; must be positive.
        optimizer: ``"adam"`` selects Adam updates; any other value falls back
            to a plain gradient step.
        beta1: Adam decay rate for the first-moment estimate.
        beta2: Adam decay rate for the second-moment estimate.
        eps: Small constant added to the Adam denominator.
        tol: Minimum cost decrease that counts as an improvement.
        patience: Number of consecutive non-improving epochs tolerated before
            stopping early.
        shuffle: Whether to permute the rows at the start of every epoch.

    Returns:
        tuple: ``(w, b, J_data)`` with the parameters after the last executed
        epoch and the list of per-epoch costs.

    Raises:
        ValueError: If ``batch_size`` is not positive.
    """
    # A non-positive step would either make range() fail with an opaque
    # message (0) or silently perform no updates at all (negative).
    if batch_size <= 0:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    m, _ = X.shape
    w = w_in.copy()
    # The in-place `b -= ...` below would otherwise write through to the
    # caller's array when the bias is passed as an ndarray.
    b = b_in.copy() if isinstance(b_in, np.ndarray) else b_in

    # Adam state: first-moment (v*) and second-moment (s*) running averages.
    vw = np.zeros_like(w)
    vb = np.zeros_like(b)
    sw = np.zeros_like(w)
    sb = np.zeros_like(b)

    J_data = []
    best_cost = np.inf
    wait = 0   # consecutive epochs without sufficient improvement
    t = 0      # number of Adam steps taken, used for bias correction
    for epoch in range(iters):

        if shuffle:
            # Seed depends on the epoch, so the shuffle order is reproducible
            # across runs while still differing between epochs.
            rng = np.random.default_rng(42 + epoch)
            perm = rng.permutation(m)

            # Rebinds the local names to permuted data; with NumPy arrays this
            # indexing produces new arrays rather than reordering the inputs.
            X = X[perm]
            y = y[perm]

        for i in range(0, m, batch_size):
            X_batch = X[i:i+batch_size]
            Y_batch = y[i:i+batch_size]

            dj_dw, dj_db = comp_gradient(
                X_batch, Y_batch, w, b, lamda
            )

            if optimizer == "adam":
                t += 1

                # Momentum (first-moment estimate)
                vw = beta1 * vw + (1 - beta1) * dj_dw
                vb = beta1 * vb + (1 - beta1) * dj_db

                # RMSProp-style second-moment estimate
                sw = beta2 * sw + (1 - beta2) * (dj_dw ** 2)
                sb = beta2 * sb + (1 - beta2) * (dj_db ** 2)

                # Bias correction for the zero-initialised averages
                vw_hat = vw / (1 - beta1 ** t)
                vb_hat = vb / (1 - beta1 ** t)
                sw_hat = sw / (1 - beta2 ** t)
                sb_hat = sb / (1 - beta2 ** t)

                # Parameter update
                w -= alpha * vw_hat / (np.sqrt(sw_hat) + eps)
                b -= alpha * vb_hat / (np.sqrt(sb_hat) + eps)

            else:  # plain gradient step
                w -= alpha * dj_dw
                b -= alpha * dj_db

        # Record the full-data cost for this epoch.
        cost = comp_cost(X, y, w, b, lamda)
        J_data.append(cost)

        # Stop as soon as the cost is no longer a finite number.
        if np.isnan(cost) or np.isinf(cost):
            print("Training diverged. Stopping early.")
            break

        # Early stopping: only a decrease larger than tol resets the counter.
        if best_cost - cost > tol:
            best_cost = cost
            wait = 0
        else:
            wait += 1

        if wait >= patience:
            print(f"Early stopping at iteration {epoch}")
            break

        # Log the epoch and cost roughly ten times over the run.
        if epoch % max(1, iters // 10) == 0:
            print(f"Epoch {epoch} | Cost: {cost:.6f}")

    return w,b,J_data