# Losses can have non-optimal local minima

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Synthetic regression data: y = 4 + 2*x1 + 3*x2 + Gaussian noise (std 0.5),
# with both features drawn uniformly from [-1, 1).
np.random.seed(42)  # fixed seed so the generated data is reproducible
X = 2 * np.random.rand(100, 2) - 1
y = 4 + 2 * X[:, 0] + 3 * X[:, 1] + 0.5 * np.random.randn(100)

# ReLU activation
def relu(x):
    """Return the element-wise rectified linear unit, i.e. ``max(0, x)``."""
    rectified = np.maximum(0, x)
    return rectified

# Huber loss
def huber_loss(y_true, y_pred, delta=1.0):
    """Return the mean Huber loss between ``y_true`` and ``y_pred``.

    Residuals whose magnitude is below ``delta`` are penalised quadratically
    (``0.5 * r**2``); all others linearly (``delta * (|r| - 0.5 * delta)``).
    """
    residual = y_true - y_pred
    magnitude = np.abs(residual)
    in_quadratic_zone = magnitude < delta
    per_sample = np.where(
        in_quadratic_zone,
        0.5 * (residual ** 2),
        delta * (magnitude - 0.5 * delta),
    )
    return np.mean(per_sample)

# Loss of a small fixed ReLU network, viewed as a function of (w1, w2)
def compute_loss(w1, w2, X, y):
    """Return the Huber loss (delta=1) of a tiny ReLU network on ``(X, y)``.

    With ``z = w1 * X[:, 0] + w2 * X[:, 1]`` the network output is
    ``2 * relu(z + 0.5) - 2 * relu(z - 0.5) + 0.3``: two hidden units share
    the same input weights ``(w1, w2)`` and differ only in their bias. All
    biases and output weights are fixed constants; only ``w1`` and ``w2`` vary.

    ``X`` must have two columns, since it is multiplied by a (2, 1) weight
    column.
    """
    hidden_bias = np.array([[0.5, -0.5]])
    output_weights = np.array([[2.0], [-2.0]])
    output_bias = 0.3

    # (n, 1) projection of every sample onto the weight vector (w1, w2).
    projection = np.dot(X, np.array([[w1], [w2]]))
    # Broadcasting (n, 1) + (1, 2) yields one column per hidden unit.
    activations = relu(projection + hidden_bias)
    predictions = np.dot(activations, output_weights) + output_bias

    return huber_loss(y, predictions.flatten(), delta=1.0)

# Sweep each weight over [-5, 5] while the other one is held at its fixed value
w_values = np.linspace(-5, 5, 100)
fixed_w1, fixed_w2 = 1.0, -1.0

# One-dimensional slices of the loss surface through (fixed_w1, fixed_w2)
loss_w1 = np.array([compute_loss(w, fixed_w2, X, y) for w in w_values])
loss_w2 = np.array([compute_loss(fixed_w1, w, X, y) for w in w_values])

# Plot Loss vs W1 and W2: one panel per swept weight
fig, axes = plt.subplots(1, 2, figsize=(12, 5))

axes[0].plot(w_values, loss_w1, label="Loss w.r.t W1")
axes[0].set_xlabel("Weight 1")
axes[0].set_ylabel("Loss")
axes[0].set_title("Loss vs W1 (ReLU + Huber)")
axes[0].grid()
# The label passed to plot() is only rendered once a legend is drawn.
axes[0].legend()

axes[1].plot(w_values, loss_w2, label="Loss w.r.t W2", color='r')
axes[1].set_xlabel("Weight 2")
axes[1].set_ylabel("Loss")
axes[1].set_title("Loss vs W2 (ReLU + Huber)")
axes[1].grid()
axes[1].legend()

plt.tight_layout()
plt.show()


# Gradient descent (GD)

In [None]:
import pandas as pd

In [None]:
# Two integer features in 1..19 and a linear target y = 3 + 2*x1 - 4*x2 + noise.
# The noise is uniform on [0, 1), so its mean of 0.5 shifts the effective
# intercept of the data to about 3.5.
x1 = np.random.randint(1, 20, size=20000)
x2 = np.random.randint(1, 20, size=20000)
y = 3 + 2 * x1 - 4 * x2 + np.random.random(size=20000)

In [None]:
# Design matrix: a leading column of ones lets the intercept be learned as an
# ordinary weight. The column length is taken from x1 so it cannot drift out of
# sync with the sample size used when the features were generated.
x = pd.DataFrame({'intercept': np.ones(len(x1)), 'x1': x1, 'x2': x2})

In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
# scikit-learn linear regression; its fitted weights are displayed next to the
# hand-written gradient-descent results further down.
sk_model=LinearRegression()

In [None]:
# Fit on the two raw features only; no explicit column of ones is passed here
# (the intercept is read from sk_model.intercept_ afterwards).
sk_model.fit(pd.DataFrame({'x1':x1,'x2':x2}),y)

In [None]:
# Collect the fitted weights as [intercept, x1, x2], the same order as the
# columns of the design matrix x.
w_sk = [sk_model.intercept_, *sk_model.coef_]

In [None]:
# Display the scikit-learn weights, ordered [intercept, x1, x2].
w_sk

In [None]:
# Random weight vector with one entry per design-matrix column; used to try out
# grad() by hand (the optimisers themselves start from zeros).
w = np.random.random(size=x.shape[1])

In [None]:
# Display the random weight vector.
w

In [None]:
def my_pred(x, w):
    """Return the linear-model predictions ``x @ w``."""
    return x @ w

In [None]:
def my_error(y, x, w):
    """Return the residuals: observed ``y`` minus ``my_pred(x, w)``."""
    return y - my_pred(x, w)

In [None]:
def my_cost(y, x, w):
    """Return the sum of squared residuals for weights ``w``.

    The value is a plain sum: it is not divided by the number of rows.
    """
    residuals = my_error(y, x, w)
    return residuals.T @ residuals

In [None]:
def grad(y, x, w):
    """Return the gradient of the squared-error objective with respect to ``w``.

    The result is ``-(x.T @ residuals) / n`` with ``n = x.shape[0]``, i.e. the
    gradient of ``sum(residuals**2) / (2 * n)``. Note that this is scaled
    differently from ``my_cost``, which returns the un-averaged sum.

    Returns:
        A NumPy array with one entry per column of ``x``.
    """
    residuals = my_error(y, x, w)
    n_rows = x.shape[0]
    # Negate the small product instead of the whole transposed matrix; the
    # values are identical but no negated copy of x is built on every call.
    gradient = -(x.T @ residuals) / n_rows
    return np.array(gradient)

In [None]:
# Evaluate the gradient once on the full data set at the random weights w.
grad(y,x,w)

In [None]:
def lr_gd(y, x, learning_rate, num_steps):
    """Fit linear-regression weights with full-batch gradient descent.

    Starts from all-zero weights and performs ``num_steps`` updates of
    ``learning_rate * grad(y, x, weights)``, recording ``my_cost`` on the
    full data after every update.

    Returns:
        ``(cost, wts)``: the list of per-step costs and the final weights.
    """
    weights = np.zeros(x.shape[1])
    cost_history = []
    for _ in range(num_steps):
        weights -= learning_rate * grad(y, x, weights)
        cost_history.append(my_cost(y, x, weights))
    return cost_history, weights

        

In [None]:
# Full-batch gradient descent: 1000 steps with step size 0.001.
cost_gd, w_gd = lr_gd(y, x, learning_rate=0.001, num_steps=1000)

In [None]:
# Display the scikit-learn weights again for comparison with gradient descent.
w_sk

In [None]:
# Display the weights found by full-batch gradient descent.
w_gd

In [None]:
def lr_sgd(y, x, learning_rate, num_steps, batch_size=10):
    """Fit linear-regression weights with mini-batch stochastic gradient descent.

    Starts from all-zero weights. Each step draws ``batch_size`` row positions
    uniformly at random (with replacement), takes one gradient step on that
    mini-batch, and records ``my_cost`` on the FULL data set.

    Args:
        y: Targets; indexed with an integer position array.
        x: Design matrix; must support ``.iloc`` (e.g. a pandas DataFrame).
        learning_rate: Step size applied to the mini-batch gradient.
        num_steps: Number of updates to perform.
        batch_size: Rows sampled per step. Defaults to 10.

    Returns:
        ``(cost, wts)``: the list of per-step full-data costs and the final
        weights.
    """
    n_rows = x.shape[0]
    wts = np.zeros(x.shape[1])
    cost = []
    for _ in range(num_steps):
        # Passing the population size as an int samples from arange(n_rows)
        # without converting a range object to an array on every step.
        rand_ind = np.random.choice(n_rows, batch_size)
        y_sub = y[rand_ind]
        x_sub = x.iloc[rand_ind, :]

        wts -= learning_rate * grad(y_sub, x_sub, wts)

        cost.append(my_cost(y, x, wts))

    return cost, wts

In [None]:
# Mini-batch SGD: 1000 steps with step size 0.001.
cost_sgd, w_sgd = lr_sgd(y, x, learning_rate=0.001, num_steps=1000)

In [None]:
# Display the weights found by mini-batch SGD.
w_sgd

In [None]:
# Per-step cost of both optimisers on a log scale, one line per optimiser.
np.log(pd.DataFrame({'cost_gd': cost_gd, 'cost_sgd': cost_sgd})).plot()

In [None]:
def lr_sgd_mom(y, x, learning_rate, num_steps, gamma=0.9, batch_size=10):
    """Fit linear-regression weights with mini-batch SGD plus momentum.

    The velocity is an exponential moving average of the mini-batch gradients,
    ``v = gamma * v + (1 - gamma) * g``. Because ``v`` starts at zero it is
    divided by ``1 - gamma**t`` (``t`` = 1-based step number) before the
    update, which removes the bias toward zero in the early steps.
    ``my_cost`` on the FULL data set is recorded after every update.

    Args:
        y: Targets; indexed with an integer position array.
        x: Design matrix; must support ``.iloc`` (e.g. a pandas DataFrame).
        learning_rate: Step size applied to the bias-corrected velocity.
        num_steps: Number of updates to perform.
        gamma: Momentum decay factor in ``[0, 1)``. Defaults to 0.9.
        batch_size: Rows sampled (with replacement) per step. Defaults to 10.

    Returns:
        ``(cost, wts)``: the list of per-step full-data costs and the final
        weights.

    Raises:
        ValueError: If ``gamma`` is outside ``[0, 1)``; ``gamma == 1`` would
            make the bias-correction denominator zero.
    """
    if not 0 <= gamma < 1:
        raise ValueError("gamma must be in [0, 1)")

    n_rows = x.shape[0]
    wts = np.zeros(x.shape[1])
    cost = []
    v = np.zeros(x.shape[1])
    for step in range(1, num_steps + 1):
        # Passing the population size as an int samples from arange(n_rows)
        # without converting a range object to an array on every step.
        rand_ind = np.random.choice(n_rows, batch_size)
        y_sub = y[rand_ind]
        x_sub = x.iloc[rand_ind, :]

        g = grad(y_sub, x_sub, wts)
        v = gamma * v + (1 - gamma) * g

        # Bias correction for the zero-initialised moving average.
        v_adj = v / (1 - gamma ** step)

        wts -= learning_rate * v_adj

        cost.append(my_cost(y, x, wts))

    return cost, wts

In [None]:
# SGD with momentum: 1000 steps with step size 0.01, which is ten times the
# step size used for the plain GD and SGD runs.
cost_sgd_mom, w_sgd_mom = lr_sgd_mom(y, x, learning_rate=0.01, num_steps=1000)

In [None]:
# Display the weights found by SGD with momentum.
w_sgd_mom

In [None]:
# Per-step cost of all three optimisers on a log scale, one line per optimiser.
np.log(
    pd.DataFrame(
        {
            'cost_gd': cost_gd,
            'cost_sgd': cost_sgd,
            'cost_sgd_mom': cost_sgd_mom,
        }
    )
).plot()