# Newton's Method

In [1]:
import pandas as pd
import numpy as np

def newtons_method(x, y, epochs=1):
    # Feature scaling (same as before)
    x_min, x_max = x.min(), x.max()
    y_min, y_max = y.min(), y.max()

    x_scaled = (x - x_min) / (x_max - x_min)
    y_scaled = (y - y_min) / (y_max - y_min)

    # Add bias (intercept) term to feature matrix
    X = np.column_stack((np.ones_like(x_scaled), x_scaled))  # Shape: (n_samples, 2)
    y = y_scaled.reshape(-1, 1)  # Shape: (n_samples, 1)

    # Initialize weights (theta = [b, m])
    theta = np.zeros((2, 1))

    for epoch in range(epochs):
        y_pred = X @ theta
        error = y - y_pred

        # Gradient
        grad = -2 * X.T @ error / len(x)

        # Hessian
        hessian = 2 * X.T @ X / len(x)

        # Newton's update: θ = θ - H⁻¹ * grad
        try:
            hessian_inv = np.linalg.inv(hessian)
        except np.linalg.LinAlgError:
            print("Hessian is not invertible.")
            return None, None

        theta -= hessian_inv @ grad

        # Optional: print progress
        cost = np.mean(error ** 2)
        if epoch % 1 == 0:
            print(f"Epoch {epoch}: Cost = {cost}, b = {theta[0,0]}, m = {theta[1,0]}")

    # Rescale weights to original y-scale
    m_scaled = theta[1, 0]
    b_scaled = theta[0, 0]

    m_original = m_scaled * (y_max - y_min) / (x_max - x_min)
    b_original = b_scaled * (y_max - y_min) + y_min - m_scaled * (y_max - y_min) * x_min / (x_max - x_min)

    return b_original, m_original


if __name__ == "__main__":
    df = pd.read_csv("home_prices.csv")
    x = df["area_sqr_ft"].to_numpy()
    y = df["price_lakhs"].to_numpy()

    b, m = newtons_method(x, y)
    print(f"Final Results (Newton): m={m}, b={b}")


Epoch 0: Cost = 0.2564831062314152, b = 0.1282151752880576, m = 0.5853854285191946
Final Results (Newton): m=0.05168175548989393, b=18.046501102723404


# Gradient Descent 

In [2]:
def gradient_descent(x, y, lr=0.1, epochs=950):
    # Scale x and y using Min-Max Scaling
    x_min, x_max = x.min(), x.max()
    y_min, y_max = y.min(), y.max()

    x_scaled = (x - x_min) / (x_max - x_min)
    y_scaled = (y - y_min) / (y_max - y_min)

    # Initialize parameters
    b = 0.0  # Intercept
    m = 0.0  # Slope
    n = len(y_scaled)  # Number of data points

    # Perform gradient descent
    for epoch in range(epochs):
        y_pred = b + m * x_scaled  # Predicted y values
        error = y_scaled - y_pred  # Error in prediction
        cost = np.mean(error ** 2)   # Mean squared error

        # Calculate gradients
        db = -2 * np.mean(error)  # Derivative w.r.t. intercept b
        dm = -2 * np.mean(error * x_scaled)  # Derivative w.r.t. slope m

        # Update parameters
        b -= lr * db
        m -= lr * dm

        # Optional: Print cost every 100 iterations to monitor progress
        if epoch % 50 == 0:
            print(f"Epoch {epoch}: Cost = {cost}, b = {b}, m = {m}")

    # Scale back the coefficients to original scale
    b_original = b * (y_max - y_min) + y_min - m * (y_max - y_min) * x_min / (x_max - x_min)
    m_original = m * (y_max - y_min) / (x_max - x_min)

    return b_original, m_original


if __name__ == "__main__":
    df = pd.read_csv("home_prices.csv")

    x = df["area_sqr_ft"].to_numpy()
    y = df["price_lakhs"].to_numpy()

    b, m = gradient_descent(x, y)

    print(f"Final Results: m={m}, b={b}")



Epoch 0: Cost = 0.2564831062314152, b = 0.08397689768976899, m = 0.05399231750098086
Epoch 50: Cost = 0.04784936980284014, b = 0.22154077199551966, m = 0.4133814830788247
Epoch 100: Cost = 0.045238527469297275, b = 0.16916604992569337, m = 0.5099109453609146
Epoch 150: Cost = 0.044735832629386664, b = 0.14618422344597073, m = 0.552267585870816
Epoch 200: Cost = 0.044639043137800115, b = 0.13609990777687922, m = 0.5708534773777333
Epoch 250: Cost = 0.04462040716834547, b = 0.131674958236711, m = 0.579008877781275
Epoch 300: Cost = 0.04461681897553835, b = 0.1297333115176334, m = 0.5825874285178556
Epoch 350: Cost = 0.04461612810041902, b = 0.12888132621305937, m = 0.5841576794853888
Epoch 400: Cost = 0.044615995078461666, b = 0.1285074791315702, m = 0.5848466981084404
Epoch 450: Cost = 0.044615969466248066, b = 0.12834343681817856, m = 0.5851490361886121
Epoch 500: Cost = 0.044615964534840506, b = 0.12827145583473568, m = 0.585281700693843
Epoch 550: Cost = 0.044615963585341145, b = 0.1

In [10]:
# Expected answer. m = 0.05168176, b=18.0465

# 

In [13]:
import numpy as np
import pandas as pd
from scipy.optimize import minimize

# Load dataset
df = pd.read_csv("home_prices.csv")
X = df.drop(columns=["price_lakhs"]).to_numpy()  # Multiple features
y = df["price_lakhs"].to_numpy()

# Feature scaling (important for optimization stability)
X_min, X_max = X.min(axis=0), X.max(axis=0)
X_scaled = (X - X_min) / (X_max - X_min)

y_min, y_max = y.min(), y.max()
y_scaled = (y - y_min) / (y_max - y_min)

# Add intercept (bias) term
X_aug = np.column_stack([np.ones(X_scaled.shape[0]), X_scaled])

# Initialize weights (including intercept)
initial_theta = np.zeros(X_aug.shape[1])

# Cost function: Mean Squared Error
def cost_function(theta):
    y_pred = X_aug @ theta
    error = y_scaled - y_pred
    return np.mean(error ** 2)

# Gradient (Jacobian) of cost function
def gradient(theta):
    y_pred = X_aug @ theta
    error = y_scaled - y_pred
    grad = -2 * X_aug.T @ error / len(y)
    return grad

# --- 1️⃣ BFGS ---
print("\n----- Using BFGS -----")
bfgs_result = minimize(
    cost_function,
    initial_theta,
    method='BFGS',
    jac=gradient,
    options={'disp': True}
)

theta_bfgs = bfgs_result.x

# --- 2️⃣ L-BFGS-B ---
print("\n----- Using L-BFGS-B -----")
lbfgs_result = minimize(
    cost_function,
    initial_theta,
    method='L-BFGS-B',
    jac=gradient,
    options={'disp': True}
)

theta_lbfgs = lbfgs_result.x

# ---- Rescale the result back to original y-scale ----
def rescale_theta(theta_scaled):
    intercept_scaled = theta_scaled[0]
    weights_scaled = theta_scaled[1:]

    m_original = weights_scaled * (y_max - y_min) / (X_max - X_min)
    b_original = (intercept_scaled * (y_max - y_min)
                  + y_min
                  - np.sum(m_original * X_min))

    return b_original, m_original

b_bfgs, m_bfgs = rescale_theta(theta_bfgs)
b_lbfgs, m_lbfgs = rescale_theta(theta_lbfgs)

print(f"\nBFGS Results:\nIntercept: {b_bfgs}\nCoefficients: {m_bfgs}")
print(f"\nL-BFGS-B Results:\nIntercept: {b_lbfgs}\nCoefficients: {m_lbfgs}")



----- Using BFGS -----
Optimization terminated successfully.
         Current function value: 0.044616
         Iterations: 4
         Function evaluations: 6
         Gradient evaluations: 6

----- Using L-BFGS-B -----

BFGS Results:
Intercept: 18.046501102723425
Coefficients: [0.05168176]

L-BFGS-B Results:
Intercept: 18.046501102723454
Coefficients: [0.05168176]


## Traditional Machine Learining 

In [36]:
df

Unnamed: 0,area_sqr_ft,price_lakhs
0,656,39.0
1,1260,83.2
2,1057,86.6
3,1259,59.0
4,1800,140.0
5,1325,80.1
6,1085,116.0
7,1110,45.0
8,1700,100.0
9,960,89.0


In [19]:
from sklearn.linear_model import LinearRegression 
from sklearn.model_selection import train_test_split 
from sklearn.preprocessing import StandardScaler

In [32]:
df.columns

Index(['area_sqr_ft', 'price_lakhs'], dtype='object')

In [33]:
model = LinearRegression()
model.fit(df[['area_sqr_ft']],df['price_lakhs'])

In [37]:
model.intercept_,model.coef_

(np.float64(18.046501102723433), array([0.05168176]))