In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score

df = pd.read_csv('./archive/Housing.csv')

X = df[['area', 'bedrooms', 'bathrooms', 'stories', 'parking']].values
y = df['price'].values.reshape(-1, 1)

scaler = StandardScaler()
X = scaler.fit_transform(X)

def lasso_regression(X, y, alpha, num_iterations=1000, learning_rate=0.01):
    m, n = X.shape
    theta = np.zeros((n, 1))
    intercept = 0.0
    losses = []

    for _ in range(num_iterations):
        y_pred = X @ theta + intercept

        loss = (1 / (2 * m)) * np.sum((y_pred - y) ** 2) + alpha * np.sum(np.abs(theta))
        losses.append(loss)

        gradient = (1 / m) * (X.T @ (y_pred - y)) + alpha * np.sign(theta)
        intercept_grad = (1 / m) * np.sum(y_pred - y)

        theta -= learning_rate * gradient
        intercept -= learning_rate * intercept_grad

    return theta.flatten(), intercept, losses

results_lasso = []
alphas = [0.01, 0.1, 1.0, 10.0, 100.0, 100000.0, 1000000.0, 10000000000.0, 100000000000000000.0]

for a in alphas:
    coef, intercept, losses = lasso_regression(X, y, alpha=a, num_iterations=2000, learning_rate=0.001)

    y_pred = X @ coef.reshape(-1, 1) + intercept

    mse = mean_squared_error(y, y_pred)
    r2 = r2_score(y, y_pred)

    results_lasso.append({
        'alpha': a,
        'coef': coef,
        'intercept': intercept,
        'mse': mse,
        'r2': r2
    })

for res in results_lasso:
    print(f"Alpha: {res['alpha']}")
    print(f"Coefficients: {res['coef']}")
    print(f"Intercept: {res['intercept']}")
    print(f"MSE: {res['mse']:.2f}, R²: {res['r2']:.4f}")
    print("-" * 40)


Alpha: 0.01
Coefficients: [640520.14810613 209461.18295852 526898.1566149  425301.88102335
 348635.78694888]
Intercept: 4122267.809026439
MSE: 1958373092881.67, R²: 0.4392
----------------------------------------
Alpha: 0.1
Coefficients: [640520.0979538  209461.14298447 526898.11666342 425301.83114971
 348635.73390358]
Intercept: 4122267.8090264387
MSE: 1958373102951.32, R²: 0.4392
----------------------------------------
Alpha: 1.0
Coefficients: [640519.59643046 209460.74324399 526897.71714858 425301.3324133
 348635.20345066]
Intercept: 4122267.809026439
MSE: 1958373203650.10, R²: 0.4392
----------------------------------------
Alpha: 10.0
Coefficients: [640514.58119711 209456.74583919 526893.72200017 425296.34504929
 348629.89892137]
Intercept: 4122267.8090264387
MSE: 1958374210862.32, R²: 0.4392
----------------------------------------
Alpha: 100.0
Coefficients: [640464.42886363 209416.77179118 526853.7705161  425246.47140916
 348576.85362848]
Intercept: 4122267.809026439
MSE: 19583

In [29]:
# Q2. ElasticNet Regression
# a) Implement ElasticNet Regression using the following objective function:
 
# where:
# - α controls the strength of regularization,
# - λ₁ and λ₂ balance between LASSO and Ridge penalties.

def elastic_net_regression(X, y, alpha, l1_ratio, num_iterations=1000, learning_rate=0.01):
    m, n = X.shape
    theta = np.zeros((n, 1))
    intercept = 0.0
    losses = []

    for _ in range(num_iterations):
        y_pred = X @ theta + intercept

        loss = (1 / (2 * m)) * np.sum((y_pred - y) ** 2) + alpha * (l1_ratio * np.sum(np.abs(theta)) + (1 - l1_ratio) * np.sum(theta ** 2))
        losses.append(loss)

        gradient = (1 / m) * (X.T @ (y_pred - y)) + alpha * (l1_ratio * np.sign(theta) + 2 * (1 - l1_ratio) * theta)
        intercept_grad = (1 / m) * np.sum(y_pred - y)

        theta -= learning_rate * gradient
        intercept -= learning_rate * intercept_grad

    return theta.flatten(), intercept, losses
results_enet = []

# b) Experiment with different values of α and l1_ratio. Compare the model’s performance using MSE and R² score, and 
# report how the parameter choices affect the results compared to LASSO.

l1_ratios = [0.2, 0.5, 0.8]
alphas = [0.01, 0.1, 1.0, 10.0]
for l1 in l1_ratios:
    for a in alphas:
        coef, intercept, losses = elastic_net_regression(X, y, alpha=a, l1_ratio=l1, num_iterations=2000, learning_rate=0.001)

        y_pred = X @ coef.reshape(-1, 1) + intercept

        mse = mean_squared_error(y, y_pred)
        r2 = r2_score(y, y_pred)

        results_enet.append({
            'l1_ratio': l1,
            'alpha': a,
            'coef': coef,
            'intercept': intercept,
            'mse': mse,
            'r2': r2
        })
for res in results_enet:
    print(f"L1 Ratio: {res['l1_ratio']}, Alpha: {res['alpha']}")
    print(f"Coefficients: {res['coef']}")
    print(f"Intercept: {res['intercept']}")
    print(f"MSE: {res['mse']:.2f}, R²: {res['r2']:.4f}")
    print("-" * 40)






L1 Ratio: 0.2, Alpha: 0.01
Coefficients: [634561.33031723 209115.21216779 522569.75465431 421781.69532675
 346225.48085466]
Intercept: 4122267.809026439
MSE: 1959840368150.40, R²: 0.4388
----------------------------------------
L1 Ratio: 0.2, Alpha: 0.1
Coefficients: [584964.46426322 205553.71641924 486319.15475909 392327.25249097
 325793.93584783]
Intercept: 4122267.809026439
MSE: 1980985850377.15, R²: 0.4327
----------------------------------------
L1 Ratio: 0.2, Alpha: 1.0
Coefficients: [319205.04222999 158219.29290986 282551.51030691 227711.60250306
 201167.17604767]
Intercept: 4122267.809026439
MSE: 2402671998160.53, R²: 0.3120
----------------------------------------
L1 Ratio: 0.2, Alpha: 10.0
Coefficients: [56920.15454644 37202.84298643 54162.45665125 43924.72768173
 40084.4701897 ]
Intercept: 4122267.8090264387
MSE: 3531753792681.46, R²: -0.0113
----------------------------------------
L1 Ratio: 0.5, Alpha: 0.01
Coefficients: [636784.71543482 209246.2691273  524185.43517002 423

In [34]:
# Q3. Polynomial Regression
# a) Load the dataset and select “area” as the input feature and “price” as the target.
# b) Transform the input feature using polynomial terms as:
# [X, X², X³, ..., X^d]

X = df[['area']].values
y = df['price'].values.reshape(-1, 1)
scaler = StandardScaler()
X = scaler.fit_transform(X)


#  for polynomial degrees d = 2, 3, 4.
# c) Implement Polynomial Regression using PolynomialFeatures and LinearRegression. Evaluate the models using MSE and R² score.

from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
results_poly = []
degrees = [2, 3, 4, 5, 6]
for d in degrees:
    poly = PolynomialFeatures(degree=d)
    X_poly = poly.fit_transform(X)

    model = LinearRegression()
    model.fit(X_poly, y)

    y_pred = model.predict(X_poly)

    mse = mean_squared_error(y, y_pred)
    r2 = r2_score(y, y_pred)

    results_poly.append({
        'degree': d,
        'coef': model.coef_,
        'intercept': model.intercept_,
        'mse': mse,
        'r2': r2
    })
for res in results_poly:
    print(f"Degree: {res['degree']}")
    print(f"Coefficients: {res['coef']}")
    print(f"Intercept: {res['intercept']}")
    print(f"MSE: {res['mse']:.2f}, R²: {res['r2']:.4f}")
    print("-" * 40)



Degree: 2
Coefficients: [[      0.         1271452.93175927 -204791.17085265]]
Intercept: [4971520.41855907]
MSE: 2363908046361.73, R²: 0.3231
----------------------------------------
Degree: 3
Coefficients: [[      0.         1247367.60109755 -300254.71949851   26221.35633008]]
Intercept: [5032436.03854087]
MSE: 2357026034385.69, R²: 0.3250
----------------------------------------
Degree: 4
Coefficients: [[      0.         1648348.74405577 -169986.17319719 -235998.37326444
    47020.3107784 ]]
Intercept: [4978919.10990229]
MSE: 2285213982030.96, R²: 0.3456
----------------------------------------
Degree: 5
Coefficients: [[      0.         1739922.3478073  -296700.54393397 -301670.38688111
   103665.74981499   -7924.88444782]]
Intercept: [5024491.493133]
MSE: 2280059576256.73, R²: 0.3471
----------------------------------------
Degree: 6
Coefficients: [[ 0.00000000e+00  1.75155578e+06 -2.44098728e+05 -3.22823352e+05
   8.32425917e+04  2.66701165e+03 -1.21959872e+03]]
Intercept: [501020

In [33]:
# Q4. Hyperparameter Tuning using GridSearch and Randomized Search
# a) Define a parameter grid to search for the optimal polynomial degree K.
# b) Use GridSearchCV to find the best value of K by performing cross-validation. Report the best polynomial degree and the corresponding MSE and R² score.
# c) Similarly, use RandomizedSearchCV to search for the optimal polynomial degree K. 

# Report the best polynomial degree and the corresponding MSE and R² score.
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
pipeline = Pipeline([
    ('poly', PolynomialFeatures()),
    ('linear', LinearRegression())
])
param_grid = {
    'poly__degree': [2, 3, 4, 5, 6, 7, 8, 9, 10]
}
grid_search = GridSearchCV(pipeline, param_grid, cv=5, scoring='neg_mean_squared_error')
grid_search.fit(X_train, y_train)
best_degree_grid = grid_search.best_params_['poly__degree']
y_pred_grid = grid_search.predict(X_test)
mse_grid = mean_squared_error(y_test, y_pred_grid)
r2_grid = r2_score(y_test, y_pred_grid)
print(f"GridSearchCV - Best Degree: {best_degree_grid}, MSE: {mse_grid:.2f}, R²: {r2_grid:.4f}")
param_dist = {
    'poly__degree': [2, 3, 4, 5, 6, 7, 8, 9, 10]
}
random_search = RandomizedSearchCV(pipeline, param_dist, n_iter=10, cv=5, scoring='neg_mean_squared_error', random_state=42)
random_search.fit(X_train, y_train)
best_degree_random = random_search.best_params_['poly__degree']
y_pred_random = random_search.predict(X_test)
mse_random = mean_squared_error(y_test, y_pred_random)
r2_random = r2_score(y_test, y_pred_random)
print(f"RandomizedSearchCV - Best Degree: {best_degree_random}, MSE: {mse_random:.2f}, R²: {r2_random:.4f}")


GridSearchCV - Best Degree: 4, MSE: 3730760199280.62, R²: 0.2619
RandomizedSearchCV - Best Degree: 4, MSE: 3730760199280.62, R²: 0.2619


