In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge, Lasso
from sklearn.metrics import mean_squared_error, r2_score
import math

# =====================================================
# Step 1 - Read the housing data from CSV
# =====================================================
# The path is relative, so it resolves against the kernel's working directory.
df = pd.read_csv("housing_dataset_1000.csv")

print("Dataset Preview:")
print(df.head())

# =====================================================
# Step 2 - Separate predictors (X) from the response (y)
# =====================================================
feature_names = ["RM", "LSTAT", "PTRATIO", "TAX", "AGE", "CRIM"]
X = df[feature_names]
y = df["PRICE"]

# =====================================================
# Step 3 - Hold out 20% of the rows for evaluation
# =====================================================
# A fixed random_state keeps the split identical from run to run.
split = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split


# =====================================================
# Steps 4 & 5 - Fit and score Ridge, then Lasso
# =====================================================
def _fit_predict_score(estimator, train_X, train_y, test_X, test_y):
    """Fit ``estimator`` on the training split and score it on the test split.

    Returns a ``(predictions, rmse, r2)`` tuple. ``rmse`` is the square root
    of ``mean_squared_error`` and ``r2`` is ``r2_score``; both are computed
    from the test targets and the test predictions.
    """
    estimator.fit(train_X, train_y)
    predictions = estimator.predict(test_X)
    rmse = math.sqrt(mean_squared_error(test_y, predictions))
    r2 = r2_score(test_y, predictions)
    return predictions, rmse, r2


# NOTE(review): the features are passed to both penalised models exactly as
# read from the CSV, with no scaling step in this cell. Regularised
# coefficients depend on feature scale, so consider standardising first -
# confirm whether fitting on raw units is intentional.
ridge = Ridge(alpha=1.0)
ridge_pred, ridge_rmse, ridge_r2 = _fit_predict_score(
    ridge, X_train, y_train, X_test, y_test
)

lasso = Lasso(alpha=0.1)
lasso_pred, lasso_rmse, lasso_r2 = _fit_predict_score(
    lasso, X_train, y_train, X_test, y_test
)

# =====================================================
# Step 6 - Report test-set metrics for both models
# =====================================================
print("\n===== MODEL PERFORMANCE COMPARISON =====")
for label, value in (
    ("Ridge RMSE:", ridge_rmse),
    ("Ridge R²:", ridge_r2),
    ("\nLasso RMSE:", lasso_rmse),  # leading newline separates the two models
    ("Lasso R²:", lasso_r2),
):
    print(label, value)

# =====================================================
# Step 7 - Show the fitted coefficient of every feature
# =====================================================
print("\n===== COEFFICIENTS =====")
for heading, model in (
    ("\nRidge Coefficients:", ridge),
    ("\nLasso Coefficients:", lasso),
):
    print(heading)
    # coef_ is paired with X.columns positionally, in the order selected above.
    for feature, coef in zip(X.columns, model.coef_):
        print(f"{feature}: {coef}")


Dataset Preview:
        CRIM        RM      LSTAT    PTRATIO         TAX        AGE      PRICE
0   7.490802  6.142161  29.174448  15.878261  635.956531  98.534042  17.028372
1  19.014286  4.931725  10.609063  20.035377  488.490806  14.515782  18.621205
2  14.639879  6.304158  14.979191  21.017740  372.740427  18.702510  23.277837
3  11.973170  6.488469   6.686834  14.034727  601.505941  74.841107  26.684773
4   3.120373  6.447832  18.712622  12.669736  470.139447  35.364250  21.931362

===== MODEL PERFORMANCE COMPARISON =====
Ridge RMSE: 1.9730089895309049
Ridge R²: 0.8442495976798091

Lasso RMSE: 1.968567522843838
Lasso R²: 0.8449500320029825

===== COEFFICIENTS =====

Ridge Coefficients:
RM: 4.893796602613758
LSTAT: -0.31246620263469665
PTRATIO: 0.019888194974537598
TAX: -0.01007480902423035
AGE: 0.01843252890411953
CRIM: -0.03349060382138675

Lasso Coefficients:
RM: 4.745675458461395
LSTAT: -0.3106811010678717
PTRATIO: 0.005709964066020177
TAX: -0.010096031848008388
AGE: 0.01832671