In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge, Lasso
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.impute import SimpleImputer

# Load the dataset

data = pd.read_csv("housing.csv")

# Checking the dataset
print(data.head())

# Define features and target
X = data.drop(columns=["median_house_value"])

# One-hot encode the 'ocean_proximity' column
X = pd.get_dummies(X, columns=["ocean_proximity"])

y = data["median_house_value"]

# Determine number of bins using Sturge’s Rule
num_bins = int(np.ceil(1 + np.log2(len(y))))

# Create stratified bins for y
data["price_category"] = pd.cut(y, bins=num_bins, labels=False)

# Stratified splitting
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=data["price_category"], random_state=42)

del data["price_category"]  # Remove bin column after splitting
# Handling missing values
imputer = SimpleImputer(strategy='mean')
X_train = imputer.fit_transform(X_train)
X_test = imputer.transform(X_test)

# Standardizing features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
X_test = scaler.transform(X_test)

# Train Ridge and Lasso models
ridge = Ridge(alpha=1.0)
lasso = Lasso(alpha=0.1)
ridge.fit(X_train, y_train)
lasso.fit(X_train, y_train)

# Predictions
y_pred_ridge = ridge.predict(X_test)
y_pred_lasso = lasso.predict(X_test)

# Evaluation Metrics
def evaluate(y_true, y_pred, model_name):
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    print(f"{model_name} - MAE: {mae:.2f}, MSE: {mse:.2f}, RMSE: {rmse:.2f}")
    return mae, mse, rmse

evaluate(y_test, y_pred_ridge, "Ridge Regression")
evaluate(y_test, y_pred_lasso, "Lasso Regression")


   longitude  latitude  housing_median_age  total_rooms  total_bedrooms  \
0    -122.23     37.88                41.0        880.0           129.0   
1    -122.22     37.86                21.0       7099.0          1106.0   
2    -122.24     37.85                52.0       1467.0           190.0   
3    -122.25     37.85                52.0       1274.0           235.0   
4    -122.25     37.85                52.0       1627.0           280.0   

   population  households  median_income  median_house_value ocean_proximity  
0       322.0       126.0         8.3252            452600.0        NEAR BAY  
1      2401.0      1138.0         8.3014            358500.0        NEAR BAY  
2       496.0       177.0         7.2574            352100.0        NEAR BAY  
3       558.0       219.0         5.6431            341300.0        NEAR BAY  
4       565.0       259.0         3.8462            342200.0        NEAR BAY  
Ridge Regression - MAE: 2448633.61, MSE: 6006069476576.81, RMSE: 2450728.36

  model = cd_fast.enet_coordinate_descent(


(2456202.1271552, 6043527347499.965, 2458358.6694174563)

In [None]:
print("Housing Regression Findings:")
print("1. Ridge Regression performed slightly better than Lasso Regression in terms of MAE, MSE, and RMSE.")
print("2. Ridge Regression MAE: 2,448,633.61, MSE: 6,006,069,476,576.81, RMSE: 2,450,728.36.")
print("3. Lasso Regression MAE: 2,456,202.13, MSE: 6,043,527,347,499.96, RMSE: 2,458,358.67.")
print("4. The differences in performance are minor, but Ridge Regression's lower error metrics suggest it might generalize better.")



Housing Regression Findings:
1. Ridge Regression performed slightly better than Lasso Regression in terms of MAE, MSE, and RMSE.
2. Ridge Regression MAE: 2,448,633.61, MSE: 6,006,069,476,576.81, RMSE: 2,450,728.36.
3. Lasso Regression MAE: 2,456,202.13, MSE: 6,043,527,347,499.96, RMSE: 2,458,358.67.
4. The differences in performance are minor, but Ridge Regression's lower error metrics suggest it might generalize better.
