In [3]:
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.linear_model import Ridge, Lasso
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import StandardScaler

In [4]:
data = pd.read_csv('housing.csv')

In [5]:
data = data.dropna()

In [6]:
data = pd.get_dummies(data, columns=['ocean_proximity'])

In [7]:
target = 'median_house_value'

In [8]:
X = data.drop(target, axis=1)
y = data[target]

In [9]:
n = len(data)
k = int(np.ceil(1 + 3.322 * np.log10(n)))

In [10]:
y_binned = pd.cut(y, bins=k, labels=False)

In [11]:
split = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
for train_index, test_index in split.split(X, y_binned):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

In [12]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [13]:
ridge = Ridge(alpha=1.0)
ridge.fit(X_train_scaled, y_train)
ridge_predictions = ridge.predict(X_test_scaled)

In [14]:
ridge_mae = mean_absolute_error(y_test, ridge_predictions)
ridge_mse = mean_squared_error(y_test, ridge_predictions)
ridge_rmse = np.sqrt(ridge_mse)

In [15]:
lasso = Lasso(alpha=1.0)
lasso.fit(X_train_scaled, y_train)
lasso_predictions = lasso.predict(X_test_scaled)

  model = cd_fast.enet_coordinate_descent(


In [16]:
lasso_mae = mean_absolute_error(y_test, lasso_predictions)
lasso_mse = mean_squared_error(y_test, lasso_predictions)
lasso_rmse = np.sqrt(lasso_mse)

In [17]:
print('Ridge Regression:')
print(f'  MAE: {ridge_mae:.2f}')
print(f'  MSE: {ridge_mse:.2f}')
print(f'  RMSE: {ridge_rmse:.2f}')

print('\nLasso Regression:')
print(f'  MAE: {lasso_mae:.2f}')
print(f'  MSE: {lasso_mse:.2f}')
print(f'  RMSE: {lasso_rmse:.2f}')


Ridge Regression:
  MAE: 50398.54
  MSE: 5000624440.74
  RMSE: 70715.09

Lasso Regression:
  MAE: 50399.27
  MSE: 5000756302.84
  RMSE: 70716.03


# Ridge vs. Lasso Regression Evaluation

## Ridge Regression (Lower Errors)
- **MAE:** 50,398.54  
- **MSE:** 5,000,624,440.74  
- **RMSE:** 70,715.09  

## Lasso Regression (Higher Errors)
- **MAE:** 50,399.27  
- **MSE:** 5,000,756,302.84  
- **RMSE:** 70,716.03  

## Key Observations
1. **Ridge performed slightly better** due to minimal differences in errors.
2. **Lasso had marginally higher errors**, likely due to feature elimination.
3. **Use Ridge for better accuracy**; use Lasso for feature selection.

### Conclusion
Ridge is slightly preferred due to better performance, but both models are effective with minimal differences.