# Linear, Lasso and Ridge Regression For Median House Value Prediction

## Install & Import Needed Libraries

In [326]:
pip install numpy pandas scikit-learn

Note: you may need to restart the kernel to use updated packages.


In [327]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge

## Splitting the Dataset

In [328]:
# --- Configuration for loading and partitioning the dataset -----------------
DATA_PATH = "California_Houses.csv"
TARGET_COLUMN = "Median_House_Value"  # column the models learn to predict
SEED = 42                             # fixed seed so the shuffle is reproducible
TRAIN_FRACTION = 0.7                  # rows [0, 70%) of the shuffled data -> train
TRAIN_VAL_FRACTION = 0.85             # rows [70%, 85%) -> validation, the rest -> test

data = pd.read_csv(DATA_PATH)

# Shuffle every row once (frac=1) and renumber the index from 0.
data = data.sample(frac=1, random_state=SEED).reset_index(drop=True)

# Positional cut points of the 70 / 15 / 15 split.
total_samples = len(data)
train_end = int(TRAIN_FRACTION * total_samples)
val_end = int(TRAIN_VAL_FRACTION * total_samples)

# `.iloc` makes it explicit that these are positional row slices.
train_data = data.iloc[:train_end]
val_data = data.iloc[train_end:val_end]
test_data = data.iloc[val_end:]

# Sanity checks: the three parts cover the data exactly once and none is empty
# (an empty part would only surface later as an obscure fit/predict error).
assert len(train_data) + len(val_data) + len(test_data) == total_samples
assert min(len(train_data), len(val_data), len(test_data)) > 0, "dataset too small to split"

# Separate the feature columns (X) from the target column (y) for each split.
X_train = train_data.drop(columns=[TARGET_COLUMN])
y_train = train_data[TARGET_COLUMN]

X_val = val_data.drop(columns=[TARGET_COLUMN])
y_val = val_data[TARGET_COLUMN]

X_test = test_data.drop(columns=[TARGET_COLUMN])
y_test = test_data[TARGET_COLUMN]

## 1. Linear Regression

### Initialize and Train Model

In [329]:
# Ordinary least-squares baseline, fitted on the raw (unscaled) training features.
model = LinearRegression()
model.fit(X=X_train, y=y_train)

### Predicting on Validation Set

In [330]:
# Score the fitted linear model on the validation split.
y_val_pred = model.predict(X_val)

# Both metrics compare the true targets against the predictions.
mse_val = mean_squared_error(y_true=y_val, y_pred=y_val_pred)
mae_val = mean_absolute_error(y_true=y_val, y_pred=y_val_pred)

print("Validation Set:")
for label, value in (
    ("Mean Squared Error:", mse_val),
    ("Mean Absolute Error:", mae_val),
):
    print(label, value)

Validation Set:
Mean Squared Error: 4573978531.209601
Mean Absolute Error: 49745.574858650725


### Predicting on Test Set

In [331]:
# Score the fitted linear model on the test split.
y_test_pred = model.predict(X_test)

# Both metrics compare the true targets against the predictions.
mse_test = mean_squared_error(y_true=y_test, y_pred=y_test_pred)
mae_test = mean_absolute_error(y_true=y_test, y_pred=y_test_pred)

print("Test Set:")
for label, value in (
    ("Mean Squared Error:", mse_test),
    ("Mean Absolute Error:", mae_test),
):
    print(label, value)

Test Set:
Mean Squared Error: 5119928659.042544
Mean Absolute Error: 50983.63801997039


## 2. Lasso Regression

### Feature Scaling

In [332]:
# Fit the scaler on the training split only, then apply that same fitted
# transform to all three splits, so validation/test rows never influence
# the scaling parameters.
scaler = StandardScaler().fit(X_train)
X_train_scaled, X_val_scaled, X_test_scaled = (
    scaler.transform(split) for split in (X_train, X_val, X_test)
)

### Initialize and Train Model

In [333]:
# L1-regularised linear model trained on the standardised features.
# max_iter=5000 is presumably there to give the solver room to converge.
# NOTE(review): with alpha=0.1 the recorded validation/test errors are nearly
# identical to the plain linear model's; consider tuning alpha on the
# validation split.
lasso_model = Lasso(max_iter=5000, alpha=0.1)
lasso_model.fit(X=X_train_scaled, y=y_train)

### Predicting on Validation Set

In [334]:
# Score the fitted Lasso model on the scaled validation split.
y_val_pred_lasso = lasso_model.predict(X_val_scaled)

# Both metrics compare the true targets against the predictions.
mse_val_lasso = mean_squared_error(y_true=y_val, y_pred=y_val_pred_lasso)
mae_val_lasso = mean_absolute_error(y_true=y_val, y_pred=y_val_pred_lasso)

print("Validation Set:")
for label, value in (
    ("Mean Squared Error:", mse_val_lasso),
    ("Mean Absolute Error:", mae_val_lasso),
):
    print(label, value)

Validation Set:
Mean Squared Error: 4573987731.342622
Mean Absolute Error: 49745.70818668422


### Predicting on Test Set

In [335]:
# Score the fitted Lasso model on the scaled test split.
y_test_pred_lasso = lasso_model.predict(X_test_scaled)

# Both metrics compare the true targets against the predictions.
mse_test_lasso = mean_squared_error(y_true=y_test, y_pred=y_test_pred_lasso)
mae_test_lasso = mean_absolute_error(y_true=y_test, y_pred=y_test_pred_lasso)

print("Test Set:")
for label, value in (
    ("Mean Squared Error:", mse_test_lasso),
    ("Mean Absolute Error:", mae_test_lasso),
):
    print(label, value)

Test Set:
Mean Squared Error: 5119916235.292772
Mean Absolute Error: 50983.579233982404


## 3. Ridge Regression

### Initialize and Train Model

In [336]:
# L2-regularised linear model trained on the same standardised features
# that the Lasso model uses.
# NOTE(review): with alpha=1.0 the recorded validation/test errors are nearly
# identical to the plain linear model's; consider tuning alpha on the
# validation split.
ridge_model = Ridge(alpha=1.0)
ridge_model.fit(X=X_train_scaled, y=y_train)

### Predicting on Validation Set

In [337]:
# Score the fitted Ridge model on the scaled validation split.
y_val_pred_ridge = ridge_model.predict(X_val_scaled)

# Both metrics compare the true targets against the predictions.
mse_val_ridge = mean_squared_error(y_true=y_val, y_pred=y_val_pred_ridge)
mae_val_ridge = mean_absolute_error(y_true=y_val, y_pred=y_val_pred_ridge)

print("Validation Set:")
for label, value in (
    ("Mean Squared Error:", mse_val_ridge),
    ("Mean Absolute Error:", mae_val_ridge),
):
    print(label, value)

Validation Set:
Mean Squared Error: 4574539776.915881
Mean Absolute Error: 49753.65723024196


### Predicting on Test Set

In [338]:
# Score the fitted Ridge model on the scaled test split.
y_test_pred_ridge = ridge_model.predict(X_test_scaled)

# Both metrics compare the true targets against the predictions.
mse_test_ridge = mean_squared_error(y_true=y_test, y_pred=y_test_pred_ridge)
mae_test_ridge = mean_absolute_error(y_true=y_test, y_pred=y_test_pred_ridge)

print("Test Set:")
for label, value in (
    ("Mean Squared Error:", mse_test_ridge),
    ("Mean Absolute Error:", mae_test_ridge),
):
    print(label, value)

Test Set:
Mean Squared Error: 5119117838.892064
Mean Absolute Error: 50980.25474831543
