In [None]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression, SGDRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
from sklearn.preprocessing import StandardScaler

# 1. Load the dataset
# Column names are kept in one place so the feature/target split is easy to
# see and adjust.
FEATURE_COLUMNS = ["size", "bedroom"]
TARGET_COLUMN = "price"

# Read house_price.csv from the current working directory.
df = pd.read_csv("house_price.csv")

X = df[FEATURE_COLUMNS]  # predictor columns (DataFrame)
y = df[TARGET_COLUMN]    # target column (Series)

# 2. Linear Regression
# Fit on the unscaled features and predict on the very same rows, so every
# metric reported below is an in-sample (training) error.
lr_model = LinearRegression().fit(X, y)
y_pred_lr = lr_model.predict(X)

# Coefficients
print("LinearRegression Coefficients:", lr_model.coef_)
print("LinearRegression Intercept:", lr_model.intercept_)

# Metrics
mse_lr = mean_squared_error(y, y_pred_lr)
rmse_lr = np.sqrt(mse_lr)  # square root of the MSE computed just above
mae_lr = mean_absolute_error(y, y_pred_lr)
mape_lr = mean_absolute_percentage_error(y, y_pred_lr)

# One line per metric, in the order MAE, MSE, RMSE, MAPE.
print(
    f"LinearRegression MAE: {mae_lr:.2f}",
    f"LinearRegression MSE: {mse_lr:.2f}",
    f"LinearRegression RMSE: {rmse_lr:.2f}",
    f"LinearRegression MAPE: {mape_lr:.4f}",
    sep="\n",
)

# 3. SGD Regressor (with standardization)
# The features are standardized before fitting; the scaler is fitted on the
# same rows the model is trained and evaluated on.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# random_state pins the shuffling of the training data so the coefficients
# and metrics printed below are identical from one run to the next.
sgd_model = SGDRegressor(
    max_iter=1000,
    tol=1e-3,
    eta0=0.01,
    learning_rate='invscaling',
    random_state=42,
)
sgd_model.fit(X_scaled, y)
y_pred_sgd = sgd_model.predict(X_scaled)

# NOTE: the model was fitted on X_scaled, so these coefficients are expressed
# per standardized feature unit and are not directly comparable to the
# LinearRegression coefficients, which were fitted on the raw columns.
print("\nSGDRegressor Coefficients:", sgd_model.coef_)
print("SGDRegressor Intercept:", sgd_model.intercept_)

# Metrics are computed on the training rows (in-sample error).
mae_sgd = mean_absolute_error(y, y_pred_sgd)
mse_sgd = mean_squared_error(y, y_pred_sgd)
rmse_sgd = np.sqrt(mse_sgd)
mape_sgd = mean_absolute_percentage_error(y, y_pred_sgd)

print(f"SGDRegressor MAE: {mae_sgd:.2f}")
print(f"SGDRegressor MSE: {mse_sgd:.2f}")
print(f"SGDRegressor RMSE: {rmse_sgd:.2f}")
print(f"SGDRegressor MAPE: {mape_sgd:.4f}")

# MAE - Average of the absolute differences between predicted and actual values
# pros: not heavily affected by outliers, cons: doesn't emphasize large errors
# useful when outliers are not critical

# MSE - Average of the squared differences between predicted and actual values
# pros: penalizes large errors more than small ones, cons: hard to interpret because it is in squared units
# useful when large errors (outliers) are critical

# RMSE - Square root of MSE, which brings the units back to the original scale
# pros: read in the same units as MAE while staying sensitive to large errors like MSE, cons: can be dominated by outliers
# similar to MSE, but in the original scale

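# MAPE - Average of the absolute percentage differences between predicted and actual values
# pros: gives a scale-independent, percentage-based error, cons: blows up when actual values are zero or close to zero
# note: sklearn returns it as a fraction, so 0.1561 means about 15.6%
# useful for relative accuracy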

LinearRegression Coefficients: [  139.21067402 -8738.01911233]
LinearRegression Intercept: 89597.90954279751
LinearRegression MAE: 51502.77
LinearRegression MSE: 4086560101.21
LinearRegression RMSE: 63926.21
LinearRegression MAPE: 0.1561

SGDRegressor Coefficients: [108927.29770082  -6024.0215484 ]
SGDRegressor Intercept: [340466.81930741]
SGDRegressor MAE: 51567.29
SGDRegressor MSE: 4086818104.41
SGDRegressor RMSE: 63928.23
SGDRegressor MAPE: 0.1563
