In [6]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.linear_model import LinearRegression, SGDRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split


# Load the California Housing dataset
housing_data = fetch_california_housing()
features = housing_data.data
target_prices = housing_data.target




In [7]:
# Split the dataset into training and testing sets
X_train, X_test, Y_train, Y_test = train_test_split(features, target_prices, test_size=0.2, random_state=42)


In [4]:
# Full-Batch Gradient Descent (Using LinearRegression from sklearn)
linear_regressor = LinearRegression()
linear_regressor.fit(X_train, Y_train)
Y_pred_full_batch = linear_regressor.predict(X_test)

# Stochastic Gradient Descent (Using SGDRegressor from sklearn)
sgd_regressor = SGDRegressor(max_iter=1000, tol=1e-3, random_state=42)
sgd_regressor.fit(X_train, Y_train)
Y_pred_sgd = sgd_regressor.predict(X_test)

# Calculate Mean Squared Error (MSE) and R² score for both methods
mse_full_batch = mean_squared_error(Y_test, Y_pred_full_batch)
mse_sgd = mean_squared_error(Y_test, Y_pred_sgd)
r2_full_batch = r2_score(Y_test, Y_pred_full_batch)
r2_sgd = r2_score(Y_test, Y_pred_sgd)

# Print the results for comparison
print("Full-Batch Gradient Descent Mean Squared Error:", mse_full_batch)
print("Stochastic Gradient Descent Mean Squared Error:", mse_sgd)
print("Full-Batch Gradient Descent R² Score:", r2_full_batch)
print("Stochastic Gradient Descent R² Score:", r2_sgd)

# Check model coefficients (weights)
print("\nLinear Regression Coefficients (Full-Batch GD):", linear_regressor.coef_)
print("SGD Regression Coefficients (Stochastic GD):", sgd_regressor.coef_)

# Optionally, print intercepts
print("\nLinear Regression Intercept (Full-Batch GD):", linear_regressor.intercept_)
print("SGD Regression Intercept (Stochastic GD):", sgd_regressor.intercept_)

Full-Batch Gradient Descent Mean Squared Error: 0.5558915986952422
Stochastic Gradient Descent Mean Squared Error: 3.125598638710681e+28
Full-Batch Gradient Descent R² Score: 0.5757877060324524
Stochastic Gradient Descent R² Score: -2.385208504070616e+28

Linear Regression Coefficients (Full-Batch GD): [ 4.48674910e-01  9.72425752e-03 -1.23323343e-01  7.83144907e-01
 -2.02962058e-06 -3.52631849e-03 -4.19792487e-01 -4.33708065e-01]
SGD Regression Coefficients (Stochastic GD): [ 3.70725899e+11 -1.39996413e+11 -8.06617673e+10 -1.82802082e+10
  5.53523872e+10 -3.54217051e+10  2.47104127e+11 -6.79713288e+11]

Linear Regression Intercept (Full-Batch GD): -37.02327770606391
SGD Regression Intercept (Stochastic GD): [2.74752632e+09]
