# Comparison of OLS vs Scikit-learn

Data: California Housing (Rooms vs Price)

In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Load the rooms/price table from disk.
df = pd.read_csv("california_rooms_price.csv")

# Target: the 'Price' column as a 1-D array.
y = df['Price'].to_numpy()
# Feature: selecting with a list of column names keeps 'Rooms' as a 2-D,
# single-column array, which is the layout passed to LinearRegression.fit.
X = df[['Rooms']].to_numpy()

# OLS (closed form via the normal equations)
# Prepend a column of ones so theta[0] acts as the intercept term.
X_b = np.c_[np.ones((X.shape[0], 1)), X]
# Solve (X_b^T X_b) theta = X_b^T y directly rather than forming the explicit
# inverse: same closed-form estimate, but faster and numerically more
# accurate when X_b^T X_b is poorly conditioned.
theta = np.linalg.solve(X_b.T @ X_b, X_b.T @ y)
# In-sample predictions and their mean squared error.
y_pred_ols = X_b @ theta
mse_ols = np.mean((y - y_pred_ols) ** 2)

# Sklearn
# Fit scikit-learn's LinearRegression on the same X and y, then score its
# in-sample predictions with sklearn's MSE helper.
model = LinearRegression()
model.fit(X, y)
y_pred_skl = model.predict(X)
mse_skl = mean_squared_error(y_true=y, y_pred=y_pred_skl)

# Print both errors one after the other so they can be compared directly.
print("OLS MSE:", mse_ols)
print("Sklearn MSE:", mse_skl)

# Plot
# Raw observations as a translucent scatter.
plt.scatter(X, y, alpha=0.3, label='Data')

# Draw the fitted lines in ascending feature order. Plotting in raw row order
# makes the path retrace the same line segment back and forth, which overdraws
# heavily and fills in the gaps of the dashed style so it renders as solid.
x_flat = X.ravel()
sort_idx = np.argsort(x_flat)
x_sorted = x_flat[sort_idx]
plt.plot(x_sorted, y_pred_ols[sort_idx], label='OLS Fit', color='red')
plt.plot(x_sorted, y_pred_skl[sort_idx], label='Sklearn Fit', color='green', linestyle='--')

plt.xlabel('Average Rooms')
plt.ylabel('Median House Price')
plt.legend()
plt.grid(True)
plt.show()
