In [1]:
# Import libraries
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import pandas as pd


In [2]:
# Dataset: California housing, restricted to one feature so that a
# single-variable regression can be fitted and plotted.
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split

FEATURE_NAME = "MedInc"  # Median Income
TEST_SIZE = 0.2          # fraction of samples held out for evaluation
RANDOM_STATE = 42        # fixed seed so the split is reproducible

# Load dataset
data = fetch_california_housing()

# Look the column up by name rather than hard-coding its position, so the
# selection stays correct (or fails loudly) if the column order ever differs.
feature_idx = data.feature_names.index(FEATURE_NAME)
X = data.data[:, feature_idx]  # 1-D array holding only the selected feature
y = data.target                # House value

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE
)


## Linear Regression from Scratch

In [3]:
class LinearRegressionScratch:
    """Single-feature ordinary least squares: y ~= slope * x + intercept.

    ``slope`` and ``intercept`` are ``None`` until ``fit`` has been called.
    """

    def __init__(self):
        # Fitted parameters; populated by fit().
        self.slope = None
        self.intercept = None

    def fit(self, X, y):
        """Estimate slope and intercept with the closed-form least-squares solution.

        Parameters
        ----------
        X : array-like
            Values of the single feature, either 1-D with n samples or a
            column vector of shape (n, 1).
        y : array-like
            Target values, n samples.

        Raises
        ------
        ValueError
            If X holds more than one feature, the inputs are empty, their
            lengths differ, or X has zero variance (the slope is undefined).
        """
        x = np.asarray(X, dtype=float)
        target = np.asarray(y, dtype=float).ravel()

        # Anything other than one value per sample is not a single feature.
        if x.ndim > 1 and x.size != x.shape[0]:
            raise ValueError(
                f"X must contain a single feature, got shape {x.shape}."
            )
        # Flatten so an (n, 1) column vector cannot broadcast against an (n,)
        # target into an (n, n) matrix.
        x = x.ravel()

        if x.size == 0:
            raise ValueError("Cannot fit on empty data.")
        if x.size != target.size:
            raise ValueError(
                f"X and y must have the same length, got {x.size} and {target.size}."
            )

        x_mean = x.mean()
        y_mean = target.mean()
        x_centered = x - x_mean

        # Vectorised sums of products instead of Python-level iteration.
        denominator = np.dot(x_centered, x_centered)
        if denominator == 0:
            raise ValueError("X has zero variance; the slope is undefined.")
        numerator = np.dot(x_centered, target - y_mean)

        self.slope = numerator / denominator
        self.intercept = y_mean - self.slope * x_mean

    def predict(self, X):
        """Return ``slope * X + intercept`` for the given feature values.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.slope is None or self.intercept is None:
            raise RuntimeError("Model is not fitted yet; call fit() before predict().")
        return self.slope * X + self.intercept


In [None]:
# Custom from-scratch model: fit on the training split, then predict the
# held-out test split.
model_scratch = LinearRegressionScratch()
model_scratch.fit(X_train, y_train)
y_pred_scratch = model_scratch.predict(X_test)

# Report the fitted parameters, one per line.
print(
    f"Slope (scratch): {model_scratch.slope}",
    f"Intercept (scratch): {model_scratch.intercept}",
    sep="\n",
)


Slope (scratch): 0.41933849393812556
Intercept (scratch): 0.44459729169079387


In [4]:
# Scikit-learn

# Reshape the 1-D feature arrays into single-column 2-D arrays before they
# are handed to LinearRegression.
X_train_reshaped, X_test_reshaped = (
    split.reshape(-1, 1) for split in (X_train, X_test)
)

# fit() returns the estimator itself, so construction and fitting can be chained.
model_sklearn = LinearRegression().fit(X_train_reshaped, y_train)
y_pred_sklearn = model_sklearn.predict(X_test_reshaped)

# Report the fitted parameters, one per line.
print(
    f"Slope (sklearn): {model_sklearn.coef_[0]}",
    f"Intercept (sklearn): {model_sklearn.intercept_}",
    sep="\n",
)


Slope (sklearn): 0.4193384939381271
Intercept (sklearn): 0.4445972916907879


### Evaluate both models 

In [7]:
from sklearn.metrics import mean_squared_error, r2_score

# Score both sets of test predictions against the true targets.
mse_scratch, r2_scratch = (
    mean_squared_error(y_test, y_pred_scratch),
    r2_score(y_test, y_pred_scratch),
)
mse_sklearn, r2_sklearn = (
    mean_squared_error(y_test, y_pred_sklearn),
    r2_score(y_test, y_pred_sklearn),
)

# Print results: one heading followed by the two metrics for each model.
# The second heading carries a leading newline to separate the two reports.
for heading, mse_value, r2_value in (
    ("Custom Scratch Model:", mse_scratch, r2_scratch),
    ("\nScikit-learn Model:", mse_sklearn, r2_sklearn),
):
    print(heading)
    print(f"  MSE: {mse_value:.4f}")
    print(f"  R2 Score: {r2_value:.4f}")


Custom Scratch Model:
  MSE: 0.7091
  R2 Score: 0.4589

Scikit-learn Model:
  MSE: 0.7091
  R2 Score: 0.4589


In [None]:
# Compare both fitted lines against the actual test data.
fig, ax = plt.subplots(figsize=(10, 6))

# Scatter plot of actual test data
ax.scatter(X_test, y_test, color='blue', alpha=0.5, label='Actual Test Data')

# Line plots join points in the order given. X_test is in shuffled order, so
# plotting it directly overdraws the line with many back-and-forth segments
# and the dashed styles end up looking solid. Sort by x first.
order = np.argsort(X_test)
x_sorted = X_test[order]

# Plot predictions from scratch
ax.plot(x_sorted, y_pred_scratch[order], color='green', linestyle='--', label='Scratch Prediction')

# Plot predictions from sklearn
ax.plot(x_sorted, y_pred_sklearn[order], color='red', linestyle='-.', label='Sklearn Prediction')

ax.legend()
ax.set_title('Linear Regression: Scratch vs Sklearn (California Housing)')
ax.set_xlabel('Median Income (X_test)')
ax.set_ylabel('Median House Value (y)')
ax.grid(True)
plt.show()
