1- Importing Libraries.

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import missingno as mis
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import PolynomialFeatures

2- Load and Preprocess Dataset.

In [2]:
# Read the housing table from the CSV that sits next to the notebook.
dataset = pd.read_csv("Housing.csv")

# 'price' is the value to predict; every other column is a candidate predictor.
target = dataset['price']
features = dataset.drop(columns=['price'])

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# identical across kernel restarts.
(
    features_train,
    features_test,
    target_train,
    target_test,
) = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=42,
)


3- Implement the Square Trick.

In [3]:
class CustomLinearRegression:
    """Ordinary linear regression with two interchangeable fitting routines.

    ``fit`` runs batch gradient descent; ``fit_closed_form`` solves the
    least-squares problem directly. Both store the result in
    ``self.coefficients`` (one weight per feature) and ``self.intercept``,
    which ``predict`` then uses.
    """

    def __init__(self, learning_rate=0.01, iterations=1000):
        """Store the gradient-descent settings; the model starts unfitted.

        Parameters
        ----------
        learning_rate : float
            Step size applied to each gradient update in ``fit``.
        iterations : int
            Number of full-batch updates performed by ``fit``.
        """
        self.learning_rate = learning_rate
        self.iterations = iterations
        self.coefficients = None
        self.intercept = None

    def fit(self, X, y):
        """Fit by batch gradient descent, starting from all-zero parameters.

        Parameters
        ----------
        X : array-like of shape (num_samples, num_features)
        y : array-like of shape (num_samples,) or (num_samples, 1)
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)
        # A single-column 2-D target would broadcast against the 1-D
        # predictions into an (n, n) matrix, so flatten it first.
        if y.ndim == 2 and y.shape[1] == 1:
            y = y[:, 0]

        num_samples, num_features = X.shape
        self.coefficients = np.zeros(num_features)
        self.intercept = 0.0

        for _ in range(self.iterations):
            residuals = np.dot(X, self.coefficients) + self.intercept - y

            # Gradients of half the mean squared error with respect to the
            # coefficients and the intercept.
            gradient_coefficients = (1 / num_samples) * np.dot(X.T, residuals)
            gradient_intercept = (1 / num_samples) * np.sum(residuals)

            self.coefficients -= self.learning_rate * gradient_coefficients
            self.intercept -= self.learning_rate * gradient_intercept

    def fit_closed_form(self, X, y):
        """Fit by solving the least-squares problem directly.

        Uses ``np.linalg.lstsq`` rather than explicitly inverting
        ``X^T X``: the explicit inverse fails (or is numerically
        meaningless) when columns are collinear, whereas ``lstsq`` returns
        the minimum-norm solution in that case.

        Parameters
        ----------
        X : array-like of shape (num_samples, num_features)
        y : array-like of shape (num_samples,)
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)
        X_with_bias = np.c_[np.ones(X.shape[0]), X]  # Add bias (intercept) term
        weights, *_ = np.linalg.lstsq(X_with_bias, y, rcond=None)
        self.intercept = weights[0]
        self.coefficients = weights[1:]

    def predict(self, X):
        """Return ``X @ coefficients + intercept`` for the fitted parameters."""
        return np.dot(X, self.coefficients) + self.intercept

4- Train and Evaluate Custom Linear Regression with Square Trick.

In [None]:
# Keep only the numeric columns of each split; the non-numeric ones are
# dropped rather than encoded.
numeric_features_train = features_train.select_dtypes(include=[np.number])
numeric_features_test = features_test.select_dtypes(include=[np.number])

# Standardise both splits with statistics taken from the training split only,
# so nothing about the test rows leaks into the scaling.
train_mean = numeric_features_train.mean()
train_std = numeric_features_train.std()
features_train_normalized = (numeric_features_train - train_mean) / train_std
features_test_normalized = (numeric_features_test - train_mean) / train_std

# Fit the custom model with its closed-form solver.
# NOTE(review): the printed labels below say "Square Trick", but the model is
# fitted with fit_closed_form here - confirm which one is intended.
custom_model_cf = CustomLinearRegression()
custom_model_cf.fit_closed_form(
    features_train_normalized.values,
    target_train.values,
)

# Predict on the held-out rows.
target_predictions_cf = custom_model_cf.predict(features_test_normalized.values)

# Score the predictions: root mean squared error and R^2.
mse_cf = mean_squared_error(target_test, target_predictions_cf)
rmse_cf = np.sqrt(mse_cf)
r2_cf = r2_score(target_test, target_predictions_cf)

# Report both metrics.
print("Custom Linear Regression (Square Trick) RMSE:", rmse_cf)
print("Custom Linear Regression (Square Trick) R^2:", r2_cf)


5- Experiment with Polynomial Features.

In [None]:
# The polynomial experiment only runs when the linear model's test R^2 is
# below 0.9.
if r2_cf < 0.9:
    # include_bias=False: CustomLinearRegression already learns its own
    # intercept, so the all-ones column PolynomialFeatures adds by default
    # would only duplicate that term.
    polynomial_transformer = PolynomialFeatures(degree=2, include_bias=False)
    # Fit the expansion on the training split, then reuse it for the test split.
    features_poly_train = polynomial_transformer.fit_transform(features_train_normalized)
    features_poly_test = polynomial_transformer.transform(features_test_normalized)

    # Train with gradient descent on the expanded features.
    custom_model_poly = CustomLinearRegression(learning_rate=0.01, iterations=1000)
    custom_model_poly.fit(features_poly_train, target_train.values)

    target_predictions_poly = custom_model_poly.predict(features_poly_test)

    rmse_poly = np.sqrt(mean_squared_error(target_test, target_predictions_poly))
    r2_poly = r2_score(target_test, target_predictions_poly)

    print("Polynomial Regression (Custom) RMSE:", rmse_poly)
    print("Polynomial Regression (Custom) R^2:", r2_poly)

    plt.scatter(target_test, target_predictions_poly, color='blue')
    plt.xlabel("Actual Prices")
    plt.ylabel("Predicted Prices")
    plt.title("Polynomial Regression (Custom): Actual vs Predicted Prices")
    plt.show()



In [None]:
# Scatter plot: actual vs predicted prices, with the ideal y = x line.
# target_predictions_poly is only created when the polynomial experiment runs
# (r2_cf < 0.9), so the same condition guards this plot; without it the cell
# raises NameError whenever that experiment was skipped.
if r2_cf < 0.9:
    # End points of the reference line on which predicted equals actual.
    price_range = [target_test.min(), target_test.max()]

    plt.figure(figsize=(8, 6))
    plt.scatter(target_test, target_predictions_poly, color='blue', label='Predicted', alpha=0.7)
    plt.plot(price_range, price_range, color='red', label='Ideal (predicted = actual)')
    plt.xlabel("Actual Prices")
    plt.ylabel("Predicted Prices")
    plt.title("Actual vs Predicted Prices")
    plt.legend()
    plt.grid()
    plt.show()
