<a href="https://colab.research.google.com/github/Adeeba04/DHC-Tasks/blob/main/Housing_Price_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_boston
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

# Load dataset
# NOTE(review): `load_boston` was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell only runs on older scikit-learn releases — confirm the
# pinned version or switch to another data source.
boston = load_boston()
X = pd.DataFrame(boston.data, columns=boston.feature_names)
y = pd.Series(boston.target, name="PRICE")

# Train-test split on the RAW features, so that the scaling statistics below
# can be computed from the training rows only.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardise with training-set statistics only. Computing mean/std on the
# full dataset before splitting leaks test-set information into preprocessing.
train_mean = X_train_raw.mean()
train_std = X_train_raw.std().replace(0, 1)  # constant column: avoid 0/0 -> NaN
X_train = (X_train_raw - train_mean) / train_std
X_test = (X_test_raw - train_mean) / train_std

# Kept so code that refers to `X_norm` still works; it is the full feature
# table scaled with the same training-set statistics.
X_norm = (X - train_mean) / train_std

# --- Linear Regression ---
class LinearRegressionScratch:
    """Linear regression trained with full-batch gradient descent.

    Parameters
    ----------
    lr : step size applied to every gradient update.
    n_iters : number of gradient steps performed by ``fit``.

    After ``fit`` the instance exposes ``weights`` (one coefficient per
    feature), ``bias``, ``n_samples`` and ``n_features``.
    """

    def __init__(self, lr=0.01, n_iters=1000):
        self.lr, self.n_iters = lr, n_iters

    def fit(self, X, y):
        """Learn ``weights`` and ``bias`` from a 2-D feature matrix and targets."""
        self.n_samples, self.n_features = X.shape
        self.weights = np.zeros(self.n_features)
        self.bias = 0

        # Every gradient is an average over the samples.
        inv_n = 1 / self.n_samples

        for _ in range(self.n_iters):
            # Signed error of the current linear model on the training data.
            error = (np.dot(X, self.weights) + self.bias) - y
            grad_w = inv_n * np.dot(X.T, error)
            grad_b = inv_n * np.sum(error)
            self.weights -= self.lr * grad_w
            self.bias -= self.lr * grad_b

    def predict(self, X):
        """Return the linear prediction ``X . weights + bias``."""
        linear_part = np.dot(X, self.weights)
        return linear_part + self.bias

# --- Random Forest Regressor (simplified) ---
class SimpleDecisionTree:
    """Single-split regression tree (a "stump").

    ``fit`` picks one feature uniformly at random, splits it at its median and
    stores the mean target on each side of the split. ``predict`` returns the
    stored mean for the side each row falls on.

    Parameters
    ----------
    max_depth : stored for interface compatibility only; the model always
        makes exactly one split regardless of this value.
    """

    def __init__(self, max_depth=3):
        self.max_depth = max_depth

    def fit(self, X, y):
        """Fit the stump on a 2-D feature array ``X`` and 1-D targets ``y``."""
        X = np.asarray(X)
        y = np.asarray(y)

        self.feature_idx = np.random.randint(0, X.shape[1])
        column = X[:, self.feature_idx]
        self.threshold = np.median(column)

        goes_left = column < self.threshold
        goes_right = column >= self.threshold

        # When the column is constant (or more than half of its values sit at
        # the minimum) the median equals the minimum and no row goes left.
        # Taking the mean of that empty slice would yield NaN and poison every
        # later prediction, so fall back to the overall target mean instead.
        overall_mean = y.mean()
        self.left_value = y[goes_left].mean() if goes_left.any() else overall_mean
        self.right_value = y[goes_right].mean() if goes_right.any() else overall_mean

    def predict(self, X):
        """Return the left or right leaf value for every row of ``X``."""
        X = np.asarray(X)
        return np.where(
            X[:, self.feature_idx] < self.threshold,
            self.left_value,
            self.right_value,
        )

class RandomForestScratch:
    """Bagged ensemble of ``SimpleDecisionTree`` models.

    Each tree is trained on a bootstrap resample (rows drawn with replacement)
    of the training data, and the ensemble prediction is the plain average of
    the individual tree predictions.

    Parameters
    ----------
    n_trees : number of trees built by ``fit``.
    max_depth : forwarded to every ``SimpleDecisionTree``.
    """

    def __init__(self, n_trees=10, max_depth=3):
        self.n_trees, self.max_depth = n_trees, max_depth
        self.trees = []

    def fit(self, X, y):
        """Rebuild the ensemble from scratch on ``X`` / ``y`` (NumPy arrays)."""
        self.trees = []
        n_rows = len(X)
        for _ in range(self.n_trees):
            # Bootstrap: same size as the input, sampled with replacement.
            sample = np.random.choice(n_rows, n_rows, replace=True)
            member = SimpleDecisionTree(max_depth=self.max_depth)
            member.fit(X[sample], y[sample])
            self.trees.append(member)

    def predict(self, X):
        """Average the predictions of all fitted trees, row by row."""
        per_tree = np.array([member.predict(X) for member in self.trees])
        return per_tree.mean(axis=0)

# --- XGBoost-like Model (very simplified) ---
class SimpleXGBoost:
    """Minimal gradient-boosting regressor built from ``SimpleDecisionTree`` stumps.

    Starting from an all-zero prediction, each round fits a stump to the
    current residuals and adds its output, scaled by the learning rate, to the
    running prediction.

    Parameters
    ----------
    n_estimators : number of boosting rounds (stumps) built by ``fit``.
    learning_rate : shrinkage applied to every stump's contribution; stored
        as ``self.lr``.
    """

    def __init__(self, n_estimators=10, learning_rate=0.1):
        self.n_estimators = n_estimators
        self.lr = learning_rate
        self.trees = []

    def fit(self, X, y):
        """Fit the boosted ensemble on ``X`` / ``y``.

        ``self.pred`` holds the running training-set prediction and is left
        on the instance after fitting.
        """
        # Start from a clean ensemble: without this reset a second call to
        # fit() would append to the trees of the previous fit and predict()
        # would sum the contributions of both runs.
        self.trees = []
        self.pred = np.zeros(len(y))
        for _ in range(self.n_estimators):
            residual = y - self.pred
            tree = SimpleDecisionTree()
            tree.fit(X, residual)
            update = tree.predict(X)
            self.pred += self.lr * update
            self.trees.append(tree)

    def predict(self, X):
        """Return the sum of all stump predictions, each scaled by ``self.lr``."""
        pred = np.zeros(X.shape[0])
        for tree in self.trees:
            pred += self.lr * tree.predict(X)
        return pred

# --- Evaluation Function ---
def evaluate_model(model, X_train, X_test, y_train, y_test, name):
    """Train ``model``, score it on the held-out split and print a summary.

    The model is fitted on ``X_train`` / ``y_train`` and then asked to predict
    ``X_test``. The root mean squared error and the R^2 score of those
    predictions against ``y_test`` are printed under the label ``name``.

    Returns
    -------
    tuple
        ``(rmse, r2)`` for the test split.
    """
    model.fit(X_train, y_train)
    y_hat = model.predict(X_test)

    mse = mean_squared_error(y_test, y_hat)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_test, y_hat)

    print(f"{name} - RMSE: {rmse:.3f}, R2: {r2:.3f}")
    return rmse, r2

# --- Run Models ---
print("\nEvaluating Models:\n")

# Convert the pandas splits to NumPy arrays once: the scratch tree models
# index their inputs with X[:, j], which needs plain arrays.
_split_arrays = (X_train.values, X_test.values, y_train.values, y_test.values)

# Linear Regression
lr_model = LinearRegressionScratch(lr=0.01, n_iters=1000)
evaluate_model(lr_model, *_split_arrays, "Linear Regression")

# Random Forest
rf_model = RandomForestScratch(n_trees=20)
evaluate_model(rf_model, *_split_arrays, "Random Forest")

# XGBoost
xgb_model = SimpleXGBoost(n_estimators=30)
evaluate_model(xgb_model, *_split_arrays, "XGBoost")

# --- Feature Importance (simulated, Random Forest only) ---
def plot_feature_importance_random_forest(model, feature_names):
    """Bar-plot how often each feature was chosen as a split feature.

    Every tree in ``model.trees`` contributes one vote for the feature stored
    in its ``feature_idx``; the votes are normalised to fractions of the total
    and drawn as one bar per entry of ``feature_names``.
    """
    votes = np.zeros(len(feature_names))
    for member in model.trees:
        votes[member.feature_idx] += 1
    # Share of all trees that split on each feature.
    importance = votes / votes.sum()

    plt.figure(figsize=(10, 6))
    plt.bar(feature_names, importance)
    plt.title("Feature Importance (Random Forest)")
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

# Plot how often each feature was picked as the split feature by the fitted
# forest's trees, labelling the bars with the original feature column names.
plot_feature_importance_random_forest(rf_model, X.columns)
