In [None]:
# Random Forest for Regression
import numpy as np

"""
Feature 0 → Size
Feature 1 → Rooms
Feature 2 → Age
"""
# Feature matrix: one row per sample, columns in the feature order listed above.
X_class = np.array([
    [50, 1, 30],
    [70, 2, 20],
    [100, 3, 10],
    [120, 3, 5],
    [140, 4, 2],
    [160, 5, 1],
])

# Regression target (price), one value per row of X_class, in the same order.
Y_class = np.array([150, 200, 300, 360, 400, 450])


def calc_threshold(x):
    """Return candidate split points for one feature column.

    The candidates are the midpoints between each pair of neighbouring
    distinct values of ``x`` (``np.unique`` returns them sorted), so an input
    with fewer than two distinct values yields an empty array.
    """
    distinct = np.unique(x)
    lower, upper = distinct[:-1], distinct[1:]
    return (upper + lower) / 2


def random_feature_selection(X):
    """Return a random subset of the columns of ``X``.

    ``int(sqrt(n_columns))`` distinct column indices are drawn without
    replacement from NumPy's global random state, and the matching columns
    are returned in the order they were drawn.
    """
    total_columns = X.shape[1]
    subset_size = int(np.sqrt(total_columns))
    chosen = np.random.choice(total_columns, subset_size, replace=False)
    return X[:, chosen]


def weighted_variance(left_side, right_side):
    """Return the size-weighted variance of a two-way split.

    Each side's population variance (``np.var``) is weighted by that side's
    share of the total number of samples, and the two terms are added.
    """
    total = len(left_side) + len(right_side)
    return sum(
        (len(side) / total) * np.var(side) for side in (left_side, right_side)
    )


def regression_best_split(X, Y):
    """Find the lowest-variance split among a random subset of features.

    ``int(sqrt(n_features))`` feature indices are drawn without replacement
    from NumPy's global random state. For each drawn feature, every candidate
    threshold from ``calc_threshold`` is tried: targets whose feature value is
    below the threshold go left, the rest go right, and the split is scored
    with ``weighted_variance``.

    Returns:
        A ``(feature_index, threshold)`` pair for the best split found, or
        ``(None, None)`` when no candidate split was scored.
    """
    _, n_features = X.shape
    candidate_features = np.random.choice(
        n_features, int(np.sqrt(n_features)), replace=False
    )

    # (score, feature, threshold). Only a strictly lower score replaces the
    # current best, so the first candidate seen wins ties.
    best = (float("inf"), None, None)

    for col in candidate_features:
        column = X[:, col]
        for cut in calc_threshold(column):
            left_targets = Y[column < cut]
            right_targets = Y[column >= cut]

            # A split that leaves one side empty separates nothing.
            if len(left_targets) > 0 and len(right_targets) > 0:
                score = weighted_variance(left_targets, right_targets)
                if score < best[0]:
                    best = (score, col, cut)

    return best[1], best[2]


def make_regression_leaf(Y):
    """Build a leaf node that predicts the mean of the target values ``Y``."""
    leaf = dict(type="leaf")
    leaf["prediction"] = np.mean(Y)
    return leaf


def regression_build_tree(X, Y, depth=0, max_depth=3, min_samples=2):
    """Recursively grow a regression tree and return it as nested dicts.

    Args:
        X: 2-D feature array, one row per sample.
        Y: Target values aligned with the rows of ``X``.
        depth: Depth of the node being built (0 for the root).
        max_depth: A node at this depth or deeper becomes a leaf.
        min_samples: A node with fewer samples than this becomes a leaf.

    Returns:
        Either a leaf dict from ``make_regression_leaf`` or an internal node
        dict with the keys ``"type"``, ``"feature"``, ``"threshold"``,
        ``"left"`` and ``"right"``.
    """
    # Stop when all targets are identical, the depth limit is reached, or
    # there are too few samples to split.
    if len(np.unique(Y)) == 1 or depth >= max_depth or len(Y) < min_samples:
        return make_regression_leaf(Y)

    feature, threshold = regression_best_split(X, Y)

    # The split search found no usable threshold among the sampled features.
    if feature is None:
        return make_regression_leaf(Y)

    left_mask = X[:, feature] < threshold
    right_mask = ~left_mask

    # Forward the stopping criteria to the children. Without this, the
    # recursion would fall back to the default limits below the root and
    # ignore the caller's max_depth / min_samples.
    left_child = regression_build_tree(
        X[left_mask],
        Y[left_mask],
        depth=depth + 1,
        max_depth=max_depth,
        min_samples=min_samples,
    )
    right_child = regression_build_tree(
        X[right_mask],
        Y[right_mask],
        depth=depth + 1,
        max_depth=max_depth,
        min_samples=min_samples,
    )

    return {
        "type": "node",
        "feature": feature,
        "threshold": threshold,
        "left": left_child,
        "right": right_child,
    }


def bootstrap_sample(X, Y):
    """Draw a bootstrap resample of the dataset.

    ``len(X)`` row indices are drawn with replacement from NumPy's global
    random state. The same indices select from both ``X`` and ``Y``, so each
    sampled row keeps its own target.
    """
    sample_size = len(X)
    picks = np.random.choice(sample_size, sample_size, replace=True)
    resampled_X = X[picks]
    resampled_Y = Y[picks]
    return resampled_X, resampled_Y


def build_forest(X, Y, n_trees=3):
    """Train ``n_trees`` regression trees, each on its own bootstrap sample.

    Returns the trees as a list, in the order they were built.
    """
    return [
        regression_build_tree(*bootstrap_sample(X, Y))
        for _ in range(n_trees)
    ]


def reg_predict_one(x, tree):
    """Return the tree's prediction for a single sample ``x``.

    Walks down from the root: at each internal node the sample goes left when
    its value for the node's feature is below the node's threshold, otherwise
    right. The prediction stored in the leaf that is reached is returned.
    """
    node = tree
    while node["type"] != "leaf":
        branch = "left" if x[node["feature"]] < node["threshold"] else "right"
        node = node[branch]
    return node["prediction"]


def reg_predict(x, tree):
    """Return a list with the tree's prediction for every sample in ``x``."""
    return list(map(lambda sample: reg_predict_one(sample, tree), x))


def forest_predict(X, forest):
    """Average the per-tree predictions for every sample in ``X``.

    Each tree's predictions form one row of a matrix. The mean is taken down
    the rows (axis 0), giving one averaged value per sample.
    """
    per_tree = [reg_predict(X, member) for member in forest]
    stacked = np.array(per_tree)
    return np.mean(stacked, axis=0)


# Fit a five-tree forest on the toy data, then predict on the same rows.
# Sampling draws from NumPy's global RNG, which is not seeded in this cell,
# so the printed values can differ from run to run.
forest = build_forest(X=X_class, Y=Y_class, n_trees=5)
predictions = forest_predict(X=X_class, forest=forest)
print(predictions)


[160. 220. 336. 360. 410. 430.]
