# Decision Tree Regressor

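Purpose: load the processed training data, fit a `DecisionTreeRegressor`, evaluate it with cross-validation, tune `max_depth`, `min_samples_split`, and `min_samples_leaf` with a grid search, and save the resulting model.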


# Data Loading

In [1]:
from pathlib import Path
import pandas as pd

# Locate the processed training set relative to the parent of the current
# working directory.
ROOT = Path("..")
PROC = ROOT / "data" / "train" / "housing_train_processed.csv"
TARGET_COL = "median_house_value"  # regression target; all other columns are features

df = pd.read_csv(PROC)

# Split into a target vector and a feature matrix as NumPy arrays.
# `to_numpy()` is the pandas-recommended replacement for the `.values` attribute.
y = df[TARGET_COL].to_numpy()
X = df.drop(columns=[TARGET_COL]).to_numpy()
print(X.shape)


(16512, 24)


# Model Fitting

In [2]:
from sklearn.tree import DecisionTreeRegressor

# Baseline: a tree with default hyperparameters and a fixed seed.
# `fit` returns the estimator itself, so construction and training are chained.
model = DecisionTreeRegressor(random_state=42).fit(X, y)

# Scored on the same data used for fitting, so this is a training-set R^2 only.
print("Train R^2:", model.score(X, y))


Train R^2: 1.0


# Cross-Validation

In [3]:
from sklearn.model_selection import cross_val_score
import numpy as np

# 5-fold CV; the scorer reports MSE negated so that higher is always better.
scores = cross_val_score(
    model,
    X,
    y,
    cv=5,
    scoring="neg_mean_squared_error",
    n_jobs=-1,
)

# Flip the sign back and take the root to get one RMSE per fold.
rmse = np.sqrt(np.negative(scores))
print("CV RMSE:", rmse)
print("Mean RMSE:", rmse.mean(), "±", rmse.std())


CV RMSE: [67310.41355391 72700.01257318 71254.93828433 70287.70862284
 70366.31853802]
Mean RMSE: 70383.87831445671 ± 1765.1609538624925


# Hyperparameter Tuning

In [4]:
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeRegressor
import numpy as np

# Deliberately small search space: 3 x 2 x 2 = 12 candidate settings.
param_grid = {
    "max_depth": [None, 10, 20],
    "min_samples_split": [2, 10],
    "min_samples_leaf": [1, 2],
}

# 3-fold CV and a single worker keep the search cheap on machines with few CPUs.
# `fit` returns the search object itself, so it is chained onto the constructor.
gs = GridSearchCV(
    estimator=DecisionTreeRegressor(random_state=42),
    param_grid=param_grid,
    scoring="neg_mean_squared_error",
    cv=3,
    n_jobs=1,
).fit(X, y)

# Keep a handle on the best estimator found by the search for later cells.
best_model = gs.best_estimator_

print("Best params:", gs.best_params_)
# best_score_ is the mean negated MSE over the folds, so this is sqrt(mean MSE).
print("Best RMSE:", np.sqrt(-gs.best_score_))


Best params: {'max_depth': 10, 'min_samples_leaf': 2, 'min_samples_split': 10}
Best RMSE: 61071.34635840309


# Model Saving

In [5]:
import joblib
import pathlib

# Prefer the tuned estimator; fall back to the baseline tree when `best_model`
# has not been defined in this kernel session.
try:
    final_model = best_model
except NameError:
    final_model = model

# The model file is written to the current working directory.
out = pathlib.Path("decision_tree_model.pkl")
joblib.dump(final_model, out)
out

PosixPath('decision_tree_model.pkl')