In [None]:
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, r2_score

In [None]:
# Load the Dataset
# `data` keeps the full object returned by the loader; the feature matrix
# and the regression target are pulled out of it as X and y.
print("Loading the California Housing dataset...")
data = fetch_california_housing()
X, y = data.data, data.target

# Report the dataset dimensions (rows = samples, columns = features).
print(f"Dataset loaded. Number of samples: {X.shape[0]}, Number of features: {X.shape[1]}")
print("Target variable: Median House Value (in hundreds of thousands of dollars)")


In [None]:
# Split the Data into Training and Testing Sets
# 30% of the rows are held out for testing; random_state is fixed so the
# same partition is produced on every run.
print("\nSplitting data into training and testing sets...")
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)
print(f"Training samples: {X_train.shape[0]}, Testing samples: {X_test.shape[0]}")

In [4]:
# Train the Decision Tree Regressor Model
# Only random_state is set; every other hyperparameter is left at its
# default. Extra parameters such as max_depth=5 can be passed to the
# constructor to constrain the tree.
print("\nTraining the Decision Tree Regressor model...")
# fit() returns the estimator itself, so construction and fitting are chained.
dt_model = DecisionTreeRegressor(random_state=42).fit(X_train, y_train)
print("Model training complete.")


Training the Decision Tree Regressor model...
Model training complete.


In [5]:
# Make Predictions
# Run the fitted tree (dt_model) on the held-out features X_test; the result
# is stored in y_pred and later compared against y_test.
print("\nMaking predictions on the test set...")
y_pred = dt_model.predict(X_test)


Making predictions on the test set...


In [6]:
# Inspect the predictions: a bare last expression is echoed as the cell output.
y_pred

array([0.425  , 0.722  , 5.00001, ..., 2.167  , 1.293  , 2.221  ])

In [7]:
# Evaluate the Model Performance for Regression
# Every metric compares the held-out targets (y_test) with the model's
# predictions (y_pred). The metrics are computed first, then reported.

# Mean Squared Error (MSE), and its square root (RMSE)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

# R-squared ($R^2$) Score
r2 = r2_score(y_test, y_pred)

# Report all three metrics, rounded to four decimal places.
print(f"\nMean Squared Error (MSE): {mse:.4f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
print(f"R-squared ($R^2$) Score: {r2:.4f}")


Mean Squared Error (MSE): 0.5280
Root Mean Squared Error (RMSE): 0.7266
R-squared ($R^2$) Score: 0.5977
