In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


In [8]:
# Read the real-estate CSV (path is relative to the working directory) into a DataFrame.
data = pd.read_csv(filepath_or_buffer="Real_estate.csv")

In [10]:
# Separate the six predictor columns (X) from the target column (Y).
# Label-based selection keeps the columns in exactly the order listed here.
X = data.loc[
    :,
    [
        'X1 transaction date',
        'X2 house age',
        'X3 distance to the nearest MRT station',
        'X4 number of convenience stores',
        'X5 latitude',
        'X6 longitude',
    ],
]
Y = data.loc[:, 'Y house price of unit area']

In [11]:
# Hold out 20% of the rows as a test set; the fixed seed makes the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
)

In [12]:
# Fit a random forest on the training split using default hyperparameters;
# only the seed is pinned so repeated runs build the same forest.
rf_model = RandomForestRegressor(random_state=42)
rf_model.fit(X=X_train, y=Y_train)

In [13]:
# Fit a single decision tree on the same training split, again with default
# hyperparameters and a pinned seed, so it can be compared against the forest.
dt_model = DecisionTreeRegressor(random_state=42)
dt_model.fit(X=X_train, y=Y_train)

In [14]:
# Predict the held-out rows with both fitted models (forest first, then tree).
rf_predictions, dt_predictions = rf_model.predict(X_test), dt_model.predict(X_test)

In [15]:
# Score the random forest's test-set predictions with MSE, MAE and R-squared,
# applying the three metric functions in that order.
rf_mse, rf_mae, rf_r2 = (
    metric(Y_test, rf_predictions)
    for metric in (mean_squared_error, mean_absolute_error, r2_score)
)

In [16]:
# Emit the random forest report as four lines: a header followed by one
# "label value" line per metric (label and value separated by a single space).
print(
    "Random Forest Regressor:",
    " ".join(("Mean Squared Error (MSE):", str(rf_mse))),
    " ".join(("Mean Absolute Error (MAE):", str(rf_mae))),
    " ".join(("R-squared:", str(rf_r2))),
    sep="\n",
)

Random Forest Regressor:
Mean Squared Error (MSE): 32.41107270244575
Mean Absolute Error (MAE): 3.9596894148020674
R-squared: 0.8068006553352045


In [17]:
# Score the decision tree's test-set predictions with the same three metrics
# used for the random forest, so the two models are directly comparable.
dt_mse = mean_squared_error(y_true=Y_test, y_pred=dt_predictions)
dt_mae = mean_absolute_error(y_true=Y_test, y_pred=dt_predictions)
dt_r2 = r2_score(y_true=Y_test, y_pred=dt_predictions)

In [18]:
# Emit the decision tree report: a blank line, the header, then one
# "label value" line per metric (label and value separated by a single space).
print(
    "\nDecision Tree Regressor:",
    " ".join(("Mean Squared Error (MSE):", str(dt_mse))),
    " ".join(("Mean Absolute Error (MAE):", str(dt_mae))),
    " ".join(("R-squared:", str(dt_r2))),
    sep="\n",
)


Decision Tree Regressor:
Mean Squared Error (MSE): 66.47144578313254
Mean Absolute Error (MAE): 5.9313253012048195
R-squared: 0.6037699868152279


In [None]:
Performance Metrics:

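Mean Squared Error (MSE): Lower values indicate better performance; large errors are penalized more heavily because each error is squared.
Mean Absolute Error (MAE): Lower values indicate better performance; it is expressed in the same units as the target.
R-squared (R2): Higher values (closer to 1) indicate a better fit of the model to the data.
On this test split the Random Forest outperforms the Decision Tree on all three metrics (MSE 32.41 vs. 66.47, MAE 3.96 vs. 5.93, R2 0.81 vs. 0.60).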

Model Complexity:

Decision Tree: Typically a simpler model than a Random Forest, as it consists of a single tree.
Random Forest: An ensemble of decision trees; generally more complex because it combines the predictions of many trees.

Interpretability:

Decision Tree: Easier to interpret because it is a single tree structure, which makes it possible to follow how each feature contributes to a prediction.
Random Forest: More complex, because predictions are aggregated across many trees, making it harder to interpret the combined effect of individual features on predictions.