# Predictive Modeling

In [2]:
import pandas as pd
df = pd.read_csv(r"FeatureEngineering.csv")
df.head()

Unnamed: 0,property_title,property_location,price,area,listing_type,price_per_sqm,price_category,area_boxcox
0,325,54,-0.014715,-0.028311,0,-0.014736,1,9.425631
1,924,54,-0.014716,-0.030208,0,-0.014736,1,4.976388
2,88,75,-0.014716,-0.030212,0,-0.014736,1,2.203913
3,2070,69,-0.014716,-0.030212,0,-0.014736,1,2.203913
4,2715,66,-0.014712,-0.03021,0,-0.014735,0,4.407918


In [3]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

In [4]:
# Features and target
X = df.drop(columns=["price"])
y = df["price"]

# Split dataset (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize models
models = {
    "Linear Regression": LinearRegression(),
    "Decision Tree": DecisionTreeRegressor(random_state=42),
    "Random Forest": RandomForestRegressor(random_state=42, n_estimators=100)
}

# Train, predict, and evaluate each model
for name, model in models.items():
    model.fit(X_train, y_train)
    preds = model.predict(X_test)
    mse = mean_squared_error(y_test, preds)
    r2 = r2_score(y_test, preds)
    print(f"{name}: MSE = {mse:.2f}, R2 = {r2:.2f}")

Linear Regression: MSE = 0.00, R2 = -345.86
Decision Tree: MSE = 0.00, R2 = -14.38
Random Forest: MSE = 0.00, R2 = -5.11
