In [33]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Load the raw table and clean it in one non-mutating chain:
#   1. coerce 'Price' to a numeric dtype (unparseable entries become NaN),
#   2. discard rows whose 'Price' is NaN,
#   3. discard rows that still contain a NaN in ANY column.
df = pd.read_csv('output.csv')
df = (
    df.assign(Price=pd.to_numeric(df['Price'], errors='coerce'))
    .dropna(subset=['Price'])
    .dropna()
)

# 'Price' is the regression target (y); every other column is a feature (X).
# Non-numeric feature columns are one-hot encoded, with the first level of
# each dropped (drop_first=True).
y = df['Price']
X = pd.get_dummies(df.drop(columns=['Price']), drop_first=True)

# Hold out 30% of the rows for evaluation; the seed fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Build a 100-tree random forest and fit it on the training split
# (fit() returns the estimator itself, so rf_model is the fitted model).
rf_model = RandomForestRegressor(n_estimators=100, random_state=42).fit(
    X_train, y_train
)

# Predict prices for the held-out rows.
y_pred = rf_model.predict(X_test)

# Score the predictions: MSE, its square root (RMSE), and R-squared.
mse = mean_squared_error(y_test, y_pred)
rmse = mse ** 0.5
r2 = r2_score(y_test, y_pred)

# Report the three metrics, one per line.
print(
    f"Mean Squared Error (MSE): {mse}",
    f"Root Mean Squared Error (RMSE): {rmse}",
    f"R-squared (R2): {r2}",
    sep="\n",
)

# Side-by-side table of true vs. predicted prices; rows keep y_test's index.
actual_vs_predicted = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print(actual_vs_predicted)

Mean Squared Error (MSE): 137519243.29293334
Root Mean Squared Error (RMSE): 11726.859907619488
R-squared (R2): 0.3652423976867728
      Actual  Predicted
367  13200.0   11417.41
52    5449.0    5202.35
67    3503.0   22835.33
633   7441.0    7550.66
602   3752.0    3439.00
585   2419.0    3851.54
341   7819.0    8003.33
645  56500.0   41002.75
649   3538.0   21664.70
129    733.0     743.71
338   7038.0    7369.68
657  90867.0   10822.61
640   9482.0    8469.28
350  12793.0   10584.88
661   9131.0    9182.87
652   7019.0   12690.64
327   6968.0    7097.19
627   9449.0    9249.37
354  61250.0   41003.00
340  12552.0   10925.09
596   2962.0    2772.93
53    3530.0    4285.45
623   4316.0    5024.79
365  11504.0   10653.60
337  10187.0    9475.10
332   3618.0    4660.65
356   3323.0    4701.81
339   9098.0    8169.62
597   4538.0    3412.93
606   8988.0    6028.28
46    9950.0    9319.78
55    4973.0    6651.19
290    539.0     516.68
347   6050.0    5767.56
625   4793.0    5169.03
447  