In [16]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Tunable settings for this cell. Values are unchanged from the original
# literals; note that the split and the model deliberately keep their own seeds.
TEST_FRACTION = 0.3   # share of rows held out for evaluation
SPLIT_SEED = 41       # random_state for train_test_split
N_TREES = 100         # number of trees in the forest
MODEL_SEED = 42       # random_state for RandomForestRegressor

# Load the dataset. The raw frame is kept under its own name so the unmodified
# data stays available for inspection after cleaning.
raw_df = pd.read_csv('output.csv')

# Coerce 'Price' to numeric (unparseable entries become NaN), then drop every
# row that has a NaN in ANY column. A separate dropna(subset=['Price']) is not
# needed: the all-column dropna already removes rows whose 'Price' is NaN.
# Built as a chain (no inplace=True) so `df` is derived from `raw_df` in one step.
df = raw_df.assign(
    Price=pd.to_numeric(raw_df['Price'], errors='coerce')
).dropna()

# Fail early with a clear message instead of an opaque error from the split.
if df.empty:
    raise ValueError(
        "No rows left in 'output.csv' after dropping rows with missing values; "
        "cannot train the model."
    )

# Features (X) are every column except the target; the target (y) is 'Price'.
X = df.drop(columns=['Price'])
y = df['Price']

# One-hot encode categorical columns; drop_first=True omits the first level of
# each encoded column.
X = pd.get_dummies(X, drop_first=True)

# Split the data into training and testing sets.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_FRACTION, random_state=SPLIT_SEED
)

# Initialize and train the Random Forest Regressor on the training data.
rf_model = RandomForestRegressor(n_estimators=N_TREES, random_state=MODEL_SEED)
rf_model.fit(X_train, y_train)

# Predict on the held-out test data.
y_pred = rf_model.predict(X_test)

# Side-by-side view of true and predicted prices, indexed like y_test.
actual_vs_predicted = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print(actual_vs_predicted)

# Evaluate the model on the test set.
mse = mean_squared_error(y_test, y_pred)
rmse = mse**0.5
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error (MSE): {mse}")
print(f"Root Mean Squared Error (RMSE): {rmse}")
print(f"R-squared (R2): {r2}")

      Actual  Predicted
368  12771.0   12587.42
335   9199.0    9143.31
643   7239.0    8107.00
815    395.0     322.19
340  12552.0   11055.40
46    9950.0    9346.69
627   9449.0    8939.69
363  23529.0   17670.66
232    230.0     360.44
647   3311.0    4034.50
67    3503.0   10309.14
588   1850.0    2764.89
66    3446.0    3727.15
51    7335.0    7785.99
640   9482.0    8749.30
604   4570.0    3890.11
362  16931.0   14105.61
803    378.0     301.60
75   11825.0   12256.10
611   2250.0    2570.09
714    174.0     295.59
48    8670.0    8248.87
220    337.0     339.22
657  90867.0   10808.36
645  56500.0   55598.41
642   8580.0    8511.14
628  10139.0    9040.56
524    395.0     340.18
333   3618.0    3725.85
616   4097.0    3075.77
338   7038.0    7690.41
590   2544.0    4267.67
59   12853.0   10823.71
277    415.0     401.57
602   3752.0    3267.91
132    169.0     290.44
620   8568.0    7993.22
79   10000.0   11195.20
78    9814.0   11752.98
523    235.0     331.03
290    539.0    