In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import make_scorer, mean_absolute_error, r2_score
from sklearn.ensemble import RandomForestRegressor

import pickle

import warnings
warnings.filterwarnings('ignore')

# Load data
df = pd.read_csv("preprocessed_data.csv")

# Decide which rows are train and which are test BEFORE fitting the scaler.
# Fitting StandardScaler on the full dataset lets the test rows influence the
# mean/variance used for scaling (data leakage), which makes the held-out
# metrics optimistic and bakes test statistics into the saved scaler.
# With the same test_size/random_state this is expected to select the same
# rows as calling train_test_split(X, y, ...) directly.
row_positions = np.arange(len(df))
train_rows, test_rows = train_test_split(row_positions, test_size=0.2, random_state=42)

# Scale numeric columns (e.g., Area): learn the scaling parameters from the
# training rows only, then apply them to every row so `df` stays fully scaled.
area_scaler = StandardScaler()
area_scaler.fit(df.iloc[train_rows][['Area (sqft)']])
df['Area (sqft)'] = area_scaler.transform(df[['Area (sqft)']])

# Save the scaler so the same transformation can be re-applied later
with open('area_scaler.pkl', 'wb') as scaler_file:
    pickle.dump(area_scaler, scaler_file)

# Define features (X) and target (y)
X = df[['Area (sqft)', 'BHK', 'Bathrooms', 'Construction Status', 'City', 'Location_encoded']]
y = df['Price']

# Split the data using the row positions chosen above
X_train, X_test = X.iloc[train_rows], X.iloc[test_rows]
y_train, y_test = y.iloc[train_rows], y.iloc[test_rows]

# Custom scorers
# greater_is_better=False makes the MAE scorer return the negated error, so
# that "higher is better" holds for both scorers.
# NOTE(review): neither scorer is used in the visible code; presumably they
# are meant for the imported GridSearchCV / RandomizedSearchCV - confirm.
mae_scorer = make_scorer(mean_absolute_error, greater_is_better=False)
r2_scorer = make_scorer(r2_score)

# Train the random forest. random_state is fixed (matching the seed used for
# the train/test split) so the bootstrap samples and feature sub-sampling are
# reproducible; without it every run yields a different model and metrics.
model = RandomForestRegressor(
    n_estimators=300,
    min_samples_split=5,
    min_samples_leaf=2,
    max_depth=20,
    bootstrap=True,
    random_state=42,
)
model.fit(X_train, y_train)

# Persist the fitted model
with open('random_forest_model.pkl', 'wb') as file:
    pickle.dump(model, file)

# Evaluate the fitted model on the held-out test split
y_pred = model.predict(X_test)

# Keep the metrics in variables and print them explicitly: a bare expression
# in the middle of a cell (or anywhere in a plain script) is evaluated and
# then silently discarded.
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Test MAE: {mae:.4f}")
print(f"Test R^2: {r2:.4f}")