In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.ensemble import RandomForestRegressor
import pickle

# Purpose: train a RandomForestRegressor to predict the 'Price' column of
# Train_Data.csv, report train/test error metrics, and persist the fitted
# model to disk as a pickle file.

# --- Configuration ----------------------------------------------------------
# Everything a reader might want to tune lives here instead of being scattered
# through the cell as magic literals.
DATA_PATH = 'Train_Data.csv'
MODEL_PATH = 'random_forest_model.pkl'
TARGET_COLUMN = 'Price'
TEST_SIZE = 0.2  # fraction of rows held out for evaluation
SEED = 42        # shared by the split and the forest so a re-run reproduces the same numbers

# --- Load the data ----------------------------------------------------------
# 'Unnamed: 0' is presumably a leftover index column from an earlier
# DataFrame.to_csv call (TODO confirm against how the CSV was produced); it is
# removed so it cannot be used as a feature.
# drop() returns a new frame rather than mutating in place, and
# errors='ignore' keeps the cell runnable if the CSV is later re-exported
# without that column.
df = pd.read_csv(DATA_PATH).drop(columns=['Unnamed: 0'], errors='ignore')

# --- Define features and target ---------------------------------------------
# Every remaining column except the target is treated as a feature.
X = df.drop(columns=[TARGET_COLUMN])
y = df[TARGET_COLUMN]

# --- Split the data into training and testing sets --------------------------
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=SEED
)

# --- Initialize the Random Forest model with specified parameters -----------
rf_model = RandomForestRegressor(
    n_estimators=200,
    max_depth=20,
    min_samples_split=2,
    min_samples_leaf=1,
    random_state=SEED,
)

# --- Train -------------------------------------------------------------------
# Fit on the training split only; x_test / y_test stay held out for evaluation.
rf_model.fit(x_train, y_train)

# --- Evaluate the model (optional) ------------------------------------------
# Train metrics are computed on rows the forest has already seen, so the test
# metrics are the ones that estimate performance on new data.
train_pred = rf_model.predict(x_train)
test_pred = rf_model.predict(x_test)

train_mse = mean_squared_error(y_train, train_pred)
test_mse = mean_squared_error(y_test, test_pred)
train_mae = mean_absolute_error(y_train, train_pred)
test_mae = mean_absolute_error(y_test, test_pred)
train_r2 = r2_score(y_train, train_pred)
test_r2 = r2_score(y_test, test_pred)

print(f"Train MSE: {train_mse}, Test MSE: {test_mse}")
print(f"Train MAE: {train_mae}, Test MAE: {test_mae}")
print(f"Train R²: {train_r2}, Test R²: {test_r2}")

# --- Save the model as a pickle file ----------------------------------------
# NOTE: only load this file back from a trusted location; unpickling can
# execute arbitrary code.
with open(MODEL_PATH, 'wb') as model_file:
    pickle.dump(rf_model, model_file)

print("Model saved as random_forest_model.pkl")

Train MSE: 10319700918.4657, Test MSE: 81513846023.77179
Train MAE: 42440.959605206335, Test MAE: 121861.41588892927
Train R²: 0.9911773725482702, Test R²: 0.9464780933256574
Model saved as random_forest_model.pkl
