# Housing Price Analysis with Random Forest (demonstrated on the seaborn diamonds dataset)

# Import necessary libraries

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

# Load the dataset (example dataset).
# NOTE: this is the seaborn "diamonds" CSV and it is fetched over the network
# on every run, so the script needs internet access.
url = (
    'https://raw.githubusercontent.com/mwaskom/seaborn-data/master/'
    'diamonds.csv'
)
df = pd.read_csv(url)

# Display basic info
print('Dataset shape:', df.shape)
print(df.head())

# Data preprocessing: one-hot encode categorical features.
# drop_first=True drops the first level of each encoded column so the
# indicator columns are not redundant with each other.
df_encoded = pd.get_dummies(df, drop_first=True)

# Define features and target variable: everything except 'price' is a
# feature, 'price' is what the model predicts.
X = df_encoded.drop('price', axis=1)
y = df_encoded['price']

# Split data into training and test sets (20% held out for testing; the
# fixed random_state keeps the split reproducible between runs).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Initialize Random Forest model (100 trees, fixed seed for reproducibility)
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)

# Train the model
rf_model.fit(X_train, y_train)

# Make predictions on the held-out test set
y_pred = rf_model.predict(X_test)

# Evaluate model performance
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)  # RMSE is reported in the same units as the target
r2 = r2_score(y_test, y_pred)

print('\nModel Performance:')
print('Mean Absolute Error:', mae)
print('Root Mean Squared Error:', rmse)
print('R² Score:', r2)

# Plot feature importance: label each importance with its feature column
# name and sort so the most important features come first.
feature_importance = pd.Series(
    rf_model.feature_importances_, index=X.columns
).sort_values(ascending=False)

plt.figure(figsize=(10, 6))
# head(10) takes the first ten entries by position, i.e. the ten largest
# importances after the descending sort above.
feature_importance.head(10).plot(kind='bar')
plt.title('Top 10 Important Features')
plt.ylabel('Importance Score')
plt.xlabel('Features')
plt.show()

# Creative touch — Display prediction comparison: the first ten test-set
# targets side by side with the corresponding predictions.
comparison = pd.DataFrame({
    'Actual': y_test.head(10).values,
    'Predicted': y_pred[:10],
})

print('\nSample Predictions:')
print(comparison)

# Add conclusion

def conclusion():
    """Print a short closing summary of the Random Forest analysis.

    Writes a blank line, a 'Conclusion:' heading and two summary sentences
    to standard output. Takes no arguments and returns None.
    """
    print('\nConclusion:')
    # Adjacent string literals are concatenated by the parser, so each
    # sentence is still printed as a single line.
    print(
        'The Random Forest model provides a reliable prediction of housing '
        '(or diamond) prices by analyzing multiple features.'
    )
    print(
        'It reduces overfitting through ensemble learning and highlights '
        'the most impactful variables on pricing.'
    )


conclusion()