In [8]:
# Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

# --- Data loading, model training and hold-out evaluation ---

# Read the CSV and split it into the target column and the feature columns
file_path = r"D:\shadow\HousingData.csv"
target_column = "MEDV"
data = pd.read_csv(file_path)
y = data[target_column]
X = data.drop(columns=[target_column])

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardise the features: the scaler is fitted on the training split only
# and then applied unchanged to both splits
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit a 100-tree random forest (seeded) on the scaled training data
model = RandomForestRegressor(n_estimators=100, random_state=42).fit(
    X_train_scaled, y_train
)

# Score the model on the held-out split; RMSE is the square root of the MSE
y_pred = model.predict(X_test_scaled)
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(
    mean_squared_error(y_test, y_pred)
)
print(f"Model evaluation on test set: MAE = {mae:.2f}, RMSE = {rmse:.2f}\n")

# --- Interactive prediction with 5 main features ---


def _prompt_in_range(name: str, desc: str, low: float, high: float) -> float:
    """Prompt on stdin until the user enters a number within [low, high].

    Args:
        name: Feature (column) name shown at the start of the prompt.
        desc: Human-readable description of the feature.
        low: Smallest accepted value (inclusive).
        high: Largest accepted value (inclusive).

    Returns:
        The first entered value that parses as a float and lies in the range.
    """
    prompt = f"{name} ({desc}, typical range {low} - {high}): "
    while True:
        raw = input(prompt)
        # Keep the try body minimal: only the conversion can raise ValueError
        try:
            value = float(raw)
        except ValueError:
            print("Invalid input! Please enter a number.")
            continue
        # "nan" parses as a float but fails both comparisons, so it is
        # rejected here together with genuinely out-of-range numbers
        if low <= value <= high:
            return value
        print(f"Please enter a value within the suggested range {(low, high)}")


print("Enter the 5 main details of the house to predict its price:")

# Default values (mean of dataset); used for every feature the user is not asked about
default_values = X.mean()

# 5 main features with the ranges accepted by the prompt
main_features = {
    "RM": {"desc": "Average number of rooms per dwelling", "range": (3.5, 9.0)},
    "LSTAT": {"desc": "% lower status of the population", "range": (1.0, 35.0)},
    "PTRATIO": {"desc": "Pupil-teacher ratio by town", "range": (12.0, 22.0)},
    "CRIM": {"desc": "Per capita crime rate by town", "range": (0.0, 90.0)},
    "DIS": {"desc": "Weighted distances to employment centers", "range": (1.0, 12.0)},
}

# Fail fast: assigning an unknown label to a Series silently appends a new
# entry, which would only surface after all prompts as a feature mismatch
# inside scaler.transform
missing = [col for col in main_features if col not in default_values.index]
if missing:
    raise KeyError(f"Feature column(s) not found in the dataset: {missing}")

# Start from the defaults and overwrite the 5 main features with user input
user_input = default_values.copy()
for col, info in main_features.items():
    low, high = info["range"]
    user_input[col] = _prompt_in_range(col, info["desc"], low, high)

# Convert user input to a one-row DataFrame to keep feature names (avoids warning)
user_input_df = pd.DataFrame([user_input])

# Scale the input with the scaler fitted on the training data
user_input_scaled = scaler.transform(user_input_df)

# Predict house price
predicted_price = model.predict(user_input_scaled)[0]

# Convert to full dollars (MEDV is in $1000s)
predicted_price_usd = predicted_price * 1000

# Display prediction with currency
print(f"\nPredicted House Price: ${predicted_price_usd:,.2f}")
print("Note: The predicted price is in US Dollars based on the Boston Housing dataset (values in $1000s).")


Model evaluation on test set: MAE = 2.00, RMSE = 2.79

Enter the 5 main details of the house to predict its price:


RM (Average number of rooms per dwelling, typical range 3.5 - 9.0):  4.6
LSTAT (% lower status of the population, typical range 1.0 - 35.0):  31.66
PTRATIO (Pupil-teacher ratio by town, typical range 12.0 - 22.0):  15
CRIM (Per capita crime rate by town, typical range 0.0 - 90.0):  56.8
DIS (Weighted distances to employment centers, typical range 1.0 - 12.0):  2.5



Predicted House Price: $15,027.00
Note: The predicted price is in US Dollars based on the Boston Housing dataset (values in $1000s).
