# 🏠 Boston Housing Price Prediction (Python Version)
Linear regression project to predict median home prices using the Boston Housing dataset.

In [None]:
# Step 1: Import libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Step 2: Load dataset
# Read the CSV into a DataFrame; the relative path is resolved against the
# current working directory.
df = pd.read_csv(filepath_or_buffer='BostonHousing-1.csv')
# Preview the first five rows as a quick sanity check of the load.
df.head(n=5)

In [None]:
# Step 3: Partition data (60:25:15)
# Both splits share one seed so the partition is reproducible across runs.
split_seed = 42
# Stage one: set aside 40% of the rows; the remaining 60% is the training set.
train_df, temp_df = train_test_split(
    df,
    test_size=0.4,
    random_state=split_seed,
)
# Stage two: 37.5% of the 40% set-aside (15% of all rows) becomes the holdout
# set; the rest of it (25% of all rows) becomes the validation set.
val_df, holdout_df = train_test_split(
    temp_df,
    test_size=0.375,
    random_state=split_seed,
)
# Report the resulting row counts to confirm the proportions.
print(f'Train: {len(train_df)}, Validation: {len(val_df)}, Holdout: {len(holdout_df)}')

In [None]:
# Step 4: Fit linear regression model on Train set (CRIM, CHAS, RM)
# Target is MEDV; predictors are the CRIM, CHAS and RM columns.
y_train = train_df['MEDV']
X_train = train_df[['CRIM', 'CHAS', 'RM']]
# fit() returns the fitted estimator, so construction and training are chained.
model = LinearRegression().fit(X_train, y_train)
print('Intercept:', model.intercept_)
# Pair each predictor name with its learned coefficient for readable output.
print('Coefficients:', {name: coef for name, coef in zip(X_train.columns, model.coef_)})

In [None]:
# Step 5: Predict on custom input: CRIM=0.1, CHAS=0, RM=6
# One-row frame whose columns match the training predictors in name and order.
input_data = pd.DataFrame({'CRIM': [0.1], 'CHAS': [0], 'RM': [6]})
# predict() returns an array with one entry per input row; only one row here.
predicted_price = model.predict(input_data)
print('Predicted Median House Price ($1000s):', round(predicted_price[0], 2))

In [None]:
# Step 6: Evaluate on Validation set
# Same predictor columns and target as used for training.
y_val = val_df['MEDV']
X_val = val_df[['CRIM', 'CHAS', 'RM']]
y_pred_val = model.predict(X_val)
# MAE: mean absolute difference between actual and predicted values.
mae_val = mean_absolute_error(y_val, y_pred_val)
# RMSE: square root of the mean squared error.
mse_val = mean_squared_error(y_val, y_pred_val)
rmse_val = np.sqrt(mse_val)
# MAPE: mean of |actual - predicted| / |actual|, expressed as a percentage.
abs_pct_err_val = np.abs((y_val - y_pred_val) / y_val)
mape_val = np.mean(abs_pct_err_val) * 100
print(f'Validation RMSE: {rmse_val:.2f}')
print(f'Validation MAE: {mae_val:.2f}')
print(f'Validation MAPE: {mape_val:.2f}%')

In [None]:
# Step 7: Evaluate on Holdout set
# Same predictor columns and target as used for training.
y_hold = holdout_df['MEDV']
X_hold = holdout_df[['CRIM', 'CHAS', 'RM']]
y_pred_hold = model.predict(X_hold)
# MAE: mean absolute difference between actual and predicted values.
mae_hold = mean_absolute_error(y_hold, y_pred_hold)
# RMSE: square root of the mean squared error.
mse_hold = mean_squared_error(y_hold, y_pred_hold)
rmse_hold = np.sqrt(mse_hold)
# MAPE: mean of |actual - predicted| / |actual|, expressed as a percentage.
abs_pct_err_hold = np.abs((y_hold - y_pred_hold) / y_hold)
mape_hold = np.mean(abs_pct_err_hold) * 100
print(f'Holdout RMSE: {rmse_hold:.2f}')
print(f'Holdout MAE: {mae_hold:.2f}')
print(f'Holdout MAPE: {mape_hold:.2f}%')