In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Build the column list: the three key columns first, followed by every
# other column found in the CSV header (nrows=0 reads the header only).
# NOTE(review): cols ends up naming every column in the file, so usecols
# does not narrow the load -- confirm whether a smaller subset was intended.
csv_path = 'cleaned_dataset.csv'
lead_cols = ['Amount', 'Price', 'Area']
header_cols = pd.read_csv(csv_path, nrows=0).columns
cols = lead_cols + [name for name in header_cols if name not in lead_cols]
print(cols)
df = pd.read_csv(csv_path, usecols=cols)

# Columns that get standardized as numeric inputs, and the regression target.
num_features = ['Price', 'Area']
target = 'Amount'

# Hold out 20% of the rows for testing (fixed seed so the split is repeatable).
# Each split is copied explicitly because the scaling steps below assign new
# values into both frames; writing into the objects returned by
# train_test_split can trigger pandas' SettingWithCopyWarning.
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)
train_df = train_df.copy()
test_df = test_df.copy()

# Standardize the numeric features. The scaler is fitted on the training
# split only and then applied unchanged to the test split.
scaler_X = StandardScaler()
train_df[num_features] = scaler_X.fit_transform(train_df[num_features])
test_df[num_features] = scaler_X.transform(test_df[num_features])

# Standardize the target the same way (fit on train, reuse on test).
# The scaler returns an (n, 1) array; ravel() flattens it so a plain 1-D
# vector is assigned to the single target column.
scaler_y = StandardScaler()
train_df[target] = scaler_y.fit_transform(train_df[[target]]).ravel()
test_df[target] = scaler_y.transform(test_df[[target]]).ravel()

# Prepare arrays.
# Convert explicitly to float: a frame that mixes dtypes (e.g. bool dummy
# columns next to float columns) would otherwise come back as an object
# array, which the linear-algebra routines used for fitting cannot handle.
y_train = train_df[target].to_numpy(dtype=float)
y_test_scaled = test_df[target].to_numpy(dtype=float)
X_train = train_df.drop(columns=[target]).to_numpy(dtype=float)
X_test = test_df.drop(columns=[target]).to_numpy(dtype=float)

# Add intercept term: a leading column of ones in each design matrix.
X_train_const = np.hstack([np.ones((X_train.shape[0], 1)), X_train])
X_test_const = np.hstack([np.ones((X_test.shape[0], 1)), X_test])

# Least-squares / Gaussian MLE fit in scaled space: β̂ minimizes ||Xβ - y||².
# When XᵀX is invertible this equals the closed form (XᵀX)⁻¹ Xᵀy, but the
# normal equations are not solved directly: an intercept column combined with
# complete one-hot groups makes XᵀX singular, so np.linalg.solve could raise
# LinAlgError or return unstable coefficients. lstsq handles the
# rank-deficient case and returns the minimum-norm solution.
beta_hat, _residuals, _rank, _sing_vals = np.linalg.lstsq(
    X_train_const, y_train, rcond=None
)

# Predict (scaled)
y_pred_scaled = X_test_const.dot(beta_hat)

# Compute metrics on scaled data: both the reference values and the
# predictions are in standardized target units, so RMSE and MAE are
# expressed in those units rather than in the original target units.
rmse_scaled = np.sqrt(mean_squared_error(y_test_scaled, y_pred_scaled))
mae_scaled = mean_absolute_error(y_test_scaled, y_pred_scaled)
r2_scaled = r2_score(y_test_scaled, y_pred_scaled)

# Label the numbers as scaled so they are not mistaken for errors measured
# in the original target units.
print(f"Scaled RMSE: {rmse_scaled:.4f}")
print(f"Scaled MAE:  {mae_scaled:.4f}")
print(f"Scaled R²:   {r2_scaled:.4f}")

['Amount', 'Price', 'Area', 'Bathroom', 'Balcony', 'Car_Parking', 'Remaining_Floor', 'Garden/Park', 'Pool', 'Main Road', 'BHK', 'Location_Other', 'Location_ahmedabad', 'Location_bangalore', 'Location_chennai', 'Location_faridabad', 'Location_greater-noida', 'Location_gurgaon', 'Location_hyderabad', 'Location_jaipur', 'Location_kolkata', 'Location_new-delhi', 'Location_pune', 'Location_surat', 'Location_thane', 'Location_vadodara', 'Transaction_New Property', 'Transaction_Other', 'Transaction_Resale', 'Furnishing_Furnished', 'Furnishing_Semi-Furnished', 'Furnishing_Unfurnished', 'Facing_East', 'Facing_North', 'Facing_North - East', 'Facing_North - West', 'Facing_South', 'Facing_South - East', 'Facing_South - West', 'Facing_Unknown', 'Facing_West']
Scaled RMSE: 0.4901
Scaled MAE:  0.3011
Scaled R²:   0.7586
