In [None]:
# Linear Regression on Indian Houses Dataset

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# 1. Load dataset
# NOTE: the path is relative, so the CSV must be reachable from the current
# working directory.
houses_csv = "indian_houses_dataset.csv"
df = pd.read_csv(houses_csv)
print("Dataset Preview:")
print(df.head())  # quick look at the first rows to sanity-check the columns

# 2. Features and Target
feature_columns = ["Square_Feet", "Bedrooms", "Bathrooms"]
target_column = "Price_INR"
X = df[feature_columns]  # list selection -> DataFrame of predictors
y = df[target_column]  # single label -> Series holding the value to predict

# 3. Train-Test Split
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
split_options = {"test_size": 0.2, "random_state": 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

# 4. Train Linear Regression Model
# fit() returns the estimator itself, so construction and training are chained.
model = LinearRegression().fit(X_train, y_train)

# 5. Predictions
# Predictions on the held-out rows only; these feed the evaluation below.
y_pred = model.predict(X_test)

# 6. Evaluation
# Each metric compares the held-out targets with the model's predictions.
print("\nModel Evaluation:")
r2 = r2_score(y_test, y_pred)
print(f"R² Score: {r2:.4f}")
mae = mean_absolute_error(y_test, y_pred)
print(f"Mean Absolute Error: {mae:,.2f}")
rmse = np.sqrt(mean_squared_error(y_test, y_pred))  # RMSE = sqrt(MSE)
print(f"Root Mean Squared Error: {rmse:,.2f}")

# 7. Model Coefficients
print("\nModel Coefficients:")
print(f"Intercept: {model.intercept_:.2f}")
# Labels are listed in the same order as the feature columns used for training,
# so position i of coef_ belongs to label i.
coefficient_labels = ("Square_Feet", "Bedrooms", "Bathrooms")
for label, weight in zip(coefficient_labels, model.coef_):
    print(f"{label} Coefficient: {weight:.2f}")

# 8. Visualization - Actual vs Predicted
# Same chart as the pyplot state-machine calls would build, written against an
# explicit Figure/Axes pair.
fig, ax = plt.subplots(figsize=(8, 5))
ax.scatter(y_test, y_pred, alpha=0.7, color="blue")
ax.set_xlabel("Actual Prices (INR)")
ax.set_ylabel("Predicted Prices (INR)")
ax.set_title("Actual vs Predicted House Prices")
# Reference diagonal (predicted == actual) spanning the observed price range.
price_range = [y_test.min(), y_test.max()]
ax.plot(price_range, price_range, color="red", linewidth=2)
plt.show()

# 9. Try a New Prediction
# The model was fitted on a DataFrame, so it recorded the training column names.
# Predicting from a bare NumPy array makes scikit-learn warn that
# "X does not have valid feature names"; a one-row DataFrame carrying the
# training columns keeps the input labelled and silences that warning.
sample = pd.DataFrame(
    [[1500, 3, 2]],  # 1500 sq.ft, 3 bedrooms, 2 bathrooms
    columns=X_train.columns,
)
predicted_price = model.predict(sample)  # array with one value for the one row
print(f"\nPredicted price for 1500 sq.ft, 3 BHK, 2 Bath: ₹{predicted_price[0]:,.2f}")
