In [1]:
# --------------------------
# House Price Prediction using Linear Regression
# --------------------------

# Import libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# --------------------------
# STEP 1: Load the dataset
# --------------------------
# Read the CSV (path is relative to the current working directory).
df = pd.read_csv(filepath_or_buffer="House Price Prediction Dataset.csv")

# Print a heading followed by the leading rows as a quick sanity check.
print("Dataset Preview:", df.head(), sep="\n")

# --------------------------
# STEP 2: Select features and target
# --------------------------
# Predictors: the three columns the regression is fitted on.
X = df.loc[:, ['Area', 'Bedrooms', 'Bathrooms']]
# Response: the column the model learns to predict.
y = df.loc[:, 'Price']

# --------------------------
# STEP 3: Split dataset into training and testing sets
# --------------------------
# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
split_options = {"test_size": 0.2, "random_state": 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

# --------------------------
# STEP 4: Initialize and train the model
# --------------------------
# fit() returns the estimator itself, so construction and training chain.
model = LinearRegression().fit(X_train, y_train)

# --------------------------
# STEP 5: Make predictions
# --------------------------
# Predict the target for the held-out test rows.
y_pred = model.predict(X_test)

# --------------------------
# STEP 6: Evaluate the model
# --------------------------
# Error metrics on the held-out set (true values first, predictions second).
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
rmse = np.sqrt(mse)  # square root puts the error back in the target's units

# Display results
report_lines = [
    "\n=== Model Performance ===",
    f"Mean Absolute Error (MAE): ₹{mae:,.2f}",
    f"Root Mean Squared Error (RMSE): ₹{rmse:,.2f}",
    f"R² Score: {r2:.4f}",
]
print("\n".join(report_lines))

# --------------------------
# STEP 7: Model coefficients
# --------------------------
print("\n=== Model Coefficients ===")
print(f"Intercept: {model.intercept_:.2f}")
# model.coef_ holds NumPy scalars; under NumPy >= 2 their repr is
# "np.float64(...)", which clutters the printed dict. Convert each one to a
# built-in float so the mapping prints as plain numbers keyed by feature name.
coefficients = {
    feature: float(weight) for feature, weight in zip(X.columns, model.coef_)
}
print(f"Coefficients: {coefficients}")

# --------------------------
# STEP 8: Predict price for new data
# --------------------------
# Two example houses, one row each, using the same columns the model was
# trained on and in the same order.
new_data = pd.DataFrame(
    [[2500, 3, 2], [1800, 2, 1]],
    columns=['Area', 'Bedrooms', 'Bathrooms'],
)

predicted_price = model.predict(new_data)

print("\n=== Price Prediction ===")
# Number the houses from 1 for display.
for house_no, estimate in enumerate(predicted_price, start=1):
    print(f"House {house_no} → Predicted Price: ₹{estimate:,.2f}")


Dataset Preview:
   Id  Area  Bedrooms  Bathrooms  Floors  YearBuilt  Location  Condition  \
0   1  1360         5          4       3       1970  Downtown  Excellent   
1   2  4272         5          4       3       1958  Downtown  Excellent   
2   3  3592         2          2       3       1938  Downtown       Good   
3   4   966         4          2       2       1902  Suburban       Fair   
4   5  4926         1          4       2       1975  Downtown       Fair   

  Garage   Price  
0     No  149919  
1     No  424998  
2     No  266746  
3    Yes  244020  
4    Yes  636056  

=== Model Performance ===
Mean Absolute Error (MAE): ₹243,756.48
Root Mean Squared Error (RMSE): ₹280,072.40
R² Score: -0.0082

=== Model Coefficients ===
Intercept: 561284.04
Coefficients: {'Area': np.float64(-0.09540955777516716), 'Bedrooms': np.float64(-13.25953772748403), 'Bathrooms': np.float64(-9759.66118344161)}

=== Price Prediction ===
House 1 → Predicted Price: ₹541,486.41
House 2 → Predicted Price