# 🏠 House Price Prediction using Machine Learning

This notebook demonstrates how to predict house prices from four features (**area, bedrooms, bathrooms, and location score**) using **Linear Regression and Random Forest**. The data is synthetic: it is generated in Step 1 rather than loaded from a real-world source.

In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score


## 📊 Step 1: Create Synthetic Dataset

In [None]:

# Seed NumPy's global RNG so the generated dataset is identical on every run.
np.random.seed(42)
n_samples = 500

# Weights of the linear rule that turns the features into a price.
AREA_WEIGHT = 300
BEDROOM_WEIGHT = 50000
BATHROOM_WEIGHT = 30000
LOCATION_WEIGHT = 10000

# Features (randint's upper bound is exclusive). The draws share one seeded
# stream, so their order determines the values each array receives.
area = np.random.randint(500, 3500, size=n_samples)
bedrooms = np.random.randint(1, 6, size=n_samples)
bathrooms = np.random.randint(1, 4, size=n_samples)
location_score = np.random.randint(1, 11, size=n_samples)

# Integer noise drawn uniformly from [-50000, 50000).
noise = np.random.randint(-50000, 50000, size=n_samples)

# Target variable (Price): weighted sum of the features plus the noise term.
price = (
    area * AREA_WEIGHT
    + bedrooms * BEDROOM_WEIGHT
    + bathrooms * BATHROOM_WEIGHT
    + location_score * LOCATION_WEIGHT
    + noise
)

# Assemble the feature columns and the target into a single DataFrame.
columns = {
    'Area': area,
    'Bedrooms': bedrooms,
    'Bathrooms': bathrooms,
    'Location_Score': location_score,
    'Price': price,
}
data = pd.DataFrame(columns)

data.head()


## 📊 Step 2: Data Visualization

In [None]:

# Scatter plot of price against area; alpha=0.5 keeps overlapping points visible.
fig, ax = plt.subplots()
ax.scatter(data['Area'], data['Price'], alpha=0.5)
ax.set(xlabel='Area (sq ft)', ylabel='Price', title='Area vs Price')
plt.show()


## 🔀 Step 3: Train-Test Split

In [None]:

# Columns used as model inputs; 'Price' is the target.
feature_columns = ['Area', 'Bedrooms', 'Bathrooms', 'Location_Score']
X = data[feature_columns]
y = data['Price']

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


## 🤖 Step 4: Train Linear Regression Model

In [None]:

# Fit ordinary least squares on the training split (fit() returns the estimator).
lin_reg = LinearRegression().fit(X_train, y_train)
y_pred_lr = lin_reg.predict(X_test)

# Score on the held-out split: RMSE is the square root of the MSE, so it is in
# the same units as the target.
lr_mse = mean_squared_error(y_test, y_pred_lr)
lr_rmse = np.sqrt(lr_mse)
lr_r2 = r2_score(y_test, y_pred_lr)

print("Linear Regression RMSE:", lr_rmse)
print("Linear Regression R² Score:", lr_r2)


## 🌲 Step 5: Train Random Forest Model

In [None]:

# Fit a 100-tree random forest on the training split; the fixed random_state
# makes the fitted model reproducible.
rf_reg = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)
y_pred_rf = rf_reg.predict(X_test)

# Score on the held-out split with the same metrics as the linear model.
rf_mse = mean_squared_error(y_test, y_pred_rf)
rf_rmse = np.sqrt(rf_mse)
rf_r2 = r2_score(y_test, y_pred_rf)

print("Random Forest RMSE:", rf_rmse)
print("Random Forest R² Score:", rf_r2)


## 📈 Step 6: Model Comparison

In [None]:

# Collect the held-out metrics of both models; after the transpose each row is
# a model and each column is a metric.
results = {
    "Linear Regression": {"RMSE": lr_rmse, "R2 Score": lr_r2},
    "Random Forest": {"RMSE": rf_rmse, "R2 Score": rf_r2}
}

results_df = pd.DataFrame(results).T
print(results_df)

# RMSE is expressed in price units while R² is at most 1. On a shared y-axis
# the R² bars are flattened against the baseline, so each metric gets its own
# panel with an independent scale.
fig, axes = plt.subplots(1, len(results_df.columns), figsize=(12, 5))
for ax, metric in zip(axes, results_df.columns):
    results_df[metric].plot(kind='bar', ax=ax, rot=0)
    ax.set_title(metric)
    ax.set_ylabel(metric)

fig.suptitle("Model Comparison (RMSE and R² Score)")
fig.tight_layout()
plt.show()


## 🚀 Step 7: Future Improvements


- Try more advanced models such as XGBoost or LightGBM.  
- Perform hyperparameter tuning using GridSearchCV.  
- Deploy the model with Flask or Streamlit.  
