<a href="https://colab.research.google.com/github/Nawin03-DS/ML-Project/blob/main/HOUSE_PRICE_PREDICTION.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

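# PROJECT 1: HOUSE PRICE PREDICTION

Trains a linear regression model and a random forest regressor on synthetic house data, then compares their R² score and RMSE on a held-out test set.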

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt

# Build a reproducible synthetic housing dataset.
# Seeding the legacy global NumPy RNG makes every draw below repeatable.
np.random.seed(42)
n_samples = 1000

# Half-open integer range [low, high) for each feature column.
# The dict order is also the order in which the RNG is consumed,
# so it must not be rearranged if the seeded values are to stay the same.
feature_ranges = {
    'square_feet': (800, 4000),
    'bedrooms': (1, 6),
    'bathrooms': (1, 4),
    'age': (0, 50),
    'garage': (0, 3),
}

house_data = pd.DataFrame({
    column: np.random.randint(low, high, n_samples)
    for column, (low, high) in feature_ranges.items()
})

# Linear contribution of each feature to the price; age lowers the price.
price_weights = {
    'square_feet': 150,
    'bedrooms': 10000,
    'bathrooms': 15000,
    'age': -2000,
    'garage': 8000,
}

# Integer-valued weighted sum of the features (exact arithmetic).
base_price = sum(
    house_data[column] * weight for column, weight in price_weights.items()
)

# Target = deterministic part + Gaussian noise (mean 0, std 20000),
# drawn after all feature columns have been generated.
house_data['price'] = base_price + np.random.normal(0, 20000, n_samples)

# Separate the predictors from the target column.
target_column = 'price'
X = house_data.drop(columns=target_column)
y = house_data[target_column]

# Hold out 20% of the rows for evaluation; the fixed random_state keeps
# the partition identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Linear regression: fit on the training split, then predict the test split.
# fit() returns the fitted estimator, so construction and training chain.
lr_model = LinearRegression().fit(X_train, y_train)
lr_pred = lr_model.predict(X_test)

# Random forest with 100 trees; random_state fixes the forest's own
# randomness so results are repeatable.
rf_model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)
rf_pred = rf_model.predict(X_test)

# Score both models on the held-out test split.
# RMSE is the square root of the mean squared error.
lr_r2 = r2_score(y_test, lr_pred)
lr_rmse = np.sqrt(mean_squared_error(y_test, lr_pred))
rf_r2 = r2_score(y_test, rf_pred)
rf_rmse = np.sqrt(mean_squared_error(y_test, rf_pred))

print(f"\nLinear Regression - R² Score: {lr_r2:.4f}")
print(f"Linear Regression - RMSE: ${lr_rmse:,.2f}")
print(f"\nRandom Forest - R² Score: {rf_r2:.4f}")
print(f"Random Forest - RMSE: ${rf_rmse:,.2f}")

# Report the random forest's importance score for each feature, in
# column order.
print("\nFeature Importance (Random Forest):")
rf_importances = rf_model.feature_importances_
for position, feature in enumerate(X.columns):
    print(f"{feature}: {rf_importances[position]:.4f}")


Linear Regression - R² Score: 0.9790
Linear Regression - RMSE: $19,313.52

Random Forest - R² Score: 0.9638
Random Forest - RMSE: $25,347.68

Feature Importance (Random Forest):
square_feet: 0.9435
bedrooms: 0.0084
bathrooms: 0.0045
age: 0.0409
garage: 0.0027
