# In [None]:
# 🧠 1. Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# 📂 2. Load the dataset (make sure the CSV is in the same folder as this notebook)
df = pd.read_csv("house_prices.csv")

# 👀 Show the first 5 rows
# A bare `df.head()` is only rendered when it is the last expression of a
# notebook cell (and never when run as a script), so print it explicitly.
print(df.head())

# 🧹 3. Data cleaning – keep only rows where every model column (inputs and
# target) is present
features = ['sqft_living', 'bedrooms', 'bathrooms', 'floors']
df = df.dropna(subset=[*features, 'price'])

# 🧱 4. Split the cleaned frame into the feature matrix (X) and the target (y)
X, y = df[features], df['price']

# 🧠 5. Fit a linear regression on all cleaned rows
# (`fit` returns the estimator itself, so construction and fitting can be chained)
model = LinearRegression().fit(X, y)

# 💵 6. Predict house price for a new example
example = [[2000, 3, 2, 1]]  # [sqft_living, bedrooms, bathrooms, floors]
# The model was fitted on a DataFrame, so predict on a DataFrame carrying the
# same column names: this ties each value to its feature by name instead of by
# position and avoids scikit-learn's "X does not have valid feature names" warning.
example_df = pd.DataFrame(example, columns=features)
predicted_price = model.predict(example_df)[0]

# "\ud83d\udcc8" (a JSON-style surrogate pair) yields two lone surrogates in a
# Python string, and printing them raises UnicodeEncodeError. Use the single
# code point escape for the chart emoji instead.
print(f"\U0001F4C8 Predicted price for a 2000 sqft, 3-bed, 2-bath, 1-floor house: \u00a3{predicted_price:,.2f}")

# 📊 7. Evaluate the model
# NOTE: predictions are made on the same rows the model was fitted on, so both
# numbers below are in-sample scores, not an estimate of performance on unseen data.
y_pred = model.predict(X)

mse = mean_squared_error(y_true=y, y_pred=y_pred)
r2 = r2_score(y_true=y, y_pred=y_pred)

print(f"R² Score: {r2:.2f}")
print(f"Mean Squared Error: {mse:,.0f}")

# 📋 8. Compare actual vs predicted prices (sample)
# Take the first 10 rows by position; `.to_numpy()` drops the Series index so
# the actual values line up row-by-row with the plain array of predictions.
comparison = pd.DataFrame({
    "Actual": y.iloc[:10].to_numpy(),
    "Predicted": y_pred[:10],
})
# A bare `comparison` in the middle of a cell/script is never displayed,
# so print the table explicitly.
print(comparison)

# 📈 9. Visualize Actual vs Predicted Prices
fig, ax = plt.subplots(figsize=(8, 6))

# One translucent point per house: x = actual price, y = predicted price
ax.scatter(y, y_pred, alpha=0.3, color='blue')

# Dashed red diagonal (predicted == actual) spanning the observed price range
price_range = [y.min(), y.max()]
ax.plot(price_range, price_range, color='red', linestyle='--')

ax.set_xlabel("Actual Prices")
ax.set_ylabel("Predicted Prices")
ax.set_title("Actual vs Predicted House Prices")
ax.grid(True)

fig.tight_layout()
plt.show()
