# In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# ==========================================
# Step 1: Load Data
# ==========================================
# OPTION A: If you have a CSV (uncomment below)
# df = pd.read_csv('housing.csv') 

# OPTION B: Dummy data (Run this if you don't have a file)
# We need at least 5 features as per your assignment
# Small hand-written dataset: ten houses, five feature columns plus the
# target column. Each list holds one value per house, in the same row order.
data = dict(
    Area=[1500, 1800, 2400, 3000, 3500, 4000, 1200, 2000, 2500, 2800],
    Bedrooms=[3, 4, 3, 5, 4, 5, 2, 3, 4, 4],
    Bathrooms=[2, 3, 2, 3, 4, 4, 1, 2, 3, 3],
    Age=[10, 5, 20, 2, 8, 1, 30, 15, 12, 6],
    Garage=[1, 2, 2, 3, 3, 3, 0, 1, 2, 2],
    Price=[300, 400, 350, 600, 650, 700, 250, 380, 420, 500],  # Target (y)
)

# Column order follows the insertion order of the keys above.
df = pd.DataFrame(data)

# ==========================================
# Step 2: Preprocessing
# ==========================================
# The five predictor columns form the feature matrix X; 'Price' is the
# target vector y that the model learns to predict.
feature_columns = ['Area', 'Bedrooms', 'Bathrooms', 'Age', 'Garage']
X = df[feature_columns]
y = df['Price']

# ==========================================
# Step 3: Split Data
# ==========================================
# Hold out 20% of the rows for testing (test_size=0.2); the fixed
# random_state makes the shuffle, and therefore the split, reproducible.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

# ==========================================
# Step 4: Train Model
# ==========================================
# Fit a linear regression with default settings on the training rows.
# fit() returns the estimator itself, so construction and fitting chain.
model = LinearRegression().fit(X_train, y_train)

# ==========================================
# Step 5: Prediction
# ==========================================
# Predict prices for the held-out test rows (used for evaluation below).
y_pred = model.predict(X_test)

# Predict for a specific house (Example: 2000 sqft, 3 bed, 2 bath, 10 yrs old, 1 garage).
# The model was fitted on a DataFrame, so the sample is wrapped in a DataFrame
# with the same column names and order. Passing a bare nested list makes
# scikit-learn warn that "X does not have valid feature names" and relies on
# the values silently lining up with the training columns by position.
sample_house = pd.DataFrame([[2000, 3, 2, 10, 1]], columns=X_train.columns)
predicted_price = model.predict(sample_house)
print(f"Predicted Price for sample house: {predicted_price[0]:.2f}")

# ==========================================
# Step 6: Evaluation (Report Coefficients & MSE)
# ==========================================
# Learned parameters: one weight per feature column, plus the bias term.
print("Coefficients (Weights):", model.coef_)
print("Intercept (Bias):", model.intercept_)

# Average squared difference between actual and predicted test prices.
test_mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error (MSE):", test_mse)

# ==========================================
# Step 7: Visualization (Actual vs Predicted)
# ==========================================
# The model has 5 input features, so rather than plotting against any single
# one we compare each actual test price with the model's prediction for it.
plt.scatter(y_test, y_pred, color='blue')

# Reference diagonal (y = x): a point on this line is a perfect prediction.
# Its endpoints span the full range of both the actual and predicted values.
min_val = min(y_test.min(), y_pred.min())
max_val = max(y_test.max(), y_pred.max())
diagonal = [min_val, max_val]
plt.plot(diagonal, diagonal, color='red', linestyle='--')

plt.title('Actual vs Predicted House Prices')
plt.xlabel('Actual Prices')
plt.ylabel('Predicted Prices')

plt.show()