In [None]:
# house_price_prediction.ipynb (Jupyter Notebook)

# You can copy this content into a Jupyter notebook cell

# 1. Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import joblib

# 2. Load Dataset
# The CSV is read from a path relative to the working directory, so the
# file has to be placed there before this cell is run.
csv_path = "dataset/house_data.csv"
df = pd.read_csv(csv_path)
# Print the first five rows as a quick sanity check of what was loaded.
print(df.head(n=5))

# 3. Data Preprocessing
# Example columns: ['area', 'bedrooms', 'bathrooms', 'location', 'price']
# One-hot encode the 'location' column. drop_first=True keeps k-1 indicator
# columns for k categories, so the dummies are not redundant with each other.
df = pd.get_dummies(
    df,
    columns=['location'],
    drop_first=True,
)
# 'price' is the regression target; every other column is used as a feature.
y = df['price']
X = df.drop(columns='price')

# 4. Train-Test Split
# Hold out 20% of the rows for evaluation. The fixed random_state makes the
# shuffle (and therefore the reported metrics) reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# 5. Train the Model
# Fit a linear regression on the training split. fit() returns the estimator
# itself, so construction and fitting are chained into one statement.
model = LinearRegression().fit(X_train, y_train)

# 6. Predict and Evaluate
# Predict on the held-out split, then report each metric against the true
# prices. Both metric functions take (y_true, y_pred) in that order.
y_pred = model.predict(X_test)
for metric_label, metric_fn in (
    ("R2 Score:", r2_score),
    ("MSE:", mean_squared_error),
):
    print(metric_label, metric_fn(y_test, y_pred))

# 7. Save the Model
# Persist the fitted estimator so it can be reloaded later without
# retraining. The file is written relative to the working directory.
model_path = "model.pkl"
joblib.dump(model, model_path)

# 8. Visualization
# Scatter the true prices (x) against the model's predictions (y) for the
# held-out split.
plt.scatter(y_test, y_pred)
# Label the axes and title the plot in one call on the current axes, which
# is the same axes the scatter above was drawn on.
plt.gca().set(
    xlabel="Actual Prices",
    ylabel="Predicted Prices",
    title="Actual vs Predicted House Prices",
)
plt.show()
