# 🏠 Predicting House Prices with Linear Regression
---
**Objective**: Build a linear regression model to predict house prices based on selected features.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Notebook-wide display configuration.
# None lifts pandas' cap on the number of columns shown, so wide frames print in full.
pd.options.display.max_columns = None
# Apply seaborn's 'whitegrid' style to the plots drawn later on.
sns.set(style='whitegrid')

## 📥 Load the Dataset

In [None]:
# Read the house-price data from a CSV file into a DataFrame.
# NOTE: 'house_prices.csv' is a placeholder; point it at your own dataset file.
csv_path = 'house_prices.csv'
df = pd.read_csv(csv_path)

# Preview the first rows as the cell output
df.head()

## 🧼 Data Cleaning

In [None]:
# Count the missing values in each column, then list the ten columns with the most gaps.
missing_counts = df.isna().sum()
missing_counts.sort_values(ascending=False).head(10)

In [None]:
# Drop only the rows that lack a value in a column the model actually uses.
# A blanket df.dropna() discards every row with a gap in *any* column, which on
# a wide dataset with sparsely populated columns can leave few or no rows and
# make the later train/test split fail.
# NOTE: these names mirror the feature and target chosen in the Feature
# Selection cell ('GrLivArea' and 'SalePrice'); keep the two in sync.
model_columns = ['GrLivArea', 'SalePrice']
df = df.dropna(subset=model_columns)

# Remove the identifier column when the dataset has one; errors='ignore'
# makes this a no-op if 'Id' is absent.
df = df.drop(columns=['Id'], errors='ignore')
df.head()

## 📊 Feature Selection

In [None]:
# Choose the predictor column(s) and the target column.
# Selecting with a list keeps X two-dimensional (a DataFrame); y is a single column.
feature_columns = ['GrLivArea']  # example feature; extend this list to use more
target_column = 'SalePrice'

X = df[feature_columns]
y = df[target_column]

## 🔀 Train-Test Split

In [None]:
# Hold out 20% of the rows for testing; the fixed random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## 🤖 Model Training

In [None]:
# Fit a linear regression model on the training split.
# fit() returns the estimator itself, so construction and fitting can be chained.
model = LinearRegression().fit(X_train, y_train)
model  # last expression: shows the fitted estimator as the cell output

## 📈 Model Evaluation

In [None]:
# Predict prices for the held-out test rows, then score the predictions.
y_pred = model.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print('Mean Squared Error:', mse)
print('R² Score:', r2)

## 📉 Visualization: Actual vs Predicted

In [None]:
# Scatter the actual test-set prices against the model's predictions.
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(y_test, y_pred, alpha=0.6, color='b')
ax.set_xlabel("Actual Prices")
ax.set_ylabel("Predicted Prices")
ax.set_title("Actual vs Predicted House Prices")
ax.grid(True)
plt.show()