# 🏠 House Price Prediction using Linear Regression
This notebook uses the Kaggle dataset "House Prices - Advanced Regression Techniques" to build a linear regression model that predicts house sale prices from a small set of selected features.

In [None]:
# 📌 Step 1: Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import LabelEncoder

In [None]:
# 📌 Step 2: Load Dataset
# Read the training CSV from the notebook's working directory.
df = pd.read_csv(filepath_or_buffer='train.csv')

# Report (rows, columns), then render the first five rows as the cell output.
print(df.shape)
df.head(n=5)

In [None]:
# 📌 Step 3: Preprocessing
# Drop four columns that the model's feature list does not use.
# errors='ignore' keeps this cell re-runnable: once the columns are gone,
# a plain drop would raise KeyError on a second execution.
df = df.drop(columns=['Alley', 'PoolQC', 'Fence', 'MiscFeature'], errors='ignore')

# NOTE(review): the means and modes below are computed on the full frame,
# before the train/test split in Step 5. If any modelled feature has missing
# values, test-set statistics leak into the imputed values — confirm this is
# acceptable for the dataset at hand.

# Fill missing numeric values with each column's mean.
# fillna with a Series matches values to columns by name, so non-numeric
# columns are left untouched here.
numeric_means = df.select_dtypes(include=[np.number]).mean()
df = df.fillna(numeric_means)

# Fill missing categorical values with each column's most frequent value
for col in df.select_dtypes(include=['object']).columns:
    df[col] = df[col].fillna(df[col].mode()[0])

# Label Encoding: replace every categorical column with integer codes.
# The single encoder is refit for each column, so after the loop `le` only
# remembers the classes of the last column processed.
le = LabelEncoder()
for col in df.select_dtypes(include=['object']).columns:
    df[col] = le.fit_transform(df[col])

In [None]:
# 📌 Step 4: Feature Selection
# Predictor columns fed to the model, one per line for easy editing.
features = [
    'OverallQual',
    'GrLivArea',
    'GarageCars',
    'GarageArea',
    'TotalBsmtSF',
    '1stFlrSF',
    'YearBuilt',
    'FullBath',
]

# X holds the predictor columns; y is the target column.
X = df.loc[:, features]
y = df.loc[:, 'SalePrice']

In [None]:
# 📌 Step 5: Train-Test Split and Model Fitting
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the linear regression on the training portion only.
model = LinearRegression()
model.fit(X_train, y_train)

In [None]:
# 📌 Step 6: Evaluate Model
# Predict on the held-out rows, then score those predictions.
y_pred = model.predict(X_test)

mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

# RMSE is the square root of the MSE, which puts it back in the target's units.
rmse = np.sqrt(mse)

print(f"RMSE: {rmse:.2f}")
print(f"R² Score: {r2:.2f}")

In [None]:
# 📌 Step 7: Visualize Results
# Scatter of held-out actual values (x-axis) against the model's predictions
# (y-axis), drawn on an explicitly created Axes.
fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(x=y_test, y=y_pred, ax=ax)
ax.set_xlabel("Actual Sale Price")
ax.set_ylabel("Predicted Sale Price")
ax.set_title("Actual vs Predicted House Prices")
ax.grid(True)
plt.show()

### 🔍 Observations
- The model performs reasonably well with selected numeric features.
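- RMSE (root mean squared error) measures the typical size of the prediction error in the same units as the sale price; because errors are squared before averaging, large misses weigh more heavily than they would in a plain average error.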
- R² Score shows how well the model explains variance in price.
- Adding more relevant features can further improve performance.