# **House Price Prediction - Regression Model**

In [1]:
!pip install pandas numpy scikit-learn -q

### **1. Import Libraries**

In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score


### **2. Load Dataset**

In [6]:
# Read the housing table from a CSV located relative to the working directory.
data = pd.read_csv(filepath_or_buffer='house_prices.csv')
print("Dataset loaded successfully!")

Dataset loaded successfully!


In [7]:
# Preview the first five rows (head() defaults to n=5).
data.head()

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,...,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,1,60,RL,65.0,8450,Pave,,Reg,Lvl,AllPub,...,0,,,,0,2,2008,WD,Normal,208500
1,2,20,RL,80.0,9600,Pave,,Reg,Lvl,AllPub,...,0,,,,0,5,2007,WD,Normal,181500
2,3,60,RL,68.0,11250,Pave,,IR1,Lvl,AllPub,...,0,,,,0,9,2008,WD,Normal,223500
3,4,70,RL,60.0,9550,Pave,,IR1,Lvl,AllPub,...,0,,,,0,2,2006,WD,Abnorml,140000
4,5,60,RL,84.0,14260,Pave,,IR1,Lvl,AllPub,...,0,,,,0,12,2008,WD,Normal,250000


### **3. Data Cleaning**

In [8]:
# Drop the 'Id' column, which this notebook treats as unnecessary for modelling.
# The frame is rebound instead of mutated in place, and errors='ignore' makes
# the cell safe to re-run: a second execution no longer raises KeyError
# because 'Id' is already gone.
data = data.drop(columns=['Id'], errors='ignore')

In [10]:
# Impute missing values in numeric columns with each column's own median;
# non-numeric columns are not touched by this cell.
# NOTE(review): the medians are computed over every row, before the
# train/test split performed later in the notebook, so held-out rows
# influence the imputed values.
numeric_columns = data.select_dtypes(include='number').columns
column_medians = data[numeric_columns].median()
data[numeric_columns] = data[numeric_columns].fillna(column_medians)


In [11]:
# Impute missing values in object-dtype columns with the column's most
# frequent value. mode() may return several rows when values tie, so the
# first row is taken as the fill value for each column.
categorical_columns = data.select_dtypes(include='object').columns
most_frequent = data[categorical_columns].mode().iloc[0]
data[categorical_columns] = data[categorical_columns].fillna(most_frequent)


In [23]:
# Replace the categorical columns with indicator (dummy) columns.
data = pd.get_dummies(data=data)

### **4. Split Dataset**

In [18]:
# Separate the target ('SalePrice') from the predictors, then hold out 20% of
# the rows for evaluation. The fixed random_state keeps the partition
# reproducible across runs.
y = data['SalePrice']
X = data.drop(columns='SalePrice')

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

### **5. Build and Train Model**

In [19]:
# Fit an ordinary least-squares regressor on the training partition.
# fit() returns the estimator itself, so construction and training are chained;
# the trailing bare name keeps the estimator as the cell's displayed result.
model = LinearRegression().fit(X_train, y_train)
model

### **6. Evaluate Model**

In [20]:
# Predict on the held-out rows, then summarise the fit with R² and the
# mean squared error between true and predicted prices.
y_pred = model.predict(X_test)
r2 = r2_score(y_true=y_test, y_pred=y_pred)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)

In [21]:
# Report both metrics, one per line.
print(
    f"Mean Squared Error: {mse}",
    f"R² Score: {r2}",
    sep="\n",
)

Mean Squared Error: 873658179.2064384
R² Score: 0.8860989538635505


### **7. Save Model Predictions**

In [22]:

# Pair each true sale price with the model's estimate and write the table to
# disk; the row index is left out of the file.
predictions = pd.DataFrame({'Actual': y_test}).assign(Predicted=y_pred)
predictions.to_csv('house_price_predictions.csv', index=False)
print("Predictions saved!")

Predictions saved!
