In [2]:
import pandas as pd

# Read the training set from a CSV file located relative to the
# notebook's working directory.
data = pd.read_csv(filepath_or_buffer="train.csv")

# Preview the first five rows as the cell's rich output.
data.head(n=5)


Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,...,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,1,60,RL,65.0,8450,Pave,,Reg,Lvl,AllPub,...,0,,,,0,2,2008,WD,Normal,208500
1,2,20,RL,80.0,9600,Pave,,Reg,Lvl,AllPub,...,0,,,,0,5,2007,WD,Normal,181500
2,3,60,RL,68.0,11250,Pave,,IR1,Lvl,AllPub,...,0,,,,0,9,2008,WD,Normal,223500
3,4,70,RL,60.0,9550,Pave,,IR1,Lvl,AllPub,...,0,,,,0,2,2006,WD,Abnorml,140000
4,5,60,RL,84.0,14260,Pave,,IR1,Lvl,AllPub,...,0,,,,0,12,2008,WD,Normal,250000


In [3]:
# Narrow the frame down to the three predictor columns and the target
# column (SalePrice); all rows are kept.
selected_data = data.loc[
    :,
    ['GrLivArea', 'BedroomAbvGr', 'FullBath', 'SalePrice'],
]

# Preview the first five rows of the reduced frame.
selected_data.head(n=5)


Unnamed: 0,GrLivArea,BedroomAbvGr,FullBath,SalePrice
0,1710,3,2,208500
1,1262,3,2,181500
2,1786,3,2,223500
3,1717,3,1,140000
4,2198,4,2,250000


In [4]:
# Target vector (y): the column the model will learn to predict.
y = selected_data.loc[:, 'SalePrice']

# Feature matrix (X): the three predictor columns, in a fixed order.
X = selected_data.loc[:, ['GrLivArea', 'BedroomAbvGr', 'FullBath']]

# Report the dimensions of both objects as a quick sanity check.
print("X shape:", X.shape)
print("y shape:", y.shape)


X shape: (1460, 3)
y shape: (1460,)


In [5]:
from sklearn.model_selection import train_test_split

# Partition features and target into train/test subsets.
# test_size=0.2 holds out 20% of the rows; random_state pins the shuffle
# so repeated runs give the same split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Report the size of each partition.
print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)
print("y_train shape:", y_train.shape)
print("y_test shape:", y_test.shape)


X_train shape: (1168, 3)
X_test shape: (292, 3)
y_train shape: (1168,)
y_test shape: (292,)


In [6]:
from sklearn.linear_model import LinearRegression

# Build a linear regression estimator and fit it on the training split
# in one step; fit() returns the estimator itself, so `model` is the
# fitted object.
model = LinearRegression().fit(X_train, y_train)

print("Model training completed")


Model training completed


In [7]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

# Score the held-out rows with the fitted model.
y_pred = model.predict(X_test)

# Goodness of fit and average absolute error on the test split.
r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)

# RMSE is derived from MSE so the error is expressed in the same units
# as the target.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

# Report the metrics.
print("Mean Absolute Error (MAE):", mae)
print("Root Mean Squared Error (RMSE):", rmse)
print("R2 Score:", r2)


Mean Absolute Error (MAE): 35788.061292436294
Root Mean Squared Error (RMSE): 52975.71771338122
R2 Score: 0.6341189942328371


In [8]:
# Predict price for a custom house
# Example: 2000 sq ft, 3 bedrooms, 2 bathrooms
#
# The model was fitted on a DataFrame, so scikit-learn recorded the
# training column names. Passing a bare nested list at predict time
# triggers the "X does not have valid feature names" warning and relies
# on the values being in the right positional order. Building a one-row
# DataFrame with the same column names as the training features makes
# the mapping from value to feature explicit and keeps predict() quiet.
custom_house = pd.DataFrame(
    [[2000, 3, 2]],
    columns=['GrLivArea', 'BedroomAbvGr', 'FullBath'],
)

# predict() returns an array with one entry per input row.
predicted_price = model.predict(custom_house)

print("Predicted House Price:", predicted_price[0])


Predicted House Price: 240377.51479736282




In [9]:
import joblib

# Serialize the fitted model to a file in the current working directory.
joblib.dump(value=model, filename="house_price_model.pkl")

print("Model saved as house_price_model.pkl")


Model saved as house_price_model.pkl
