In [12]:
# Import necessary libraries
import joblib
import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder



In [13]:
# Read the housing dataset into a DataFrame and preview its first rows.
housing_csv_path = "/content/Housing.csv"
data = pd.read_csv(housing_csv_path)
data.head()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished
3,12215000,7500,4,2,2,yes,no,yes,no,yes,3,yes,furnished
4,11410000,7420,4,1,2,yes,yes,yes,no,yes,2,no,furnished


In [15]:
# Summarize column names, dtypes and non-null counts of the loaded frame.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 545 entries, 0 to 544
Data columns (total 13 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   price             545 non-null    int64 
 1   area              545 non-null    int64 
 2   bedrooms          545 non-null    int64 
 3   bathrooms         545 non-null    int64 
 4   stories           545 non-null    int64 
 5   mainroad          545 non-null    object
 6   guestroom         545 non-null    object
 7   basement          545 non-null    object
 8   hotwaterheating   545 non-null    object
 9   airconditioning   545 non-null    object
 10  parking           545 non-null    int64 
 11  prefarea          545 non-null    object
 12  furnishingstatus  545 non-null    object
dtypes: int64(6), object(7)
memory usage: 55.5+ KB


In [3]:


# Define features and target variable: `price` is what we predict,
# every other column is a candidate feature.
X = data.drop(columns=['price'])
y = data['price']

# Split feature columns by dtype so each group gets its own preprocessing.
# Select by dtype *family* rather than exact width: a column matched by
# neither list would be silently discarded by ColumnTransformer, whose
# `remainder` defaults to 'drop'.
categorical_features = X.select_dtypes(include=['object', 'category']).columns
numerical_features = X.select_dtypes(include='number').columns




In [4]:

# Numeric columns are standardized (mean removed, scaled to unit variance).
numerical_transformer = StandardScaler()

# Categorical columns are one-hot encoded; categories not seen during fit
# are ignored at transform time instead of raising an error.
categorical_transformer = OneHotEncoder(handle_unknown='ignore')

# Route each column group to its transformer.
column_steps = [
    ('num', numerical_transformer, numerical_features),
    ('cat', categorical_transformer, categorical_features),
]
preprocessor = ColumnTransformer(transformers=column_steps)



In [5]:

# Chain preprocessing and the regressor so both are fitted and applied
# together through a single estimator object.
pipeline_steps = [
    ('preprocessor', preprocessor),
    ('regressor', LinearRegression()),
]
model = Pipeline(steps=pipeline_steps)



In [6]:

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)



In [7]:

# Fit the pipeline on the training split, then predict prices for the
# held-out split. `fit` returns the pipeline itself, so the calls chain.
y_pred = model.fit(X_train, y_train).predict(X_test)



In [22]:
 #Evaluate the model
# Score the held-out predictions with each metric in turn.
mae, mse, r2 = (
    metric(y_test, y_pred)
    for metric in (mean_absolute_error, mean_squared_error, r2_score)
)

print(f"Mean Absolute Error: {mae:}")
print(f"Mean Squared Error: {mse:}")
# r2 is multiplied by 100, so the printed figure is a percentage.
print(f"R-squared: {r2*100}")



Mean Absolute Error: 970043.4039201644
Mean Squared Error: 1754318687330.6682
R-squared: 65.29242642153174


In [9]:

# Persist the fitted pipeline (preprocessing + regressor) to disk so it can
# be reloaded later without retraining.
joblib.dump(model, 'house_price_model.pkl')



['house_price_model.pkl']

In [11]:
# Reload the persisted pipeline from disk and run it on the test split.
# NOTE: joblib.load is pickle-based and can execute arbitrary code, so only
# load files from a trusted source.
loaded_model = joblib.load('house_price_model.pkl')
loaded_predictions = loaded_model.predict(X_test)
print("Loaded model predictions:", loaded_predictions)

Loaded model predictions: [5164653.90033967 7224722.29802166 3109863.24240338 4612075.32722559
 3294646.25725956 3532275.09556558 5611774.56836474 6368145.98732718
 2722856.95689986 2629405.61585783 9617039.50315578 2798087.30447888
 3171096.76847064 3394639.09125529 3681088.65424276 5263187.74621486
 3035963.47612386 4786122.8004005  4349551.9200572  3572362.09930451
 5774875.21395649 5886993.57919883 2730836.19518459 4727316.47323636
 5244847.52716799 7555324.21605601 3220790.84680269 5191898.79934207
 8143726.91009782 3398814.09825036 6490693.05027925 3315105.90747811
 6708457.36761325 4201738.21071676 3557571.06735186 5836974.50478626
 4808660.67448475 4362878.73613262 3191242.95701508 4596554.93225239
 4566042.8604841  3517779.52374149 7205844.79365835 3983597.27861103
 3749338.70271055 4274731.09125895 6757442.10783741 4037320.43665851
 3769334.90397125 3417627.44377157 7268416.6764461  2802534.35431385
 4341750.61420451 4516422.15345642 3679997.65037848 2678959.06481153
 7498029