In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error

In [2]:
# Load the housing dataset from "housing.csv"; the path is relative, so the
# file is looked up in the notebook's current working directory.
df = pd.read_csv("housing.csv")

In [3]:
# Preview the first five rows of the raw data.
df.head(n=5)

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished
3,12215000,7500,4,2,2,yes,no,yes,no,yes,3,yes,furnished
4,11410000,7420,4,1,2,yes,yes,yes,no,yes,2,no,furnished


In [4]:
# Dataset dimensions as (rows, columns).
df.shape

(545, 13)

In [5]:
# Count missing values per column (`isna` is the primary name; `isnull` is its alias).
df.isna().sum()

price               0
area                0
bedrooms            0
bathrooms           0
stories             0
mainroad            0
guestroom           0
basement            0
hotwaterheating     0
airconditioning     0
parking             0
prefarea            0
furnishingstatus    0
dtype: int64

In [6]:
# Exclude the text-valued 'furnishingstatus' column from the features.
# Rebinding `df` (instead of inplace=True) together with errors="ignore" keeps
# this cell safe to re-run: a second execution is a no-op rather than a KeyError.
df = df.drop(columns=["furnishingstatus"], errors="ignore")

In [7]:
# Preview the first five rows again to confirm the column was removed.
df.head(n=5)

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes
3,12215000,7500,4,2,2,yes,no,yes,no,yes,3,yes
4,11410000,7420,4,1,2,yes,yes,yes,no,yes,2,no


In [8]:
# Inspect the raw categories of one yes/no column before encoding it.
mainroad_levels = df["mainroad"].unique()
print("Unique values before mapping:", mainroad_levels)

Unique values before mapping: ['yes' 'no']


In [9]:
# Encode every yes/no column as 1/0.
# "mainroad" is listed explicitly so that no catch-all DataFrame.replace pass
# over the whole frame is needed to pick it up afterwards.
binary_cols = [
    "mainroad",
    "guestroom",
    "basement",
    "hotwaterheating",
    "airconditioning",
    "prefarea",
]

# The 1 -> 1 and 0 -> 0 entries make the mapping idempotent: Series.map turns
# any value missing from the dict into NaN, so without them re-running this
# cell on already-encoded columns would wipe the data.
yes_no_codes = {"yes": 1, "no": 0, 1: 1, 0: 0}

for col in binary_cols:
    encoded = df[col].map(yes_no_codes)
    # Fail loudly instead of letting unmapped values propagate as NaN.
    if encoded.isna().any():
        raise ValueError(
            f"Column {col!r} contains missing values or values other than 'yes'/'no'"
        )
    df[col] = encoded

  df = df.replace({"yes":1,"no":0})


In [13]:
# Preview the first five rows to check the yes/no columns now hold 1/0.
df.head(n=5)

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea
0,13300000,7420,4,2,3,1,0,0,0,1,2,1
1,12250000,8960,4,4,4,1,0,0,0,1,3,0
2,12250000,9960,3,2,2,1,0,1,0,0,2,1
3,12215000,7500,4,2,2,1,0,1,0,1,3,1
4,11410000,7420,4,1,2,1,1,1,0,1,2,0


In [14]:
# Re-count missing values per column; an unmapped category would show up here as NaN.
df.isna().sum()

price              0
area               0
bedrooms           0
bathrooms          0
stories            0
mainroad           0
guestroom          0
basement           0
hotwaterheating    0
airconditioning    0
parking            0
prefarea           0
dtype: int64

In [16]:
# Target is the sale price; every remaining column is a predictor.
y = df["price"]
X = df.drop(columns="price")

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit ordinary least squares on the training portion.
# `fit` returns the estimator itself, so the final expression still displays it.
model = LinearRegression()
model.fit(X=X_train, y=y_train)

0,1,2
,fit_intercept,True
,copy_X,True
,tol,1e-06
,n_jobs,
,positive,False


In [17]:
# Predict the target for the held-out test features.
y_pred = model.predict(X_test)


In [18]:
# Coefficient of determination on the held-out test set.
r2 = r2_score(y_true=y_test, y_pred=y_pred)
print("R2 Score:", r2)


R2 Score: 0.6437296086614115


In [19]:
# Mean squared error on the held-out test set (units are the target's units, squared).
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print("Mean Squared Error:", mse)


Mean Squared Error: 1800793719718.9675


In [20]:
# Show the fitted parameters: the intercept, then one coefficient per feature column.
for label, fitted_value in (
    ("Intercept:", model.intercept_),
    ("Coefficients:", model.coef_),
):
    print(label, fitted_value)


Intercept: -72427.55025565252
Coefficients: [2.37710510e+02 7.84836476e+04 1.10932614e+06 4.26909551e+05
 4.12677757e+05 2.43606446e+05 4.35005497e+05 7.12264653e+05
 8.08415428e+05 2.47443135e+05 6.36787694e+05]


In [25]:
# Example house, keyed by feature name so each value is tied to its column
# explicitly rather than by position in a bare list.
example_features = {
    "area": 5000,
    "bedrooms": 3,
    "bathrooms": 2,
    "stories": 2,
    "mainroad": 1,
    "guestroom": 1,
    "basement": 0,
    "hotwaterheating": 1,
    "airconditioning": 0,
    "parking": 1,
    "prefarea": 0,
}

# The model was fitted on a DataFrame, so predict on a DataFrame whose columns
# carry the same names in the same order as X (selecting by X.columns enforces
# the order and raises KeyError if a feature is missing).
new_house = pd.DataFrame([example_features])[list(X.columns)]
prediction = model.predict(new_house)
print("Predicted Price:", prediction[0])


Predicted Price: 6040039.317956038




In [23]:
# Re-check the frame's dimensions as (rows, columns).
df.shape

(545, 12)

In [26]:
# Recompute the test-set R2 from the stored predictions and report it.
r2_recheck = r2_score(y_true=y_test, y_pred=y_pred)
print("R2_Score", r2_recheck)

R2_Score 0.6437296086614115


In [27]:
# Show the feature column names of X, in order.
print(X.columns)


Index(['area', 'bedrooms', 'bathrooms', 'stories', 'mainroad', 'guestroom',
       'basement', 'hotwaterheating', 'airconditioning', 'parking',
       'prefarea'],
      dtype='object')


In [28]:
# One example house; values are positional and follow the column order of X.
example_row = [5000, 3, 2, 2, 1, 1, 0, 1, 0, 1, 0]

# Wrap the row in a single-row frame that reuses X's column labels.
new_house = pd.DataFrame(data=[example_row], columns=X.columns)

prediction = model.predict(new_house)
print("Predicted Price:", prediction[0])


Predicted Price: 6040039.317956038


In [32]:
# Collect one house description from the user and predict its price.
# NOTE: this cell blocks on input(), so an unattended "Run All" stops here
# until every prompt has been answered.


def _ask_number(prompt):
    """Prompt for a finite, non-negative number and return it as a float.

    Raises:
        ValueError: if the reply is not numeric, is negative, or is NaN/inf.
    """
    value = float(input(prompt))
    # The chained comparison is False for NaN as well as for negatives and inf.
    if not (0 <= value < float("inf")):
        raise ValueError(f"Expected a finite, non-negative number, got {value}")
    return value


def _ask_flag(prompt):
    """Prompt for a yes/no flag entered as 1 or 0 and return it as an int.

    Raises:
        ValueError: if the reply is not an integer or is neither 0 nor 1.
    """
    value = int(input(prompt))
    if value not in (0, 1):
        raise ValueError(f"Expected 1 (yes) or 0 (no), got {value}")
    return value


new_area = _ask_number("Enter house area: ")
new_bedrooms = _ask_number("Enter bedrooms: ")
new_bathrooms = _ask_number("Enter bathrooms: ")
new_stories = _ask_number("Enter number of stories: ")
new_mainroad = _ask_flag("Mainroad? (1=Yes, 0=No): ")
new_guestroom = _ask_flag("Guestroom? (1=Yes, 0=No): ")
new_basement = _ask_flag("Basement? (1=Yes, 0=No): ")
new_hotwaterheating = _ask_flag("Hotwaterheating? (1=Yes, 0=No): ")
new_airconditioning = _ask_flag("Airconditioning? (1=Yes, 0=No): ")
new_parking = _ask_number("Enter parking spaces: ")
new_prefarea = _ask_flag("Prefarea? (1=Yes, 0=No): ")

# Values are listed in the same order as the columns of X, whose labels are
# reused so the model receives a frame with the feature names it was fitted on.
new_house = pd.DataFrame([[
    new_area,
    new_bedrooms,
    new_bathrooms,
    new_stories,
    new_mainroad,
    new_guestroom,
    new_basement,
    new_hotwaterheating,
    new_airconditioning,
    new_parking,
    new_prefarea,
]], columns=X.columns)

prediction = model.predict(new_house)
# The original literal held mis-encoded bytes; these are the intended house
# emoji and rupee sign.
print("\n🏠 Predicted House Price: ₹", round(prediction[0], 2))


Enter house area:  5000
Enter bedrooms:  3
Enter bathrooms:  2
Enter number of stories:  2
Mainroad? (1=Yes, 0=No):  1
Guestroom? (1=Yes, 0=No):  1
Basement? (1=Yes, 0=No):  0
Hotwaterheating? (1=Yes, 0=No):  1
Airconditioning? (1=Yes, 0=No):  1
Enter parking spaces:  1
Prefarea? (1=Yes, 0=No):  1



🏠 Predicted House Price: ₹ 7485242.44
