In [13]:
import numpy as np
import pandas as pd

from sklearn.model_selection import KFold, cross_val_score
from sklearn.linear_model import LinearRegression,Ridge
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler, OrdinalEncoder
from sklearn.compose import ColumnTransformer

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error

from sklearn.decomposition import PCA

In [15]:

df = pd.read_csv("/content/gurgaon_properties_post_feature_selection_v2.csv")
df = df.drop(columns=['store room','floor_category','balcony'])

In [16]:
df.head()

Unnamed: 0,property_type,sector,price,bedRoom,bathroom,agePossession,built_up_area,servant room,furnishing_type,luxury_category
0,flat,sector 36,0.82,3.0,2.0,New Property,850.0,0.0,0.0,Low
1,flat,sector 89,0.95,2.0,2.0,New Property,1226.0,1.0,0.0,Low
2,flat,sohna road,0.32,2.0,2.0,New Property,1000.0,0.0,0.0,Low
3,flat,sector 92,1.6,3.0,4.0,Relatively New,1615.0,1.0,1.0,High
4,flat,sector 102,0.48,2.0,2.0,Relatively New,582.0,0.0,0.0,High


In [17]:
df['agePossession'] = df['agePossession'].replace(
    {
        'Relatively New':'new',
        'Moderately Old':'old',
        'New Property' : 'new',
        'Old Property' : 'old',
        'Under Construction' : 'under construction'
    }
)

In [19]:
df.head()

Unnamed: 0,property_type,sector,price,bedRoom,bathroom,agePossession,built_up_area,servant room,furnishing_type,luxury_category
0,0,sector 36,0.82,3.0,2.0,new,850.0,0.0,0.0,Low
1,0,sector 89,0.95,2.0,2.0,new,1226.0,1.0,0.0,Low
2,0,sohna road,0.32,2.0,2.0,new,1000.0,0.0,0.0,Low
3,0,sector 92,1.6,3.0,4.0,new,1615.0,1.0,1.0,High
4,0,sector 102,0.48,2.0,2.0,new,582.0,0.0,0.0,High


In [18]:
df['property_type'] = df['property_type'].replace({'flat':0,'house':1})

  df['property_type'] = df['property_type'].replace({'flat':0,'house':1})


In [20]:
df.head()

Unnamed: 0,property_type,sector,price,bedRoom,bathroom,agePossession,built_up_area,servant room,furnishing_type,luxury_category
0,0,sector 36,0.82,3.0,2.0,new,850.0,0.0,0.0,Low
1,0,sector 89,0.95,2.0,2.0,new,1226.0,1.0,0.0,Low
2,0,sohna road,0.32,2.0,2.0,new,1000.0,0.0,0.0,Low
3,0,sector 92,1.6,3.0,4.0,new,1615.0,1.0,1.0,High
4,0,sector 102,0.48,2.0,2.0,new,582.0,0.0,0.0,High


In [21]:
df['luxury_category'] = df['luxury_category'].replace({'Low':0,'Medium':1,'High':2})

In [22]:
df.head()

Unnamed: 0,property_type,sector,price,bedRoom,bathroom,agePossession,built_up_area,servant room,furnishing_type,luxury_category
0,0,sector 36,0.82,3.0,2.0,new,850.0,0.0,0.0,0
1,0,sector 89,0.95,2.0,2.0,new,1226.0,1.0,0.0,0
2,0,sohna road,0.32,2.0,2.0,new,1000.0,0.0,0.0,0
3,0,sector 92,1.6,3.0,4.0,new,1615.0,1.0,1.0,2
4,0,sector 102,0.48,2.0,2.0,new,582.0,0.0,0.0,2


In [25]:
df['agePossession'].value_counts()

Unnamed: 0_level_0,count
agePossession,Unnamed: 1_level_1
new,2331
old,946
under construction,277


In [26]:
age_map = {'new': 0.1, 'old': 0.5, 'under Construction': 0.9}
df['age_score'] = df['agePossession'].map(age_map)

In [27]:
df.head()

Unnamed: 0,property_type,sector,price,bedRoom,bathroom,agePossession,built_up_area,servant room,furnishing_type,luxury_category,age_score
0,0,sector 36,0.82,3.0,2.0,new,850.0,0.0,0.0,0,0.1
1,0,sector 89,0.95,2.0,2.0,new,1226.0,1.0,0.0,0,0.1
2,0,sohna road,0.32,2.0,2.0,new,1000.0,0.0,0.0,0,0.1
3,0,sector 92,1.6,3.0,4.0,new,1615.0,1.0,1.0,2,0.1
4,0,sector 102,0.48,2.0,2.0,new,582.0,0.0,0.0,2,0.1


In [28]:

df = df.drop(columns=['property_type', 'sector', 'luxury_category', 'agePossession'])

In [29]:
df.head()

Unnamed: 0,price,bedRoom,bathroom,built_up_area,servant room,furnishing_type,age_score
0,0.82,3.0,2.0,850.0,0.0,0.0,0.1
1,0.95,2.0,2.0,1226.0,1.0,0.0,0.1
2,0.32,2.0,2.0,1000.0,0.0,0.0,0.1
3,1.6,3.0,4.0,1615.0,1.0,1.0,0.1
4,0.48,2.0,2.0,582.0,0.0,0.0,0.1


In [30]:
X = df.drop(columns=['price'])
y = df['price']

In [31]:
from sklearn.impute import SimpleImputer
from sklearn.pipeline import make_pipeline

model = make_pipeline(
    SimpleImputer(strategy='mean'),
    StandardScaler(),
    Ridge()
)

In [32]:
model.fit(X, y)

In [35]:
x_sample = X.mean().to_frame().T
x_sample

Unnamed: 0,bedRoom,bathroom,built_up_area,servant room,furnishing_type,age_score
0,3.092572,3.267586,1861.51579,0.358751,0.391671,0.215471


In [None]:
# +1 bedroom

In [36]:
x_sample = X.mean().to_frame().T

def predict_change(feature, change):
    x_new = x_sample.copy()
    x_new[feature] += change
    price_before = model.predict(x_sample)[0]
    price_after = model.predict(x_new)[0]
    diff = price_after - price_before
    percent = (diff / price_before) * 100

    return {
        'Price Before': round(price_before, 2),
        'Price After': round(price_after, 2),
        'Increase in ₹ Cr': round(diff, 2),
        'Percent Increase': round(percent, 2)
    }

In [37]:
predict_change('bedRoom',1)

{'Price Before': np.float64(2.44),
 'Price After': np.float64(2.61),
 'Increase in ₹ Cr': np.float64(0.16),
 'Percent Increase': np.float64(6.75)}