# Machine Learning Algorithm

In [1]:
import pandas as pd
import numpy as np
from lightgbm import LGBMRegressor
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV
import pickle
import warnings
warnings.filterwarnings('ignore')

In [2]:
data_cv = pd.read_csv('inputs/real_estate_model_cv.csv')
data_cv.head()

Unnamed: 0,City,State,Country,Property Type clean,Sub-Area clean,Company Name clean,TownShip Name/ Society Name clean,Description clean,ClubHouse Clean,School / University in Township Clean,...,amenities like,boasts elegant,elegant towers,great community,mantra gold,offering bedroom,quality specification,stories offering,towers stories,world class
0,pune,maharashtra,india,1.0,bavdhan,shapoorji paloonji,vanaha,shapoorji paloonji comunity located suburbs ba...,1,1,...,0,0,0,0,0,0,0,0,0,0
1,pune,maharashtra,india,2.0,bavdhan,shapoorji paloonji,vanaha,vanaha township located near lonavala hill ran...,1,1,...,0,0,0,0,0,0,0,0,0,0
2,pune,maharashtra,india,3.0,bavdhan,shapoorji paloonji,vanaha,vanaha society suitable aged group people play...,1,1,...,0,0,0,0,0,0,0,0,0,0
3,pune,maharashtra,india,3.0,bavdhan,shapoorji paloonji,vanaha,vanaha township offering bhk grand prpoerties ...,1,1,...,0,0,0,0,0,0,0,0,0,0
4,pune,maharashtra,india,2.0,mahalunge,godrej properties,godrej hills retreat,area hub prestigious schools like bishop high ...,1,1,...,0,0,0,0,0,0,0,0,0,0


In [3]:
data_cv.columns = [col.upper() for col in data_cv.columns.tolist()]

In [4]:
data_cv.columns

Index(['CITY', 'STATE', 'COUNTRY', 'PROPERTY TYPE CLEAN', 'SUB-AREA CLEAN',
       'COMPANY NAME CLEAN', 'TOWNSHIP NAME/ SOCIETY NAME CLEAN',
       'DESCRIPTION CLEAN', 'CLUBHOUSE CLEAN',
       'SCHOOL / UNIVERSITY IN TOWNSHIP CLEAN', 'HOSPITAL IN TOWNSHIP CLEAN',
       'MALL IN TOWNSHIP CLEAN', 'PARK / JOGGING TRACK CLEAN',
       'SWIMMING POOL CLEAN', 'GYM CLEAN', 'PROPERTY AREA IN SQ. FT. CLEAN',
       'PRICE IN LAKHS CLEAN', 'LOG PRICE IN LAKHS CLEAN', 'PRICE BY SUB-AREA',
       'PRICE BY COMPANY', 'PRICE BY TOWNSHIP_SOCIETY', 'AMENITIES SCORE',
       'PRICE BY AMENITIES SCORE', 'NOUN_COUNTS', 'VERB_COUNTS',
       'ADJECTIVE_COUNTS', 'COMPOUND', 'NEGATIVE', 'POSITIVE', 'NEUTRAL',
       'AMENITIES LIKE', 'BOASTS ELEGANT', 'ELEGANT TOWERS', 'GREAT COMMUNITY',
       'MANTRA GOLD', 'OFFERING BEDROOM', 'QUALITY SPECIFICATION',
       'STORIES OFFERING', 'TOWERS STORIES', 'WORLD CLASS'],
      dtype='object')

In [5]:
# Selecting only numerical features
cols_to_drop = ['CITY','STATE','COUNTRY','SUB-AREA CLEAN','TOWNSHIP NAME/ SOCIETY NAME CLEAN',
                'DESCRIPTION CLEAN','COMPANY NAME CLEAN', 'LOG PRICE IN LAKHS CLEAN']

features = data_cv.drop(cols_to_drop,axis=1).columns.tolist()
print("Final number of features: "+str(len(features)))

Final number of features: 32


In [6]:
print(features)

['PROPERTY TYPE CLEAN', 'CLUBHOUSE CLEAN', 'SCHOOL / UNIVERSITY IN TOWNSHIP CLEAN', 'HOSPITAL IN TOWNSHIP CLEAN', 'MALL IN TOWNSHIP CLEAN', 'PARK / JOGGING TRACK CLEAN', 'SWIMMING POOL CLEAN', 'GYM CLEAN', 'PROPERTY AREA IN SQ. FT. CLEAN', 'PRICE IN LAKHS CLEAN', 'PRICE BY SUB-AREA', 'PRICE BY COMPANY', 'PRICE BY TOWNSHIP_SOCIETY', 'AMENITIES SCORE', 'PRICE BY AMENITIES SCORE', 'NOUN_COUNTS', 'VERB_COUNTS', 'ADJECTIVE_COUNTS', 'COMPOUND', 'NEGATIVE', 'POSITIVE', 'NEUTRAL', 'AMENITIES LIKE', 'BOASTS ELEGANT', 'ELEGANT TOWERS', 'GREAT COMMUNITY', 'MANTRA GOLD', 'OFFERING BEDROOM', 'QUALITY SPECIFICATION', 'STORIES OFFERING', 'TOWERS STORIES', 'WORLD CLASS']


In [7]:
featuresMod = ['PROPERTYTYPE', 'CLUBHOUSE', 'SCHOOL_UNIVERSITY_IN_TOWNSHIP', 'HOSPITAL_IN_TOWNSHIP', 'MALL_IN_TOWNSHIP', 
               'PARK_JOGGING_TRACK', 'SWIMMING_POOL', 'GYM', 'PROPERTY_AREA_IN_SQ_FT', 'PRICE_IN_LAKHS', 'PRICE_BY_SUB_AREA',
               'PRICE_BY_COMPANY', 'PRICE_BY_TOWNSHIP_SOCIETY', 'AMENITIES_SCORE', 'PRICE_BY_AMENITIES_SCORE', 'NOUN_COUNTS',
               'VERB_COUNTS', 'ADJECTIVE_COUNTS', 'COMPOUND', 'NEGATIVE', 'POSITIVE', 'NEUTRAL', 'AMENITIES_LIKE', 'BOASTS_ELEGANT',
               'ELEGANT_TOWERS', 'GREAT COMMUNITY', 'MANTRA_GOLD', 'OFFERING_BEDROOM', 'QUALITY_SPECIFICATION', 'STORIES_OFFERING',
               'TOWERS_STORIES', 'WORLD_CLASS']
print(len(featuresMod))

32


In [8]:
dict(zip(features,featuresMod))

{'PROPERTY TYPE CLEAN': 'PROPERTYTYPE',
 'CLUBHOUSE CLEAN': 'CLUBHOUSE',
 'SCHOOL / UNIVERSITY IN TOWNSHIP CLEAN': 'SCHOOL_UNIVERSITY_IN_TOWNSHIP',
 'HOSPITAL IN TOWNSHIP CLEAN': 'HOSPITAL_IN_TOWNSHIP',
 'MALL IN TOWNSHIP CLEAN': 'MALL_IN_TOWNSHIP',
 'PARK / JOGGING TRACK CLEAN': 'PARK_JOGGING_TRACK',
 'SWIMMING POOL CLEAN': 'SWIMMING_POOL',
 'GYM CLEAN': 'GYM',
 'PROPERTY AREA IN SQ. FT. CLEAN': 'PROPERTY_AREA_IN_SQ_FT',
 'PRICE IN LAKHS CLEAN': 'PRICE_IN_LAKHS',
 'PRICE BY SUB-AREA': 'PRICE_BY_SUB_AREA',
 'PRICE BY COMPANY': 'PRICE_BY_COMPANY',
 'PRICE BY TOWNSHIP_SOCIETY': 'PRICE_BY_TOWNSHIP_SOCIETY',
 'AMENITIES SCORE': 'AMENITIES_SCORE',
 'PRICE BY AMENITIES SCORE': 'PRICE_BY_AMENITIES_SCORE',
 'NOUN_COUNTS': 'NOUN_COUNTS',
 'VERB_COUNTS': 'VERB_COUNTS',
 'ADJECTIVE_COUNTS': 'ADJECTIVE_COUNTS',
 'COMPOUND': 'COMPOUND',
 'NEGATIVE': 'NEGATIVE',
 'POSITIVE': 'POSITIVE',
 'NEUTRAL': 'NEUTRAL',
 'AMENITIES LIKE': 'AMENITIES_LIKE',
 'BOASTS ELEGANT': 'BOASTS_ELEGANT',
 'ELEGANT TOWERS'

In [9]:

fileName = 'model/raw_features_mapping.pkl'

with open(fileName, 'wb') as f:
    pickle.dump(dict(zip(features,featuresMod)),f)

In [10]:
import pickle
fileName = 'model/features.pkl'

with open(fileName, 'wb') as f:
    pickle.dump(featuresMod,f)

In [11]:
df_features = data_cv[features]
df_features.columns = featuresMod
print(df_features.shape)
df_features.head()

(200, 32)


Unnamed: 0,PROPERTYTYPE,CLUBHOUSE,SCHOOL_UNIVERSITY_IN_TOWNSHIP,HOSPITAL_IN_TOWNSHIP,MALL_IN_TOWNSHIP,PARK_JOGGING_TRACK,SWIMMING_POOL,GYM,PROPERTY_AREA_IN_SQ_FT,PRICE_IN_LAKHS,...,AMENITIES_LIKE,BOASTS_ELEGANT,ELEGANT_TOWERS,GREAT COMMUNITY,MANTRA_GOLD,OFFERING_BEDROOM,QUALITY_SPECIFICATION,STORIES_OFFERING,TOWERS_STORIES,WORLD_CLASS
0,1.0,1,1,1,1,1,1,1,492.0,39.0,...,0,0,0,0,0,0,0,0,0,0
1,2.0,1,1,1,1,1,1,1,774.0,65.0,...,0,0,0,0,0,0,0,0,0,0
2,3.0,1,1,1,1,1,1,1,889.0,74.0,...,0,0,0,0,0,0,0,0,0,0
3,3.0,1,1,1,1,1,1,1,1018.0,89.0,...,0,0,0,0,0,0,0,0,0,0
4,2.0,1,1,1,1,1,1,1,743.0,74.0,...,0,0,0,0,0,0,0,0,0,0


In [12]:
df_features.isnull().sum()

PROPERTYTYPE                     0
CLUBHOUSE                        0
SCHOOL_UNIVERSITY_IN_TOWNSHIP    0
HOSPITAL_IN_TOWNSHIP             0
MALL_IN_TOWNSHIP                 0
PARK_JOGGING_TRACK               0
SWIMMING_POOL                    0
GYM                              0
PROPERTY_AREA_IN_SQ_FT           0
PRICE_IN_LAKHS                   1
PRICE_BY_SUB_AREA                0
PRICE_BY_COMPANY                 0
PRICE_BY_TOWNSHIP_SOCIETY        0
AMENITIES_SCORE                  0
PRICE_BY_AMENITIES_SCORE         0
NOUN_COUNTS                      0
VERB_COUNTS                      0
ADJECTIVE_COUNTS                 0
COMPOUND                         0
NEGATIVE                         0
POSITIVE                         0
NEUTRAL                          0
AMENITIES_LIKE                   0
BOASTS_ELEGANT                   0
ELEGANT_TOWERS                   0
GREAT COMMUNITY                  0
MANTRA_GOLD                      0
OFFERING_BEDROOM                 0
QUALITY_SPECIFICATIO

In [13]:
df_features = df_features.dropna()

In [15]:
# IDV and DV
from sklearn.model_selection import train_test_split

train, test = train_test_split(df_features, test_size=0.30,random_state=1234)
test, val = train_test_split(test, test_size = 0.2, random_state=1234)


In [16]:
y_train = train[['PRICE_IN_LAKHS']]
x_train = train.drop('PRICE_IN_LAKHS',axis=1)

y_test = test[['PRICE_IN_LAKHS']]
x_test = test.drop('PRICE_IN_LAKHS',axis=1)

y_val = val[['PRICE_IN_LAKHS']]
x_val = val.drop('PRICE_IN_LAKHS',axis=1)

print((x_train.shape, x_test.shape, x_val.shape), (y_train.shape,y_test.shape, y_val.shape))

((139, 31), (48, 31), (12, 31)) ((139, 1), (48, 1), (12, 1))


# Linear Rigression

In [17]:
# Model training
from sklearn.linear_model import LinearRegression

linear_reg = LinearRegression()
linear_reg.fit(x_train,y_train)
print("Training R2 score: "+ str(linear_reg.score(x_train,y_train)))

Training R2 score: 0.8833200674616456


In [18]:
# Predicting on the test and evaluating the accuracy
from sklearn import metrics 
preds_lr = linear_reg.predict(x_test)

print("Test R2 score: "+ str(metrics.r2_score(y_test,preds_lr)))
print("Test MAE: "+ str(metrics.mean_absolute_error(y_test,preds_lr)))
print("Test RMSE: "+ str(np.sqrt(metrics.mean_squared_error(y_test,preds_lr))))

Test R2 score: 0.9027733503107978
Test MAE: 9.248216992945052
Test RMSE: 12.160806769067609


In [19]:
pd.DataFrame(zip(linear_reg.coef_.T, x_train.columns))

Unnamed: 0,0,1
0,[10.00372019007817],PROPERTYTYPE
1,[-6.1994871000636165],CLUBHOUSE
2,[-3.234859258062903],SCHOOL_UNIVERSITY_IN_TOWNSHIP
3,[-3.23485925247508],HOSPITAL_IN_TOWNSHIP
4,[-3.23485925175447],MALL_IN_TOWNSHIP
5,[15.589338979954224],PARK_JOGGING_TRACK
6,[-8.901245634254527],SWIMMING_POOL
7,[8.101810550470725],GYM
8,[0.03765801005891607],PROPERTY_AREA_IN_SQ_FT
9,[0.06890992915494389],PRICE_BY_SUB_AREA


# Ridge

In [20]:
# Model training

ridge = Ridge()
ridge.fit(x_train,y_train)
print("Training R2 score: "+ str(ridge.score(x_train,y_train)))

Training R2 score: 0.8824288739153938


In [21]:
# Predicting on the test and evaluating the accuracy
preds_rid = ridge.predict(x_test)

print("Test R2 score: "+ str(metrics.r2_score(y_test,preds_rid)))
print("Test MaAE: "+ str(metrics.mean_absolute_error(y_test,preds_rid)))
print("Test RMSE: "+ str(np.sqrt(metrics.mean_squared_error(y_test,preds_rid))))

Test R2 score: 0.9072106169921383
Test MaAE: 9.35530073725781
Test RMSE: 11.880066477373456


In [22]:
pd.DataFrame(zip(ridge.coef_.T, x_train.columns))

Unnamed: 0,0,1
0,[10.728861644534598],PROPERTYTYPE
1,[-5.132489868342229],CLUBHOUSE
2,[-1.97107877397571],SCHOOL_UNIVERSITY_IN_TOWNSHIP
3,[-1.9710787739757294],HOSPITAL_IN_TOWNSHIP
4,[-1.9710787739757218],MALL_IN_TOWNSHIP
5,[6.60429197451773],PARK_JOGGING_TRACK
6,[-6.712852385999457],SWIMMING_POOL
7,[6.540859554843211],GYM
8,[0.03775121296113824],PROPERTY_AREA_IN_SQ_FT
9,[0.08863165264580161],PRICE_BY_SUB_AREA


### GridSearchCV

In [25]:
%%time
parameters = {'alpha':[0.0001,0.001,0.01,0.1,1.0,2.0,4.0,5.0,6.0]}
model_ridge = Ridge(
    solver='auto', fit_intercept=True,
    max_iter=100, tol=0.05, random_state = 1,
)
model=GridSearchCV(estimator = model_ridge, param_grid = parameters,
                                    return_train_score=True, scoring = 'r2',
                                    cv = 5,
                                    verbose=2)
model.fit(x_train,y_train)

Fitting 5 folds for each of 9 candidates, totalling 45 fits
[CV] END .......................................alpha=0.0001; total time=   0.0s
[CV] END .......................................alpha=0.0001; total time=   0.0s
[CV] END .......................................alpha=0.0001; total time=   0.0s
[CV] END .......................................alpha=0.0001; total time=   0.0s
[CV] END .......................................alpha=0.0001; total time=   0.0s
[CV] END ........................................alpha=0.001; total time=   0.0s
[CV] END ........................................alpha=0.001; total time=   0.0s
[CV] END ........................................alpha=0.001; total time=   0.0s
[CV] END ........................................alpha=0.001; total time=   0.0s
[CV] END ........................................alpha=0.001; total time=   0.0s
[CV] END .........................................alpha=0.01; total time=   0.0s
[CV] END ........................................

In [26]:
model.best_estimator_

In [23]:
ridge_model = Ridge(alpha=6.0, copy_X=True, fit_intercept=True, max_iter=100, random_state=1, solver='auto', tol=0.05)
ridge_model.fit(x_train, y_train)

In [24]:
# Predicting on the test and evaluating the accuracy
preds_las = ridge_model.predict(x_test).reshape(-1,1)

print("Test R2 score: "+ str(metrics.r2_score(y_test,preds_las)))
print("Test MAE: "+ str(metrics.mean_absolute_error(y_test,preds_las)))
print("Test RMSE: "+ str(np.sqrt(metrics.mean_squared_error(y_test,preds_las))))

Test R2 score: 0.9130404447492007
Test MAE: 9.118112433016076
Test RMSE: 11.500808771462058


# Lasso

In [25]:
# Model training
from sklearn.linear_model import Lasso
las = Lasso()
las.fit(x_train,y_train)
print("Training R2 score: "+ str(las.score(x_train,y_train)))

Training R2 score: 0.8688098267794647


In [26]:
# Predicting on the test and evaluating the accuracy
preds_las = las.predict(x_test).reshape(-1,1)

print("Test R2 score: "+ str(metrics.r2_score(y_test,preds_las)))
print("Test MAE: "+ str(metrics.mean_absolute_error(y_test,preds_las)))
print("Test RMSE: "+ str(np.sqrt(metrics.mean_squared_error(y_test,preds_las))))

Test R2 score: 0.9226522226658003
Test MAE: 8.228662951450076
Test RMSE: 10.84660054827755


In [33]:
import pickle
fileName = 'model/lasso_model.pkl'
with open(fileName,'wb') as f:
    pickle.dump(las,f)

## SGD Regressor 

In [47]:
from sklearn.linear_model import SGDRegressor
from sklearn.model_selection import GridSearchCV

parameters = {'alpha':[10**x for x in range(-10, 10)],
              }
model_lr_reg = SGDRegressor(loss = "squared_error",fit_intercept=False,l1_ratio=0.6)
model=GridSearchCV(estimator = model_lr_reg, param_grid = parameters,
                                    return_train_score=True, scoring = 'r2',\
                                    cv = 5,verbose=2)
model.fit(x_train,y_train)

Fitting 5 folds for each of 20 candidates, totalling 100 fits
[CV] END ........................................alpha=1e-10; total time=   0.0s
[CV] END ........................................alpha=1e-10; total time=   0.0s
[CV] END ........................................alpha=1e-10; total time=   0.0s
[CV] END ........................................alpha=1e-10; total time=   0.0s
[CV] END ........................................alpha=1e-10; total time=   0.0s
[CV] END ........................................alpha=1e-09; total time=   0.0s
[CV] END ........................................alpha=1e-09; total time=   0.0s
[CV] END ........................................alpha=1e-09; total time=   0.0s
[CV] END ........................................alpha=1e-09; total time=   0.0s
[CV] END ........................................alpha=1e-09; total time=   0.0s
[CV] END ........................................alpha=1e-08; total time=   0.0s
[CV] END ......................................

In [48]:
model.best_estimator_

In [50]:
from sklearn.linear_model import RidgeCV
sgd_model = SGDRegressor(alpha=1e-10, average=False, early_stopping=False, epsilon=0.1,
             eta0=0.01, fit_intercept=False, l1_ratio=0.6,
             learning_rate='invscaling', loss='squared_error', max_iter=1000,
             n_iter_no_change=5, penalty='l2', power_t=0.25, random_state=None,
             shuffle=True, tol=0.001, validation_fraction=0.1, verbose=0,
             warm_start=False)
sgd_model.fit(x_train, y_train)

In [52]:
# Predicting on the test and evaluating the accuracy
preds_las = sgd_model.predict(x_test).reshape(-1,1)

print("Test R2 score: "+ str(metrics.r2_score(y_test,preds_las)))
print("Test MAE: "+ str(metrics.mean_absolute_error(y_test,preds_las)))
print("Test RMSE: "+ str(np.sqrt(metrics.mean_squared_error(y_test,preds_las))))

Test R2 score: -1.1886444741684574e+25
Test MAE: 128855937151416.67
Test RMSE: 139240422333850.03


# LGBMRegressor

In [88]:
%%time
params={'n_estimators':[100,500,1000],'learning_rate':[0.05,0.15,0.25]}
model_lgbm= LGBMRegressor(colsample_bytree=0.8,
               min_child_samples=50)

model=GridSearchCV(estimator = model_lgbm, param_grid = params,
                                    return_train_score=True, scoring = 'r2',\
                                    cv = 3,verbose=2)
model.fit(x_train,y_train)


Fitting 3 folds for each of 9 candidates, totalling 27 fits
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000114 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 198
[LightGBM] [Info] Number of data points in the train set: 106, number of used features: 7
[LightGBM] [Info] Start training from score 78.230377
[CV] END ...............learning_rate=0.05, n_estimators=100; total time=   0.0s
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000035 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 189
[LightGBM] [Info] Number of data points in the train set: 106, number of used features: 7
[LightGBM] [Info] Start training from score 90.253585
[CV] END ...............learning_rate=0.05, n_estimators=100; total time=   0.0s
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000038 secon

In [89]:
model.best_estimator_

In [90]:
lgbm_model = LGBMRegressor(boosting_type='gbdt', class_weight=None, colsample_bytree=0.8,
              importance_type='split', learning_rate=0.05, max_depth=-1,
              min_child_samples=50, min_child_weight=0.001, min_split_gain=0.0,
              n_estimators=1000, n_jobs=-1, num_leaves=31, objective=None,
              random_state=None, reg_alpha=0.0, reg_lambda=0.0, silent=True,
              subsample=1.0, subsample_for_bin=200000, subsample_freq=0)


lgbm_model.fit(x_train, y_train)


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.022181 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 331
[LightGBM] [Info] Number of data points in the train set: 159, number of used features: 14
[LightGBM] [Info] Start training from score 84.418050


In [91]:
# Predicting on the test and evaluating the accuracy
preds_las = lgbm_model.predict(x_test).reshape(-1,1)

print("Test R2 score: "+ str(metrics.r2_score(y_test,preds_las)))
print("Test MAE: "+ str(metrics.mean_absolute_error(y_test,preds_las)))
print("Test RMSE: "+ str(np.sqrt(metrics.mean_squared_error(y_test,preds_las))))

Test R2 score: 0.6768580870301357
Test MAE: 17.34306575345557
Test RMSE: 22.958102463360017


## Best Model

In [34]:
import pickle
fileName = 'model/lasso_model.pkl'
with open(fileName,'rb') as f:
    model = pickle.load(f)

In [38]:
preds_las = las.predict(x_test).reshape(-1,1)
scoring_ref_metrics = {}
scoring_ref_metrics['R2_score'] = metrics.r2_score(y_test,preds_las)
scoring_ref_metrics['RMSE'] = np.sqrt(metrics.mean_squared_error(y_test,preds_las))
scoring_ref_metrics['MAE'] = metrics.mean_absolute_error(y_test,preds_las)

In [39]:
scoring_ref_metrics

{'R2_score': 0.9226522226658003,
 'RMSE': 10.84660054827755,
 'MAE': 8.228662951450076}

In [40]:
with open('model/MODEL_XGB_PERFM_METRICS.pkl', 'wb') as F:
    pickle.dump(scoring_ref_metrics, F)

In [35]:
y_train_pred = model.predict(x_train)
train['PREDICTED_PRICE_IN_LAKHS'] = y_train_pred

y_test_pred = model.predict(x_test)
test['PREDICTED_PRICE_IN_LAKHS'] = y_test_pred

y_val_pred = model.predict(x_val)
val['PREDICTED_PRICE_IN_LAKHS'] = y_val_pred

In [37]:
train.to_csv("inputs/Predicted_train.csv", index=False)
test.to_csv("inputs/Predicted_test.csv", index=False)
val.to_csv("inputs/Predicted_val.csv", index=False)