In [8]:
# Make the notebook echo the value of every top-level expression in a cell,
# not only the last one.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

In [9]:
# Dependencies
from pymongo import MongoClient
import pandas as pd
import numpy as np
from datetime import datetime
import re
import pickle


from urls_list import * #where all urls and paths are saved


In [10]:
import warnings
# NOTE(review): this silences *every* warning for the rest of the session,
# including any raised by pandas or by the unpickled models used below.
# Consider narrowing the filter to specific categories.
warnings.filterwarnings("ignore")


## Load the rental listings (`HistoricRental` collection)

In [11]:
# `db_connection_string` is not defined in this notebook; presumably it is
# supplied by `from urls_list import *` -- TODO confirm.
client = MongoClient(db_connection_string)
# Materialise every document of ETLInsight.HistoricRental as a list of dicts;
# the {'_id': 0} projection leaves MongoDB's `_id` field out of each document.
records = list(client.ETLInsight["HistoricRental"].find({}, {'_id':0}))

In [12]:
# One row per fetched document, one column per document field.
DF = pd.DataFrame(records)

In [None]:
def feasibilityCheck(row):
    """Return True when a listing row carries enough valid data to be priced.

    A row is rejected when:
      * ``FSA`` or ``rental_type`` is missing (``None``, NaN or empty),
      * ``FSA`` is not a string starting with ``'M'``,
      * ``sqft`` is present and greater than 3000 or smaller than 200.

    A missing ``sqft`` (``None``, NaN, ``0`` or empty) does not reject the row.

    Parameters
    ----------
    row : mapping-like
        Anything indexable by the keys ``"FSA"``, ``"rental_type"`` and
        ``"sqft"`` (e.g. the Series handed over by ``DataFrame.apply(axis=1)``).

    Returns
    -------
    bool
    """
    def _missing(value):
        # NaN is truthy, so `not value` alone would let it through; NaN is the
        # only float that is not equal to itself.
        return (not value) or (isinstance(value, float) and value != value)

    fsa = row["FSA"]
    if _missing(fsa) or _missing(row["rental_type"]):
        return False
    # Guard the type first: a non-string FSA used to crash the regex search.
    if not (isinstance(fsa, str) and fsa.startswith('M')):
        return False

    sqft = row["sqft"]
    # NaN sqft is truthy but fails both comparisons, so it is accepted.
    if sqft and (sqft > 3000 or sqft < 200):
        return False
    return True

In [None]:
#DF['feasibility'] = DF.apply(lambda x: feasibilityCheck(x), axis=1)

In [None]:
def preprocess(DF):
    """Turn raw rental listings into the feature frame used for prediction.

    Steps, in order: keep only rows accepted by ``feasibilityCheck``; turn
    blank strings and ``None`` into NaN; cast ``price`` to int; label
    ``furnished`` as YES / NO / NOT_MENTIONED; parse ``post_published_date``
    and derive ``posted_week_of_month``; reduce ``image`` to a has-image flag;
    one-hot encode the categorical columns with the encoder stored in
    ``OHE.pickle``; keep ``id`` plus the selected feature columns.

    The frame passed in is not modified; all work happens on a filtered copy.

    Parameters
    ----------
    DF : pandas.DataFrame
        Columns read here: ``id``, ``price``, ``sqft``, ``bedrooms``,
        ``bathrooms``, ``image``, ``FSA``, ``rental_type``, ``furnished``,
        ``pet_friendly`` and ``post_published_date``.

    Returns
    -------
    (pandas.DataFrame, list of str)
        The frame restricted to ``['id'] + selected_columns`` (index reset to
        0..n-1) and the ``selected_columns`` list itself.
    """
    feasibility = DF.apply(feasibilityCheck, axis=1)
    # Explicit copy: every step below writes to the filtered frame, and writing
    # to a bare boolean-mask selection is what raises SettingWithCopyWarning.
    DF = DF[feasibility].copy()
    # Replace all white spaces or nothing at all to NaN
    DF = DF.replace(r'^\s*$', np.nan, regex=True)
    # Replace None with NaN
    DF = DF.fillna(value=np.nan)
    # Typecast (not required for the returned features; raises if a price is NaN)
    DF['price'] = DF['price'].astype('int')
    # Missing value handling
    DF['furnished'] = (
        DF['furnished']
        .fillna(value="NOT_MENTIONED")
        .replace(to_replace=True, value='YES')
        .replace(to_replace=False, value='NO')
    )
    # New features. pd.to_datetime yields NaT for missing dates instead of
    # raising the way datetime.strptime does on NaN.
    DF['post_published_date'] = pd.to_datetime(DF['post_published_date'], format='%Y-%m-%d')
    # posted_week_of_month is derived here but is not part of selected_columns.
    DF['posted_week_of_month'] = DF['post_published_date'].dt.day // 7 + 1
    # Convert image url to "has an image" flag
    DF['image'] = DF['image'].notna()
    # Reset to a 0..n-1 index so the positional OHE frame lines up in concat.
    DF = DF.reset_index(drop=True)
    # OHE
    cat_columns = ['image', 'FSA', 'rental_type', 'furnished', 'pet_friendly']
    # SECURITY NOTE: pickle.load can execute arbitrary code; OHE.pickle must
    # come from a trusted source.
    with open('OHE.pickle', 'rb') as encoder_file:
        enc = pickle.load(encoder_file)
    ohe_output = enc.transform(DF[cat_columns]).toarray()
    ohe_dict = {f'x{index}': col for index, col in enumerate(cat_columns)}
    # Feature names are expected as 'x<column position>_<category>'. Only the
    # first '_'-separated token of the category is kept, which is why
    # 'NOT_MENTIONED' appears as 'furnished_NOT' in selected_columns.
    # NOTE(review): get_feature_names() is presumably a scikit-learn
    # OneHotEncoder method that newer releases replace with
    # get_feature_names_out() -- confirm against the pinned version.
    ohe_labels = []
    for feature in enc.get_feature_names():
        parts = feature.split('_')
        ohe_labels.append(ohe_dict[parts[0]] + '_' + parts[1])
    DF = pd.concat([DF, pd.DataFrame(ohe_output, columns=ohe_labels)], axis=1)
    DF = DF.drop(cat_columns, axis=1)
    selected_columns = ['sqft', 'bedrooms', 'bathrooms', 'image_False', 'FSA_M1B', 'FSA_M1M',
       'FSA_M1P', 'FSA_M1V', 'FSA_M1W', 'FSA_M2M', 'FSA_M3C', 'FSA_M3K',
       'FSA_M4E', 'FSA_M4V', 'FSA_M4W', 'FSA_M5G', 'FSA_M5J', 'FSA_M5R',
       'FSA_M5S', 'FSA_M5V', 'FSA_M6B', 'FSA_M6E', 'FSA_M6G', 'FSA_M6J',
       'FSA_M6K', 'FSA_M6M', 'FSA_M6P', 'rental_type_apartment',
       'rental_type_condo', 'rental_type_house', 'rental_type_loft',
       'rental_type_townhouse', 'furnished_NOT', 'furnished_YES',
       'pet_friendly_False']
    # Return an independent frame so callers can add columns without
    # chained-assignment warnings.
    DF = DF[['id'] + selected_columns].copy()
    return DF, selected_columns
    

In [None]:
#New_DF, selected_columns = preprocess(DF)


In [None]:
def predict(DF, selected_columns):
    """Predict a price for every row of ``DF`` with the pickled model.

    Parameters
    ----------
    DF : pandas.DataFrame
        Must contain an ``id`` column and every column in ``selected_columns``.
        It is not modified.
    selected_columns : list of str
        Feature columns passed to the model, in this order.

    Returns
    -------
    dict
        ``{id: {'pred': <float>}}`` with one entry per distinct id; if an id
        occurs more than once the last row wins.
    """
    # SECURITY NOTE: pickle.load can execute arbitrary code; xgb_model.pickle
    # must come from a trusted source.
    with open('xgb_model.pickle', 'rb') as model_file:
        xgb_model = pickle.load(model_file)
    y_pred = xgb_model.predict(DF[selected_columns])
    # Build the mapping directly rather than adding a column to the caller's
    # frame and transposing it. float() turns numpy scalars into plain Python
    # floats.
    return {
        rental_id: {'pred': float(price)}
        for rental_id, price in zip(DF['id'], y_pred)
    }

In [None]:
#prediction_mapping = predict(New_DF,selected_columns)

In [None]:
#DF['pred'] = DF['id'].map(lambda x: prediction_mapping[x]['pred'] if x in prediction_mapping else 'Not Feasible to predict')

In [None]:
#MSE = 457.09

In [None]:
def predictPrice(DF):
    """Add a ``pred`` column holding the model's price estimate to ``DF``.

    Rows whose ``id`` has no entry in the prediction mapping get the string
    ``'Not Feasible to predict'`` instead of a number. The column is written
    onto the frame that was passed in, and that same frame is returned.
    """
    features, feature_names = preprocess(DF)
    predictions = predict(features, feature_names)

    def _lookup(rental_id):
        # Ids missing from the mapping were not scored.
        if rental_id in predictions:
            return predictions[rental_id]['pred']
        return 'Not Feasible to predict'

    DF['pred'] = DF['id'].map(_lookup)
    return DF

In [None]:
# predictPrice writes the 'pred' column onto the frame it is given and returns
# that same frame, so this rebinding keeps DF pointing at the updated object.
DF = predictPrice(DF)

In [None]:
# Display the frame, now including the 'pred' column.
DF

In [None]:
# Number of documents fetched from MongoDB.
len(records)

## Update the DB with pred value

In [None]:
# for index,row in DF.iterrows():
#     #print(row['id'], row['pred'])
#     query = {'id': row['id']}
#     client.ETLInsight["HistoricRental"].update_one(query, {"$set": {"pred": row['pred']}})

In [14]:
# Opens a new MongoClient and rebinds `client`; any client created earlier in
# the session is not closed here.
client = MongoClient(db_connection_string)
# Overwrites `records` with every document of ETLInsight.CurrentRental
# (again without MongoDB's `_id` field).
records = list(client.ETLInsight["CurrentRental"].find({}, {'_id':0}))

In [15]:
# Rebinds DF to the CurrentRental documents, replacing the previous frame.
DF = pd.DataFrame(records)

In [16]:
# Display the CurrentRental frame.
DF

Unnamed: 0,id,title,price,sqft,image,url,post_published_date,lat,long,postal_code,FSA,rental_type,bedrooms,bathrooms,furnished,pet_friendly,description,source,pred
0,c_7202750011,"2 Bedroom, 2 Bathroom - Boutique II",2750,,https://images.craigslist.org/01616_hbR46WGFjY...,https://toronto.craigslist.org/tor/apa/d/toron...,2020-09-25,43.646067,-79.385474,M5J 1H8,M5J,apartment,2,2,False,False,2BR / 2Ba available oct 15 apartment w/d in un...,craigslist,2915.22
1,c_7202745971,"1 Bed/room, 1 Bathroom - Boutique 2",1800,,https://images.craigslist.org/00P0P_1L9TKA9cQH...,https://toronto.craigslist.org/tor/apa/d/toron...,2020-09-25,43.646067,-79.385474,M5J 1H8,M5J,condo,1,1,False,False,1BR / 1Ba available oct 1 condo w/d in unit/ 1...,craigslist,1953.22
2,c_7202739547,"1 Bedroom, 1 Bathroom - Boutique 2 Condo",1850,,https://images.craigslist.org/01313_4YxX8zhMSe...,https://toronto.craigslist.org/tor/apa/d/toron...,2020-09-25,43.646067,-79.385474,M5J 1H8,M5J,condo,1,1,False,False,1BR / 1Ba available now condo w/d in unit/ 1br...,craigslist,1953.22
3,c_7202742727,"1 Bed/room, 1 Bathroom - Boutique 2",1950,,https://images.craigslist.org/00n0n_QNIkGEekZg...,https://toronto.craigslist.org/tor/apa/d/toron...,2020-09-25,43.646067,-79.385474,M5J 1H8,M5J,apartment,1,1,False,False,1BR / 1Ba available nov 1 apartment w/d in uni...,craigslist,1845.07
4,c_7202759816,1+1 Bedroom / 1 Bathroom - Boutique 2,2200,,https://images.craigslist.org/00000_3hsx9Gkt2W...,https://toronto.craigslist.org/tor/apa/d/toron...,2020-09-25,43.652102,-79.388075,M5T 3M3,M5T,condo,1,1,False,False,1BR / 1Ba available nov 1 condo w/d in unit/ 1...,craigslist,1926.6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2886,c_7222210872,"2 BEDROOM OPEN CONCEPT LIVING, DINING, KITCHEN...",1475,,https://images.craigslist.org/00p0p_blnT35BELp...,https://toronto.craigslist.org/tor/apa/d/toron...,2020-10-29,43.638300,-79.430100,M6K 1G7,M6K,apartment,2,1,,False,2BR / 1Ba available now apartment/ 2br -2 BEDR...,craigslist,1611.8
2887,c_7227283074,Unique Opportunity:Fully Furnished Executive 1...,2090,700,https://images.craigslist.org/00D0D_aTZRTCXcyr...,https://toronto.craigslist.org/tor/apa/d/downt...,2020-11-07,43.640400,-79.399500,M5V 0E4,M5V,condo,2,1,True,True,2BR / 1Ba 700ft2 available now dogs are OK - w...,craigslist,2654.97
2888,c_7227267467,"LRG LIV RM, EAT-IN KIT, D/W, HUGE CLOSETS, DEC...",2475,,https://images.craigslist.org/01313_dAKPvNpHYO...,https://toronto.craigslist.org/tor/apa/d/downt...,2020-11-07,43.668300,-79.420500,M6G 3B5,M6G,apartment,3,1,,False,"3BR / 1Ba available jan 1, 2021 apartment/ 3br...",craigslist,2500.8
2889,c_7227265767,"2 BEDROOM - OPEN LIV/KIT/DIN, LARGE CLOSETS, D...",1450,,https://images.craigslist.org/00q0q_7sRem6tRqA...,https://toronto.craigslist.org/tor/apa/d/downt...,2020-11-07,43.668300,-79.420500,M6G 3B5,M6G,apartment,2,1,,False,2BR / 1Ba available dec 1 apartment/ 2br -356 ...,craigslist,1766.45
