In [138]:
# Make every top-level expression in a cell echo its value, not only the last one.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

In [139]:
# Dependencies
from pymongo import MongoClient
import pandas as pd
import numpy as np
from datetime import datetime
import re
import pickle


from urls_list import * #where all urls and paths are saved


In [140]:
import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this is a blanket filter, so library deprecation notices and
# pandas copy/assignment warnings raised by later cells are hidden as well.
warnings.filterwarnings("ignore")

## Get the current rentals

In [156]:
# Open a MongoDB connection. db_connection_string is not defined in this
# notebook; presumably it comes from the wildcard import of urls_list (confirm).
client = MongoClient(db_connection_string)
# Fetch every document of ETLInsights.CurrentRental (empty filter); the
# projection {'_id': 0} leaves Mongo's _id field out of the returned documents.
records = list(client.ETLInsights["CurrentRental"].find({}, {'_id':0}))

In [157]:
# One row per fetched rental document.
DF = pd.DataFrame(records)

In [143]:
def feasibilityCheck(row, min_sqft=200, max_sqft=3000):
    """Decide whether a listing carries enough valid data to be priced.

    A listing is feasible when:
      * its ``FSA`` is a string starting with ``"M"``,
      * its ``rental_type`` is present (not None / NaN / empty), and
      * its ``sqft``, when given, lies within ``[min_sqft, max_sqft]``.

    Parameters
    ----------
    row : mapping (e.g. a pandas Series or dict)
        Must provide the keys ``"FSA"``, ``"rental_type"`` and ``"sqft"``.
    min_sqft, max_sqft : number, optional
        Inclusive bounds for a plausible floor area. Defaults are 200 and 3000.

    Returns
    -------
    bool
        True when the listing can be fed to the model, False otherwise.
    """
    fsa = row["FSA"]
    # A missing FSA may arrive as None *or* as float NaN. NaN is truthy, so a
    # plain `not fsa` test would let it through to the string check and raise
    # TypeError; the isinstance guard rejects every non-string value up front.
    if not isinstance(fsa, str) or not fsa.startswith("M"):
        return False

    rental_type = row["rental_type"]
    # pd.isna catches None/NaN, `not` catches the empty string.
    if pd.isna(rental_type) or not rental_type:
        return False

    sqft = row["sqft"]
    # A missing floor area (None, 0 or NaN) is tolerated: None/0 are falsy and
    # every comparison against NaN is False, so only real out-of-range values
    # reject the listing.
    if sqft and (sqft > max_sqft or sqft < min_sqft):
        return False

    return True

In [144]:
#DF['feasibility'] = DF.apply(lambda x: feasibilityCheck(x), axis=1)

In [145]:
def preprocess(DF, encoder_path='OHE.pickle'):
    """Turn raw rental listings into the feature frame the price model consumes.

    Steps: flag feasibility, keep feasible rows only, normalise missing values,
    derive features, one-hot encode the categorical columns with a pre-fitted
    encoder, and keep only the model's feature columns plus ``id``.

    Parameters
    ----------
    DF : pandas.DataFrame
        Raw listings. Columns read here: ``id``, ``FSA``, ``rental_type``,
        ``sqft``, ``bedrooms``, ``bathrooms``, ``furnished``, ``pet_friendly``,
        ``image`` and ``post_published_date`` (``'%Y-%m-%d'`` strings).
        Side effect: a boolean ``feasibility`` column is written onto this
        frame in place. All other work happens on a copy.
    encoder_path : str, optional
        Path of the pickled, already fitted one-hot encoder.

    Returns
    -------
    (pandas.DataFrame, list of str)
        The feature frame (``id`` + feature columns, feasible rows only, fresh
        0..n-1 index) and the list of feature column names.
    """
    # Written back to the caller's frame on purpose: downstream code displays it.
    DF['feasibility'] = DF.apply(feasibilityCheck, axis=1)

    # Explicit copy so every later edit acts on an independent frame instead of
    # assigning into a filtered slice of the caller's data.
    listings = DF[DF['feasibility']].copy()

    # Blank / whitespace-only strings and None both become NaN.
    listings = listings.replace(r'^\s*$', np.nan, regex=True)
    listings = listings.fillna(value=np.nan)

    # 'price' is deliberately not cast to int any more: it is not part of the
    # returned columns, so the cast could only raise on a listing with no price.

    # Missing-value handling and boolean -> label mapping for 'furnished'.
    listings['furnished'] = (
        listings['furnished']
        .fillna(value="NOT_MENTIONED")
        .replace(to_replace=True, value='YES')
        .replace(to_replace=False, value='NO')
    )

    # Derived features. pd.to_datetime parses the same '%Y-%m-%d' format as the
    # former per-row strptime but maps missing dates to NaT instead of raising.
    # NOTE(review): posted_week_of_month is not among selected_columns below.
    listings['post_published_date'] = pd.to_datetime(
        listings['post_published_date'], format='%Y-%m-%d'
    )
    listings['posted_week_of_month'] = listings['post_published_date'].dt.day // 7 + 1
    # Image URL -> "has an image" flag.
    listings['image'] = listings['image'].notna()

    # Positional index so the encoded block below lines up row by row in concat.
    listings = listings.reset_index(drop=True)

    # One-hot encode with the pre-fitted encoder.
    cat_columns = ['image', 'FSA', 'rental_type', 'furnished', 'pet_friendly']
    # NOTE(review): pickle.load can execute arbitrary code; only load encoder
    # files from a trusted location.
    with open(encoder_path, 'rb') as encoder_file:
        enc = pickle.load(encoder_file)
    ohe_output = enc.transform(listings[cat_columns]).toarray()

    # Encoder feature names look like 'x<i>_<category>'. Map 'x<i>' back to the
    # column name and keep only the first '_'-separated token of the category,
    # which is why 'NOT_MENTIONED' yields 'furnished_NOT' as listed below.
    ohe_dict = {f'x{index}': col for index, col in enumerate(cat_columns)}
    ohe_labels = []
    for feature in enc.get_feature_names():
        parts = feature.split('_')
        ohe_labels.append(ohe_dict[parts[0]] + '_' + parts[1])

    encoded = pd.DataFrame(ohe_output, columns=ohe_labels)
    listings = pd.concat([listings.drop(cat_columns, axis=1), encoded], axis=1)

    # Feature columns handed to the model, in this order.
    selected_columns = ['sqft', 'bedrooms', 'bathrooms', 'image_False', 'FSA_M1B', 'FSA_M1M',
       'FSA_M1P', 'FSA_M1V', 'FSA_M1W', 'FSA_M2M', 'FSA_M3C', 'FSA_M3K',
       'FSA_M4E', 'FSA_M4V', 'FSA_M4W', 'FSA_M5G', 'FSA_M5J', 'FSA_M5R',
       'FSA_M5S', 'FSA_M5V', 'FSA_M6B', 'FSA_M6E', 'FSA_M6G', 'FSA_M6J',
       'FSA_M6K', 'FSA_M6M', 'FSA_M6P', 'rental_type_apartment',
       'rental_type_condo', 'rental_type_house', 'rental_type_loft',
       'rental_type_townhouse', 'furnished_NOT', 'furnished_YES',
       'pet_friendly_False']
    listings = listings[['id'] + selected_columns]
    return listings, selected_columns
    

In [146]:
#New_DF, selected_columns = preprocess(DF)


In [147]:
def predict(DF, selected_columns, model_path='xgb_model.pickle'):
    """Score listings with the pickled model and return predictions keyed by id.

    Parameters
    ----------
    DF : pandas.DataFrame
        Feature frame containing an ``id`` column and every column named in
        ``selected_columns``. Side effect: a ``pred`` column holding the
        model output is added to this frame in place.
    selected_columns : list of str
        Feature columns passed to the model's ``predict``.
    model_path : str, optional
        Path of the pickled model; any object exposing ``predict`` works.

    Returns
    -------
    dict
        ``{listing_id: {'pred': predicted_value}}``.
    """
    # Context manager so the file handle is closed even if unpickling fails.
    # NOTE(review): pickle.load can execute arbitrary code; only load model
    # files from a trusted location.
    with open(model_path, 'rb') as model_file:
        xgb_model = pickle.load(model_file)

    DF['pred'] = xgb_model.predict(DF[selected_columns])
    # Transposing puts the ids on the columns, so to_dict() nests per id.
    prediction_mapping = DF[['id', 'pred']].set_index('id').T.to_dict()
    return prediction_mapping

In [148]:
#prediction_mapping = predict(New_DF,selected_columns)

In [149]:
#DF['pred'] = DF['id'].map(lambda x: prediction_mapping[x]['pred'] if x in prediction_mapping else 'Not Feasible to predict')

In [150]:
#MSE = 457.09

In [151]:
def predictPrice(DF):
    """Attach a predicted price to every listing in ``DF``.

    Runs ``preprocess`` and ``predict`` and stores the result in a ``pred``
    column on ``DF`` itself, which is also returned. Listings whose id is
    absent from the prediction mapping get the text
    'Not Feasible to predict' instead of a number.
    """
    features, feature_names = preprocess(DF)
    price_by_id = predict(features, feature_names)

    def _lookup(listing_id):
        # Ids missing from the mapping were not scored by the model.
        if listing_id in price_by_id:
            return price_by_id[listing_id]['pred']
        return 'Not Feasible to predict'

    DF['pred'] = DF['id'].map(_lookup)
    return DF

In [158]:
# Score every listing; predictPrice writes the 'pred' column onto DF and returns it.
DF = predictPrice(DF)

In [159]:
# Display the scored listings.
DF

Unnamed: 0,id,title,price,sqft,image,url,post_published_date,lat,long,postal_code,FSA,rental_type,bedrooms,bathrooms,furnished,pet_friendly,description,source,feasibility,pred
0,c_7201637792,"Gorgeous Studio, all included Available now Sa...",975,,https://images.craigslist.org/00I0I_cAz2ubPEpt...,https://toronto.craigslist.org/tor/apa/d/sarni...,2020-09-23,42.9888,-82.3592,N7S 4R1,N7S,apartment,0,1,,False,0BR / 1Ba available now apartment laundry in b...,craigslist,False,Not Feasible to predict
1,c_7201400212,2548 Kipling Avenue (2 Bedroom) Top floors.,1900,,https://images.craigslist.org/00101_hWFMX4hdM4...,https://toronto.craigslist.org/tor/apa/d/etobi...,2020-09-23,43.7432,-79.5876,M9V 0A1,M9V,apartment,,,,False,apartment 2548 Kipling Avenue (2 Bedroom) Top ...,craigslist,True,1672.33
2,c_7201401064,2313 Islington Avenue. Newly fully renovated a...,1500,,https://images.craigslist.org/00w0w_h0PLB6n3Ac...,https://toronto.craigslist.org/tor/apa/d/etobi...,2020-09-23,43.7144,-79.5909,M9W 7K2,M9W,apartment,,,,False,apartment 2313 Islington Avenue. Newly fully r...,craigslist,True,1672.33
3,c_7201397332,"1760, 1770 and 1780 Wilson Avenue (2 Bedroom)",1850,,https://images.craigslist.org/01111_iklo7CDAqY...,https://toronto.craigslist.org/tor/apa/d/downs...,2020-09-23,43.7334,-79.5116,M3L 1S1,M3L,apartment,,,,False,"apartment 1760, 1770 and 1780 Wilson Avenue (2...",craigslist,True,1672.33
4,c_7201398151,2460-2500 Keele Street (Bachelor & 1 Bedroom),1250,,https://images.craigslist.org/00C0C_9MyyIc46N1...,https://toronto.craigslist.org/tor/apa/d/north...,2020-09-23,43.7137,-79.4869,M6L 1R7,M6L,apartment,,,,False,apartment 2460-2500 Keele Street (Bachelor & 1...,craigslist,True,1672.33
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2870,c_7225377030,"2 BEDROOM - OPEN LIV/KIT/DIN, LARGE CLOSETS, D...",1500,,https://images.craigslist.org/00q0q_7sRem6tRqA...,https://toronto.craigslist.org/tor/apa/d/downt...,2020-11-04,43.6683,-79.4205,M6G 3B5,M6G,apartment,2,1,,False,2BR / 1Ba available dec 1 apartment/ 2br -356 ...,craigslist,True,1766.45
2871,c_7219740847,"FAB 3 STOREY THREE BEDROOM, OPEN LIV/DIN, HUGE...",2700,,https://images.craigslist.org/00Y0Y_2UORaTGdts...,https://toronto.craigslist.org/tor/apa/d/downt...,2020-10-24,43.6683,-79.4205,M6G 3B5,M6G,house,3,1,,False,3BR / 1Ba available dec 1 apartment/ 3br -Litt...,craigslist,True,2724.78
2872,c_7219739938,"FAB 3 STOREY TWO BEDROOM , OPEN LIV/DIN, HUGE ...",2700,,https://images.craigslist.org/00Y0Y_2UORaTGdts...,https://toronto.craigslist.org/tor/apa/d/downt...,2020-10-24,43.6683,-79.4205,M6G 3B5,M6G,house,2,1,,False,2BR / 1Ba available dec 1 apartment/ 2br -Litt...,craigslist,True,2449.63
2873,c_7218996538,"FAB 3 STOREY TWO BEDROOM , OPEN LIV/DIN, HUGE ...",2700,,https://images.craigslist.org/00Y0Y_2UORaTGdts...,https://toronto.craigslist.org/tor/apa/d/downt...,2020-10-23,43.6683,-79.4205,M6G 3B5,M6G,house,2,1,,False,2BR / 1Ba available dec 1 apartment/ 2br -Litt...,craigslist,True,2449.63
