In [1]:
import plotly.express as px
from shapely.geometry import Polygon, MultiPolygon
import geopandas as gpd
import matplotlib.pyplot as plt
import geojsoncontour

In [2]:
import pandas as pd
import numpy as np
import re
import json
from datetime import datetime

from scipy.interpolate import griddata
from numpy import linspace

In [3]:
from IPython.display import HTML
# Emit an HTML/JavaScript snippet as this cell's output. The script hides every
# `div.input` element once the document is ready and flips their visibility each
# time the "here" link is clicked.
# NOTE(review): relies on jQuery (`$`) and on `div.input` elements existing in the
# rendering front end - confirm for the target viewer.
HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
The raw code for this IPython notebook is by default hidden for easier reading.
To toggle on/off the raw code, click <a href="javascript:code_toggle()">here</a>.''')

In [4]:
# Cufflinks (pandas wrapper around plotly) plus notebook display settings
import cufflinks
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every top-level expression in a cell, not only the last one
InteractiveShell.ast_node_interactivity = 'all'

from plotly.offline import iplot
# Switch cufflinks to offline plotting
cufflinks.go_offline()

# NOTE(review): this used to read "Set global theme", but no theme is set anywhere below

import plotly.figure_factory as ff

import plotly.graph_objects as go
# Central city point used to center every map: Swietokrzyska subway station.
# Stored as a (latitude, longitude) tuple; the plotting helpers read [0] as lat and [1] as lon.
center_coors=52.235176, 21.008393

In [5]:
import random
import warnings
# Suppress every warning for the rest of the session.
# NOTE(review): this also hides deprecation and pandas copy/assignment warnings.
warnings.filterwarnings("ignore")

In [6]:
# Load the input workbook directly from its raw GitHub URL (requires network access).
df = pd.read_excel(r"https://raw.githubusercontent.com/Jan-Majewski/Project_Portfolio/master/03_Real_Estate_pricing_in_Warsaw/RE_models_input_enriched.xlsx")

In [7]:
df.columns

Index(['Id', 'Area', 'Price', 'latitude', 'longitude', 'build_year',
       'building_floors_num', 'rooms_num', 'City', 'subdistrict', 'market',
       'Building_material', 'Building_ownership', 'Building_type',
       'Construction_status', 'floor_no', 'Heating', 'Windows_type',
       'Equipment_types_dishwasher', 'Equipment_types_fridge',
       'Equipment_types_furniture', 'Equipment_types_oven',
       'Equipment_types_stove', 'Equipment_types_tv',
       'Equipment_types_washing_machine', 'Extras_types_air_conditioning',
       'Extras_types_attic', 'Extras_types_balcony', 'Extras_types_basement',
       'Extras_types_garage', 'Extras_types_garden', 'Extras_types_lift',
       'Extras_types_separate_kitchen', 'Extras_types_terrace',
       'Extras_types_two_storey', 'Extras_types_usable_room',
       'Media_types_cable-television', 'Media_types_electricity',
       'Media_types_internet', 'Media_types_phone', 'Media_types_sewage',
       'Media_types_water', 'Security_types_alarm

In [8]:
# Subset of columns kept for the rest of the notebook: identifiers, price/area,
# the lon_mod/lat_mod grid keys, commute distances/times and restaurant statistics.
base_columns=['Id', 'Area', 'Price', 'latitude', 'longitude', 'build_year',
       'market', 'district', 'unit_price', 'lon_mod', 'lat_mod', 'grid_price',
       'sample_size', 'distance_transit_8AM', 'time_transit_8AM',
       'distance_driving_8AM', 'time_driving_8AM',
       'distance_return_transit_5PM', 'time_return_transit_5PM',
       'distance_return_driving_5PM', 'time_return_driving_5PM',
       'restaurant_price_level', 'restaurant_mean_rating',
       'restaurant_mean_popularity', 'restaurant_count']

In [9]:
# Keep only the columns listed in base_columns. This rebinds `df`, so the load
# cell has to be re-run to get the dropped columns back.
df=df[base_columns]

In [10]:
df.shape

(11788, 25)

In [11]:
# Average every numeric column per (lon_mod, lat_mod) grid cell.
# numeric_only=True is needed on pandas >= 2.0, where mean() raises on the
# non-numeric columns (market, district) instead of silently dropping them.
df_map=df.groupby(['lon_mod', 'lat_mod'],as_index=False).mean(numeric_only=True)

In [38]:
# Order the grid cells by longitude first, then latitude.
df_map=df_map.sort_values(by=["lon_mod","lat_mod"])

In [39]:
df_map.columns

Index(['lon_mod', 'lat_mod', 'Id', 'Area', 'Price', 'latitude', 'longitude',
       'build_year', 'unit_price', 'grid_price', 'sample_size',
       'distance_transit_8AM', 'time_transit_8AM', 'distance_driving_8AM',
       'time_driving_8AM', 'distance_return_transit_5PM',
       'time_return_transit_5PM', 'distance_return_driving_5PM',
       'time_return_driving_5PM', 'restaurant_price_level',
       'restaurant_mean_rating', 'restaurant_mean_popularity',
       'restaurant_count'],
      dtype='object')

In [None]:
""

In [None]:
# No cell in this notebook adds a "hover_text" column to df_map, so direct
# indexing raises KeyError and stops Run All. .get() yields None when the
# column is absent and the column itself once it exists.
df_map.get("hover_text")

# Dividing data into percentile buckets (100 quantile bins)

In [40]:
# Features scored in natural order: the highest raw values receive the top
# bucket label.
normal_buckets=['restaurant_price_level',
       'restaurant_mean_rating', 'restaurant_mean_popularity',
       'restaurant_count','build_year']

In [41]:
# Bucket labels 1.0 .. 100.0 in ascending order (float array of length 100).
normal_labels=np.arange(1,101,dtype=float)

In [42]:
# Features scored in reverse order: the lowest raw values (cheaper, closer,
# faster) receive the top bucket label.
reversed_buckets=['unit_price','distance_transit_8AM', 'time_transit_8AM', 'distance_driving_8AM',
       'time_driving_8AM', 'distance_return_transit_5PM',
       'time_return_transit_5PM', 'distance_return_driving_5PM',
       'time_return_driving_5PM',"Price","Area"]

In [43]:
# Same labels as normal_labels but in descending order (100.0 .. 1.0);
# copied so the two arrays do not share memory.
reversed_labels=normal_labels[::-1].copy()


In [44]:
# Uniform noise in [0, 1e-4), one value per cell of df_map.iloc[:, 3:].
# Presumably added to break ties before quantile bucketing - TODO confirm.
# A fixed seed keeps the resulting buckets identical across kernel restarts.
noise=np.random.default_rng(42).random((df_map.shape[0],df_map.shape[1]-3))/10000

In [45]:
# Add the noise to every column from position 3 onward; the first three
# columns (lon_mod, lat_mod, Id) are left untouched.
# Not idempotent: each re-run of this cell stacks another layer of noise.
df_map.iloc[:,3:]=df_map.iloc[:,3:]+noise

In [46]:
# Start the score table from the first three columns (lon_mod, lat_mod, Id).
# The explicit copy makes df_dec an independent frame, so the column
# assignments in the following cells do not target a slice of df_map.
df_dec=df_map.iloc[:,:3].copy()

In [47]:
# Number of quantile buckets. It has to equal the number of labels handed to
# pd.qcut, so it is derived from the label array instead of being hard-coded.
bin_count=len(normal_labels)

# Higher raw value -> higher score.
for feature in normal_buckets:
    df_dec[feature]=pd.qcut(df_map[feature],bin_count,labels=normal_labels).astype(int)

# Lower raw value -> higher score.
for feature in reversed_buckets:
    df_dec[feature]=pd.qcut(df_map[feature],bin_count,labels=reversed_labels).astype(int)

In [48]:
# Raw (un-bucketed) values carried next to the scores so they can be used as
# thresholds later on.
df_dec["filter_unit_price"]=df_map["unit_price"]
# Average of the 8AM trip and the 5PM return trip, per transport mode.
df_dec["filter_transit_time"]=(df_map["time_transit_8AM"]+df_map["time_return_transit_5PM"])/2
df_dec["filter_driving_time"]=(df_map["time_driving_8AM"]+df_map["time_return_driving_5PM"])/2
df_dec["filter_distance"]=df_map["distance_driving_8AM"]

# Creating key KPIs

## Transport KPI

In [49]:
# Weight of the driving score in the transport KPI; transit takes the
# remainder so the two weights sum to 1.
car_ratio=0.2
commute_ratio=1-car_ratio

In [50]:
# Transport KPI: weighted blend of the 8AM driving and 8AM transit time scores.
df_dec["KPI_transport"]=(
    df_dec["time_driving_8AM"]*car_ratio
    +df_dec["time_transit_8AM"]*commute_ratio
)

## Culture KPI

In [51]:
# Weights of the four restaurant scores in the culture KPI; the price-level
# weight takes whatever is left so the weights sum to 1.
restaurant_popularity_share=0.2
restaurant_rating_share=0.3
restaurant_count_share=0.4
restaurant_price_share=1-restaurant_popularity_share-restaurant_rating_share-restaurant_count_share

In [52]:
# Culture KPI: weighted sum of the four restaurant scores.
df_dec["KPI_Culture"]=(
    df_dec["restaurant_mean_popularity"]*restaurant_popularity_share
    +df_dec["restaurant_mean_rating"]*restaurant_rating_share
    +df_dec["restaurant_count"]*restaurant_count_share
    +df_dec["restaurant_price_level"]*restaurant_price_share
)

## Price KPI

In [53]:
# Weight of the unit-price score in the price KPI; the absolute-price score
# gets the remainder, which is 0 with the current setting.
unit_price_ratio=1
abs_price_ratio=1-unit_price_ratio

In [54]:
# Price KPI: weighted blend of the unit-price and absolute-price scores.
df_dec["KPI_Price"]=(
    df_dec["unit_price"]*unit_price_ratio
    +df_dec["Price"]*abs_price_ratio
)

In [55]:
df_dec[["KPI_Price","KPI_transport","KPI_Culture"]]

Unnamed: 0,KPI_Price,KPI_transport,KPI_Culture
0,76,7.0,47.2
1,75,4.0,25.6
2,56,6.0,45.0
3,81,31.2,35.7
4,81,15.0,79.4
...,...,...,...
405,77,11.2,28.9
406,74,27.8,85.0
407,75,12.8,35.2
408,70,10.4,29.2


# Visualizing optimal area

In [66]:
# [min, max] extent of the grid keys: x is longitude (lon_mod), y is latitude (lat_mod).
x_range=[df_map.lon_mod.min(),df_map.lon_mod.max()]
y_range=[df_map.lat_mod.min(),df_map.lat_mod.max()]


In [92]:
def create_grid_data(x,y,z,grid_size):
    """Interpolate scattered (x, y, z) samples onto a regular square grid.

    Parameters
    ----------
    x, y : array-like exposing ``.min()`` / ``.max()``
        Coordinates of the samples.
    z : array-like
        Value observed at each (x, y) sample.
    grid_size : int
        Number of grid nodes along each axis.

    Returns
    -------
    tuple
        ``(xi, yi, zi)``: the two 1-D axis vectors and the linearly
        interpolated surface, with one row per ``yi`` node and one column per
        ``xi`` node.
    """
    grid_x = linspace(x.min(), x.max(), grid_size)
    grid_y = linspace(y.min(), y.max(), grid_size)
    # A row vector broadcast against a column vector describes the full mesh.
    mesh = (grid_x[None, :], grid_y[:, None])
    surface = griddata((x, y), z, mesh, method="linear")
    return grid_x, grid_y, surface


In [138]:
def create_grid_map(x,y,z,grid_size,threshold,operator="below"):
    """Build a 1/NaN mask over a regular grid from a threshold on interpolated z.

    The scattered samples are linearly interpolated onto a
    ``grid_size`` x ``grid_size`` grid spanning the x and y extents, then each
    node is compared with ``threshold``.

    Parameters
    ----------
    x, y : array-like exposing ``.min()`` / ``.max()``
        Coordinates of the samples.
    z : array-like
        Value observed at each sample.
    grid_size : int
        Number of grid nodes along each axis.
    threshold : float
        Cut-off applied to the interpolated values.
    operator : str, default "below"
        ``"below"`` keeps nodes with value <= threshold; any other string
        keeps nodes with value >= threshold.

    Returns
    -------
    numpy.ndarray
        Array of shape (grid_size, grid_size) holding 1 where the node passes
        and NaN elsewhere. Nodes whose interpolated value is NaN fail either
        comparison and therefore stay NaN.
    """
    xi = linspace(x.min(),x.max(),grid_size)
    yi = linspace(y.min(),y.max(),grid_size)
    zi = griddata((x, y), z, (xi[None,:], yi[:,None]), method="linear")

    # np.nan is used because the np.NaN alias no longer exists in NumPy 2.0.
    if operator=="below":
        zi_map=np.where(zi<=threshold,1, np.nan)
    else:
        zi_map=np.where(zi>=threshold,1, np.nan)

    return zi_map

In [139]:
# Interpolated unit_price surface on a 50x50 grid (third element of the returned tuple).
zi=create_grid_data(df_map.lon_mod,df_map.lat_mod,df_map.unit_price,50)[2]

In [140]:
# 1/NaN mask on a 50x50 grid: 1 where the interpolated unit_price is <= 12000.
zi_map=create_grid_map(df_map.lon_mod,df_map.lat_mod,df_map.unit_price,50,12000,operator="below")

In [209]:
def create_geojson(x,y,z, rescale,grid_size=100):
    """Turn scattered (x, y, z) samples into filled-contour GeoJSON polygons.

    The samples are linearly interpolated onto a regular grid, contoured with
    matplotlib in steps of 5, and the contour bands are converted to GeoJSON
    with ``geojsoncontour``.

    Parameters
    ----------
    x, y : array-like exposing ``.min()`` / ``.max()``
        Coordinates of the samples.
    z : array-like
        Value observed at each sample.
    rescale : float
        Divisor applied to each contour band's title value before it is
        stored in the returned table.
    grid_size : int, default 100
        Number of grid nodes along each axis. The default lets the
        four-argument calls in this notebook run.

    Returns
    -------
    tuple
        ``(json_dict, df_contour)``: the GeoJSON as a dict whose features have
        been given sequential integer ``id`` values, and a DataFrame with
        columns ``Id`` (feature id) and ``value`` (band title / rescale).
    """
    xi = linspace(x.min(),x.max(),grid_size)
    yi = linspace(y.min(),y.max(),grid_size)
    zi = griddata((x, y), z, (xi[None,:], yi[:,None]), method="linear")

    # Contour levels every 5 units, from the truncated minimum up to at least
    # the maximum of the non-NaN interpolated values.
    levels = range(int(np.nanmin(zi)),int(np.nanmax(zi))+5,5)
    cs = plt.contourf(xi,yi,zi,levels,cmap=plt.cm.jet)
    # Only the contour set is needed; close the figure so nothing is drawn.
    plt.close()

    geojson = geojsoncontour.contourf_to_geojson(
        contourf=cs,
        ndigits=3,
    )

    # The converter returns a JSON string: parse it as JSON rather than
    # eval()-ing it as Python (eval breaks on true/false/null and executes
    # arbitrary expressions).
    json_dict=json.loads(geojson)

    features=json_dict["features"]
    arr_temp=np.ones([len(features),2])

    for i, feature in enumerate(features):
        # Give every polygon an id so a choropleth can join on it.
        feature["id"]=i

        arr_temp[i,0]=i
        # assumes the feature title parses as a single float - TODO confirm
        # against the installed geojsoncontour version
        arr_temp[i,1]=float(feature["properties"]["title"])/rescale

    df_contour=pd.DataFrame(arr_temp, columns=["Id","value"])

    return(json_dict, df_contour)

In [219]:
def create_geojson_with_filter(x,y,z, rescale,filter_map,grid_size):
    """Like ``create_geojson``, but masks the interpolated grid before contouring.

    Parameters
    ----------
    x, y : array-like exposing ``.min()`` / ``.max()``
        Coordinates of the samples.
    z : array-like
        Value observed at each sample.
    rescale : float
        Divisor applied to each contour band's title value before it is
        stored in the returned table.
    filter_map : numpy.ndarray
        Mask multiplied element-wise with the interpolated grid; it must
        broadcast against a (grid_size, grid_size) array. NaN entries blank
        out the corresponding nodes.
    grid_size : int
        Number of grid nodes along each axis.

    Returns
    -------
    tuple
        ``(json_dict, df_contour)``: the GeoJSON as a dict whose features have
        been given sequential integer ``id`` values, and a DataFrame with
        columns ``Id`` (feature id) and ``value`` (band title / rescale).
    """
    xi = linspace(x.min(),x.max(),grid_size)
    yi = linspace(y.min(),y.max(),grid_size)
    zi = griddata((x, y), z, (xi[None,:], yi[:,None]), method="linear")

    # Apply the mask: nodes where filter_map is NaN become NaN.
    zi=zi*filter_map

    # Contour levels every 5 units, from the truncated minimum up to at least
    # the maximum of the remaining non-NaN values.
    levels = range(int(np.nanmin(zi)),int(np.nanmax(zi))+5,5)
    cs = plt.contourf(xi,yi,zi,levels,cmap=plt.cm.jet)
    # Only the contour set is needed; close the figure so nothing is drawn.
    plt.close()

    geojson = geojsoncontour.contourf_to_geojson(
        contourf=cs,
        ndigits=3,
    )

    # The converter returns a JSON string: parse it as JSON rather than
    # eval()-ing it as Python (eval breaks on true/false/null and executes
    # arbitrary expressions).
    json_dict=json.loads(geojson)

    features=json_dict["features"]
    arr_temp=np.ones([len(features),2])

    for i, feature in enumerate(features):
        # Give every polygon an id so a choropleth can join on it.
        feature["id"]=i

        arr_temp[i,0]=i
        # assumes the feature title parses as a single float - TODO confirm
        # against the installed geojsoncontour version
        arr_temp[i,1]=float(feature["properties"]["title"])/rescale

    df_contour=pd.DataFrame(arr_temp, columns=["Id","value"])

    return(json_dict, df_contour)

In [220]:
def plot_geojson_countour(geojson, df, title):
    """Draw GeoJSON contour polygons as a semi-transparent choropleth map.

    Parameters
    ----------
    geojson : dict
        GeoJSON whose features carry the ids listed in ``df.Id``.
    df : pandas.DataFrame
        Table with an ``Id`` column (feature ids) and a ``value`` column
        (colour value of each feature).
    title : str
        Figure title.

    The map is centred on the module-level ``center_coors`` and rendered with
    ``iplot``; nothing is returned.
    """
    # Base map centred on the reference point, default zoom and style.
    map_center = dict(lat = center_coors[0], lon = center_coors[1])
    mapbox_settings = dict(center = map_center, zoom = 10, style = "carto-positron")

    layout = go.Layout(
        title=title,
        height = 800,
        # top, bottom, left and right margins
        margin = dict(t = 80, b = 0, l = 0, r = 0),
        font = dict(color = 'dark grey', size = 18),
        mapbox = mapbox_settings,
    )

    # Colour scale is pinned at zmax=5 instead of being derived from the data.
    contour_trace = go.Choroplethmapbox(
        geojson=geojson,
        locations=df.Id,
        z=df.value,
        colorscale="jet",
        zauto=False,
        zmax=5,
        marker_line_width=0,
        marker=dict(opacity=0.5),
    )

    iplot(dict(data=[contour_trace], layout=layout))

In [337]:
def plot_geojson_countour_with_points(geojson, df, title,trace_annotate):
    """Draw the contour choropleth with an extra trace layered on top.

    Parameters
    ----------
    geojson : dict
        GeoJSON whose features carry the ids listed in ``df.Id``.
    df : pandas.DataFrame
        Table with an ``Id`` column (feature ids) and a ``value`` column
        (colour value of each feature).
    title : str
        Figure title.
    trace_annotate : plotly trace
        Additional trace appended after the choropleth in the figure data.

    The map is centred on the module-level ``center_coors`` and rendered with
    ``iplot``; nothing is returned.
    """
    # Base map centred on the reference point, default zoom and style.
    map_center = dict(lat = center_coors[0], lon = center_coors[1])
    mapbox_settings = dict(center = map_center, zoom = 10, style = "carto-positron")

    layout = go.Layout(
        title=title,
        height = 800,
        # top, bottom, left and right margins
        margin = dict(t = 80, b = 0, l = 0, r = 0),
        font = dict(color = 'dark grey', size = 18),
        mapbox = mapbox_settings,
    )

    # Colour scale is pinned at zmax=5 instead of being derived from the data.
    contour_trace = go.Choroplethmapbox(
        geojson=geojson,
        locations=df.Id,
        z=df.value,
        colorscale="jet",
        zauto=False,
        zmax=5,
        marker_line_width=0,
        marker=dict(opacity=0.5),
    )

    # Choropleth first, annotation trace second.
    iplot(dict(data=[contour_trace, trace_annotate], layout=layout))

## Plotting 3 key KPIs

In [339]:
# Price KPI contours: rescale divisor 20, 100 grid nodes per axis.
json_dict_price, df_contour_price=create_geojson(df_dec.lon_mod,df_dec.lat_mod,df_dec.KPI_Price,20,100)
plot_geojson_countour(json_dict_price, df_contour_price, "Price KPI heatmap")

In [374]:
# Transport KPI contours: rescale divisor 20, 50 grid nodes per axis.
json_dict_transport, df_contour_transport=create_geojson(df_dec.lon_mod,df_dec.lat_mod,df_dec.KPI_transport,20,50)
plot_geojson_countour(json_dict_transport, df_contour_transport, "Transport Heatmap")

In [377]:
# Culture KPI contours: rescale divisor 20, 50 grid nodes per axis.
json_dict_culture, df_contour_culture=create_geojson(df_dec.lon_mod,df_dec.lat_mod,df_dec.KPI_Culture,20,50)
plot_geojson_countour(json_dict_culture, df_contour_culture, "Culture Heatmap")

## Price KPI heatmap with filter

In [378]:
df_dec.columns

Index(['lon_mod', 'lat_mod', 'Id', 'restaurant_price_level',
       'restaurant_mean_rating', 'restaurant_mean_popularity',
       'restaurant_count', 'build_year', 'unit_price', 'distance_transit_8AM',
       'time_transit_8AM', 'distance_driving_8AM', 'time_driving_8AM',
       'distance_return_transit_5PM', 'time_return_transit_5PM',
       'distance_return_driving_5PM', 'time_return_driving_5PM', 'Price',
       'Area', 'filter_unit_price', 'filter_transit_time',
       'filter_driving_time', 'filter_distance', 'KPI_transport',
       'KPI_Culture', 'KPI_Price'],
      dtype='object')

In [387]:
# 1/NaN masks on a 100x100 grid: unit price <= 12000 and mean driving time <= 20.
price_filter=create_grid_map(df_dec.lon_mod,df_dec.lat_mod,df_dec.filter_unit_price,100,12000,operator="below")
drivetime_filter=create_grid_map(df_dec.lon_mod,df_dec.lat_mod,df_dec.filter_driving_time,100,20,operator="below")

In [388]:
np.nansum(price_filter)

5674.0

In [389]:
# Element-wise product of the two masks: 1 where both conditions hold, NaN otherwise.
filter_combined=price_filter*drivetime_filter

In [390]:
np.nansum(filter_combined)

1934.0

In [391]:
# Price KPI contours restricted to the combined mask; the grid size (100) equals the one used to build the masks.
json_dict_price, df_contour_price=create_geojson_with_filter(df_dec.lon_mod,df_dec.lat_mod,df_dec.KPI_Price,20,filter_combined,100)

In [392]:
plot_geojson_countour(json_dict_price, df_contour_price, "Price KPI heatmap")

## Adding annotation

In [395]:

    # Hover markers: one faint point per grid cell, coloured by unit price.
    # The hover text is built by zipping the columns, so every label stays
    # positionally aligned with lat/lon even if df_map does not carry a
    # default 0..n-1 index (label lookup with [i] would misalign or fail).
    trace_annotate=go.Scattermapbox(
        lat=df_map.lat_mod,
        lon=df_map.lon_mod,
        name="",
        hovertemplate=
        "<b>Price: %{marker.color:,.2f} PLN/m2</br></br>"+
        '<b>%{text}</br>',
        mode='markers',
        marker=dict(
            color=df_map.unit_price,
            colorscale="jet",
            cauto=False,
            cmax=20000,
            opacity=0.1,
            size=10),
        text=[
            'Drivetime:{:.2f}min</br>Restaurants count:{:.2f}'.format(drive_minutes,restaurants)
            for drive_minutes,restaurants in zip(df_map.time_driving_8AM,df_map.restaurant_count)
        ]
    )

    layout = go.Layout(
        height = 800,
        # top, bottom, left and right margins
        margin = dict(t = 80, b = 0, l = 0, r = 0),
        font = dict(color = 'dark grey', size = 18),

        mapbox = dict(
            center = dict(
                lat = center_coors[0],
                lon = center_coors[1]
            ),
            # default level of zoom
            zoom = 10,
            # default map style
            style = "carto-positron"
        )
    )

    # Preview the annotation layer on its own.
    figure=dict(
        data=[trace_annotate],
        layout=layout,
    )

    iplot(figure)

In [396]:
plot_geojson_countour_with_points(json_dict_price, df_contour_price, "Price KPI heatmap with annotation",trace_annotate)

# Final score

## Scenario A: Classic Kowalski
 - Median wage employee
 - High price sensitivity
 - Prefers transport by car
 - Low interest in culture and dining out
 - Budget cap of 10k/m2
 - Drivetime cap of 30 mins

 


In [38]:
# Scenario weights for the price and transport KPIs; culture takes the
# remainder so the three weights sum to 1.
price_share=0.7
transport_share=0.2
culture_share=1-price_share-transport_share

In [39]:
# Overall score: weighted blend of the three KPIs using the scenario shares.
df_dec["KPI_Summary"]=(
    df_dec["KPI_Price"]*price_share
    +df_dec["KPI_transport"]*transport_share
    +df_dec["KPI_Culture"]*culture_share
)

In [40]:
# grid_size is passed explicitly (100 nodes per axis, as for the Price KPI map)
# so the call supplies all five arguments of create_geojson.
json_dict_summary, df_contour_summary=create_geojson(df_dec.lon_mod,df_dec.lat_mod,df_dec.KPI_Summary,20,grid_size=100)

In [41]:
plot_geojson_countour(json_dict_summary, df_contour_summary, "Optimal location")

### Filter by price

In [42]:
# Upper bound applied to filter_unit_price in the next cell.
price_limit=10000

In [43]:
# Blank out the score where the unit price exceeds the limit.
# np.nan is used because the np.NaN alias no longer exists in NumPy 2.0.
df_dec["KPI_Summary"]=np.where(df_dec.filter_unit_price<=price_limit,df_dec.KPI_Summary,np.nan)

In [44]:
# grid_size is passed explicitly (100 nodes per axis, as for the Price KPI map)
# so the call supplies all five arguments of create_geojson.
json_dict_summary, df_contour_summary=create_geojson(df_dec.lon_mod,df_dec.lat_mod,df_dec.KPI_Summary,20,grid_size=100)

In [45]:
plot_geojson_countour(json_dict_summary, df_contour_summary, "Optimal location within budget")

### Filter by drivetime to center

In [46]:
# Upper bound applied to filter_driving_time in the next cell.
drivetime_limit=30

In [47]:
# Blank out the score where the mean driving time exceeds the limit.
# np.nan is used because the np.NaN alias no longer exists in NumPy 2.0.
df_dec["KPI_Summary"]=np.where(df_dec.filter_driving_time<=drivetime_limit,df_dec.KPI_Summary,np.nan)

In [48]:
# grid_size is passed explicitly (100 nodes per axis, as for the Price KPI map)
# so the call supplies all five arguments of create_geojson.
json_dict_summary, df_contour_summary=create_geojson(df_dec.lon_mod,df_dec.lat_mod,df_dec.KPI_Summary,20,grid_size=100)

In [49]:
plot_geojson_countour(json_dict_summary, df_contour_summary, "Optimal location within budget and drivetime")

## Scenario B: Aspiring professional
 - High wage
 - Low price sensitivity
 - High focus on short commute to work
 - High interest in culture and dining out
 - Budget cap of 15k/m2
 - Drivetime cap of 15 mins


In [50]:
# Scenario weights for the price and transport KPIs; culture takes the
# remainder so the three weights sum to 1.
price_share=0.3
transport_share=0.5
culture_share=1-price_share-transport_share

In [51]:
# Overall score: weighted blend of the three KPIs using the scenario shares.
df_dec["KPI_Summary"]=(
    df_dec["KPI_Price"]*price_share
    +df_dec["KPI_transport"]*transport_share
    +df_dec["KPI_Culture"]*culture_share
)

In [52]:
# grid_size is passed explicitly (100 nodes per axis, as for the Price KPI map)
# so the call supplies all five arguments of create_geojson.
json_dict_summary, df_contour_summary=create_geojson(df_dec.lon_mod,df_dec.lat_mod,df_dec.KPI_Summary,20,grid_size=100)

In [53]:
plot_geojson_countour(json_dict_summary, df_contour_summary, "Optimal location")

### Filter by price

In [54]:
# Upper bound applied to filter_unit_price in the next cell.
price_limit=15000

In [55]:
# Blank out the score where the unit price exceeds the limit.
# np.nan is used because the np.NaN alias no longer exists in NumPy 2.0.
df_dec["KPI_Summary"]=np.where(df_dec.filter_unit_price<=price_limit,df_dec.KPI_Summary,np.nan)

In [56]:
# grid_size is passed explicitly (100 nodes per axis, as for the Price KPI map)
# so the call supplies all five arguments of create_geojson.
json_dict_summary, df_contour_summary=create_geojson(df_dec.lon_mod,df_dec.lat_mod,df_dec.KPI_Summary,20,grid_size=100)

In [57]:
plot_geojson_countour(json_dict_summary, df_contour_summary, "Optimal location within budget")

### Filter by drivetime to center

In [58]:
# Upper bound applied to filter_driving_time in the next cell.
# NOTE(review): the Scenario B description lists a drivetime cap of 15 mins,
# while this cell uses 20 - confirm which value is intended.
drivetime_limit=20

In [59]:
# Blank out the score where the mean driving time exceeds the limit.
# np.nan is used because the np.NaN alias no longer exists in NumPy 2.0.
df_dec["KPI_Summary"]=np.where(df_dec.filter_driving_time<=drivetime_limit,df_dec.KPI_Summary,np.nan)

In [60]:
# grid_size is passed explicitly (100 nodes per axis, as for the Price KPI map)
# so the call supplies all five arguments of create_geojson.
json_dict_summary, df_contour_summary=create_geojson(df_dec.lon_mod,df_dec.lat_mod,df_dec.KPI_Summary,20,grid_size=100)

In [61]:
plot_geojson_countour(json_dict_summary, df_contour_summary, "Optimal location within budget and drivetime")