### Visualization of the residuals and important features

In [1]:
%matplotlib inline
import json
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px

Load the GeoJSON file that contains the ZIP code boundary geometries.

In [2]:
# Load the Florida ZIP code GeoJSON; it is passed as `geojson=` to the choropleth maps.
# JSON text is UTF-8, so decode it explicitly rather than relying on the
# platform's default encoding (which is not UTF-8 everywhere).
with open('./data/fl_florida_zip_codes_geo.min.json', encoding='utf-8') as geojson_file:
    zipcodes = json.load(geojson_file)
    

In [56]:
# Read the Tampa prediction table, using the first CSV column as the row index,
# then preview the first rows.
# NOTE(review): the preview still shows an 'Unnamed: 0' column, which suggests the
# CSV was written with an extra index column -- confirm and drop it if unused.
df = pd.read_csv(
    './data/Tampa_df_predict_2018_w2016.csv',
    index_col=0,
)
df.head()

Unnamed: 0,zri_label,RegionName,zri,year,gini_index,income_per_capita,median_age,median_income,family_households,nonfamily_households,...,rent_under10,rent_tento50,rent_over50,rent_uncomputed,females,age_under_18,age_18to60,age_over_60,water_land_ratio,residuals
0,7.250636,33647,7.198184,2016,0.4592,39069.0,34.3,76755.0,15828.0,7224.0,...,342.0,6910.0,2115.0,421.0,32180.0,17619.0,37924.0,6737.0,0.044168,0.019384
1,7.16472,33511,7.119636,2016,0.4016,27653.0,35.8,56077.0,13705.0,7248.0,...,170.0,7808.0,2225.0,325.0,28316.0,13432.0,31684.0,8153.0,0.054533,0.007277
2,7.141245,33578,7.096721,2016,0.4081,27870.0,33.0,58334.0,11374.0,6067.0,...,127.0,6416.0,1470.0,418.0,24101.0,12417.0,28289.0,4816.0,0.114368,0.005334
3,6.954639,34668,6.930495,2016,0.4097,18412.0,46.2,33697.0,10721.0,7452.0,...,98.0,4092.0,1680.0,560.0,23504.0,9289.0,21557.0,12361.0,0.167757,-0.025077
4,7.267525,34698,7.201916,2016,0.4639,30273.0,53.1,45254.0,9436.0,8175.0,...,148.0,4290.0,1343.0,376.0,20070.0,6477.0,16771.0,13244.0,0.503604,0.032767


In [57]:
# The choropleth cells look up each row's location via the 'ZipCode' column.
df = df.rename(columns={"RegionName": "ZipCode"})

In [58]:
# Summary statistics of the residuals, used to judge their scale before mapping.
df["residuals"].describe()

count    1469.000000
mean       -0.001093
std         0.029582
min        -0.240476
25%        -0.015988
50%        -0.001194
75%         0.014399
max         0.101179
Name: residuals, dtype: float64

In [59]:
# The residuals are on such a small scale that it's hard to see differences on
# the map, so add a copy scaled by 100 for colouring.
df["residualsX100"] = 100 * df["residuals"]

In [60]:
# Choropleth of the scaled residuals per ZIP code.
# For easier interpretation of the map, the colorbar uses qualitative tick
# labels instead of the actual residual numbers.

# write_html does not create missing folders, so make sure the output folder exists.
Path("./maps").mkdir(parents=True, exist_ok=True)

fig = px.choropleth_mapbox(
    df,
    geojson=zipcodes,
    locations='ZipCode',
    featureidkey='properties.ZCTA5CE10',
    color='residualsX100',
    color_continuous_scale="Plasma",
    range_color=(-9, 9),
    mapbox_style="carto-positron",
    zoom=8,
    center={"lat": 28.2189, "lon": -82.457},
    opacity=0.5,
    title="Tampa metro",
)
fig.update_layout(
    # Keep a small top margin: with t=0 there is no room to draw the figure title.
    margin={"r": 0, "t": 40, "l": 0, "b": 0},
    coloraxis_colorbar=dict(
        title="Residuals",
        tickvals=[-8, 0, 8],
        ticktext=["Overestimated", "Accurately predicted", "Underestimated"],
    ),
)
fig.write_html("./maps/Tampa_residuals.html")

In [40]:
# Choropleth of income per capita per ZIP code, using the reversed 'speed' scale.

# write_html does not create missing folders, so make sure the output folder exists.
Path("./maps").mkdir(parents=True, exist_ok=True)

fig = px.choropleth_mapbox(
    df,
    geojson=zipcodes,
    locations='ZipCode',
    featureidkey='properties.ZCTA5CE10',
    color='income_per_capita',
    color_continuous_scale=px.colors.sequential.speed[::-1],
    range_color=(10000, 80000),
    mapbox_style="carto-positron",
    zoom=8,
    center={"lat": 28.2189, "lon": -82.457},
    opacity=0.5,
    title="Tampa metro",
)
fig.update_layout(
    # Keep a small top margin: with t=0 there is no room to draw the figure title.
    margin={"r": 0, "t": 40, "l": 0, "b": 0},
    coloraxis_colorbar=dict(title="Income per capita ($)"),
)
fig.write_html("./maps/Tampa_income_per_capita.html")

In [48]:
# Choropleth of median age per ZIP code, using the reversed 'ice' scale.

# write_html does not create missing folders, so make sure the output folder exists.
Path("./maps").mkdir(parents=True, exist_ok=True)

fig = px.choropleth_mapbox(
    df,
    geojson=zipcodes,
    locations='ZipCode',
    featureidkey='properties.ZCTA5CE10',
    color='median_age',
    color_continuous_scale=px.colors.sequential.ice[::-1],
    mapbox_style="carto-positron",
    zoom=8,
    center={"lat": 28.2189, "lon": -82.457},
    opacity=0.8,
    title="Tampa metro",
)
fig.update_layout(
    # Keep a small top margin: with t=0 there is no room to draw the figure title.
    margin={"r": 0, "t": 40, "l": 0, "b": 0},
    coloraxis_colorbar=dict(title="Median age"),
)
fig.write_html("./maps/Tampa_median_age.html")

In [55]:
# Choropleth of the water-to-land ratio per ZIP code, using the 'Earth' scale.

# write_html does not create missing folders, so make sure the output folder exists.
Path("./maps").mkdir(parents=True, exist_ok=True)

fig = px.choropleth_mapbox(
    df,
    geojson=zipcodes,
    locations='ZipCode',
    featureidkey='properties.ZCTA5CE10',
    color='water_land_ratio',
    color_continuous_scale='Earth',
    mapbox_style="carto-positron",
    zoom=8,
    center={"lat": 28.2189, "lon": -82.457},
    opacity=0.8,
    title="Tampa metro",
)
fig.update_layout(
    # Keep a small top margin: with t=0 there is no room to draw the figure title.
    margin={"r": 0, "t": 40, "l": 0, "b": 0},
    coloraxis_colorbar=dict(title="Water to land proportion"),
)
fig.write_html("./maps/Tampa_water_to_land.html")
