# Geospatial Analysis

Source for most of the geo plotting code: https://www.kaggle.com/andresionek/geospatial-analysis-of-brazilian-e-commerce

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for dirname, _, filenames in os.walk('/kaggle/input'):
    full_paths = (os.path.join(dirname, filename) for filename in filenames)
    for full_path in full_paths:
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the postcode table (coordinates and per-capita income per CEP).
# POSTCODE is pinned to text: the notebook only ever treats it as a string
# (.str.contains / .str slicing), and leaving the dtype to inference risks
# losing leading zeros or ending up with a mixed int/str column.
geo = pd.read_csv(
    "../input/ibge-censo-cep-coordenadas-renda-per-capita/cep_coordinates_per_capita_income.csv",
    dtype={"POSTCODE": str},
)
geo.head()

In [None]:
# Drop every row whose POSTCODE contains the letter "S" (matched literally,
# not as a regex); the postcode prefixes are cast to int further down.
# .copy() makes the result an independent frame, so the column assignments
# that follow do not trigger pandas' SettingWithCopyWarning.
geo = geo[~geo["POSTCODE"].str.contains("S", regex=False)].copy()

In [None]:
# Derive one column per prefix length from the leading characters of POSTCODE.
# Keys are the new column names, values the number of leading characters kept.
prefix_widths = {
    'geolocation_zip_code_prefix_1_digits': 1,
    'geolocation_zip_code_prefix_2_digits': 2,
    'geolocation_zip_code_prefix_3_digits': 3,
    'geolocation_zip_code_prefix_4_digits': 4,
    'geolocation_zip_code_prefix': 5,
}
for prefix_column, width in prefix_widths.items():
    geo[prefix_column] = geo['POSTCODE'].str[0:width]

In [None]:
# Convert every prefix column from text to int for plotting purposes.
prefix_columns = (
    'geolocation_zip_code_prefix',
    'geolocation_zip_code_prefix_1_digits',
    'geolocation_zip_code_prefix_2_digits',
    'geolocation_zip_code_prefix_3_digits',
    'geolocation_zip_code_prefix_4_digits',
)
for prefix_column in prefix_columns:
    geo[prefix_column] = geo[prefix_column].astype(int)

In [None]:
from datashader.utils import lnglat_to_meters as webm
# Convert the LON/LAT columns to projected metre coordinates for plotting.
x, y = webm(geo.LON, geo.LAT)
# Assign the results directly instead of wrapping them in pd.Series(...):
# if the helper hands back plain arrays, a new Series would get a fresh
# 0..n-1 index that no longer lines up with geo's filtered index, and the
# index-aligned assignment would silently produce NaN / shifted coordinates.
# Direct assignment is correct for both Series and array results.
geo['x'] = x
geo['y'] = y

In [None]:
# plot with holoviews + datashader - bokeh with map background
import holoviews as hv
import geoviews as gv
import datashader as ds
from colorcet import fire, rainbow, bgy, bjy, bkr, kb, kr
from datashader.colors import colormap_select, Greys9
from holoviews.streams import RangeXY
from holoviews.operation.datashader import datashade, dynspread, rasterize
from bokeh.io import push_notebook, show, output_notebook
# Send Bokeh output to the notebook and select Bokeh as the HoloViews backend.
output_notebook()
hv.extension('bokeh')

# Default plot options (IPython line magics):
#   Overlay  - 800x600 canvas, toolbar above the plot, both axes hidden.
#   QuadMesh - hover tool and colorbar enabled; cells are fully transparent
#              (alpha=0) and only become visible (alpha 0.2) when hovered.
%opts Overlay [width=800 height=600 toolbar='above' xaxis=None yaxis=None]
%opts QuadMesh [tools=['hover'] colorbar=True] (alpha=0 hover_alpha=0.2)

# Module-level settings read by plot_map and passed to dynspread():
# T is used as its `threshold` argument, PX as its `max_px` argument.
T = 0.05
PX = 1

def plot_map(data, label, agg_data, agg_name, cmap):
    """Build a map overlay of the points in ``data`` shaded by an aggregated column.

    The result stacks three layers: a dark-gray ArcGIS tile background, the
    datashaded points, and a coarse QuadMesh that carries the hover tool.

    Args:
        data: Table with ``x`` and ``y`` columns (used as key dimensions) and
            a column named ``agg_name`` (used as the value dimension).
        label: Title applied to the returned overlay.
        agg_data: Datashader reduction (e.g. ``ds.mean(agg_name)``) used both
            for the shaded image and for the hover grid.
        agg_name: Name of the column in ``data`` being aggregated.
        cmap: Colormap applied to the shaded image and to the hover mesh.

    Returns:
        The overlay ``tiles * shaded points * hover mesh``, relabelled with
        ``label``.

    Note:
        Reads the module-level ``T`` and ``PX`` for the dynspread settings.
    """
    tile_url = "http://server.arcgisonline.com/ArcGIS/rest/services/Canvas/World_Dark_Gray_Base/MapServer/tile/{Z}/{Y}/{X}.png"
    background = gv.WMTS(tile_url)

    dataset = gv.Dataset(data, kdims=['x', 'y'], vdims=[agg_name])
    points = hv.Points(dataset)

    # Full-resolution shaded layer, spread so isolated points stay visible.
    shaded = datashade(points, element_type=gv.Image, aggregator=agg_data, cmap=cmap)
    spread = dynspread(shaded, threshold=T, max_px=PX)

    # Coarse 50x25 aggregate, re-rendered on range changes, shown as a QuadMesh
    # so the hover tool can report the aggregated value.
    coarse = rasterize(points, aggregator=agg_data, width=50, height=25, streams=[RangeXY])
    hover_mesh = hv.util.Dynamic(coarse, operation=hv.QuadMesh).options(cmap=cmap)

    return (background * spread * hover_mesh).relabel(label)

In [None]:
# Column to aggregate and colour by in the map drawn next.
agg_name = 'geolocation_zip_code_prefix'

In [None]:
# Map of all rows in geo, coloured by the mean of the agg_name column per pixel.
plot_map(
    data=geo,
    label='Zip Codes in Brazil',
    agg_data=ds.mean(agg_name),
    agg_name=agg_name,
    cmap=rainbow,
)

In [None]:
# Map of all rows in geo, coloured by the maximum renda_per_capita per pixel.
agg_name = 'renda_per_capita'
plot_map(
    data=geo,
    label='Renda Per Capita (R$)',
    agg_data=ds.max(agg_name),
    agg_name=agg_name,
    cmap=rainbow,
)

In [None]:
# São Paulo CEPs: 01001 to 19990
# The prefix column holds ints, so the inclusive range 1000..19999 selects
# exactly the rows with 1000 <= prefix < 20000.
sp_prefix_mask = geo["geolocation_zip_code_prefix"].between(1000, 19999)
sp_geo = geo[sp_prefix_mask]

In [None]:
# Map of the São Paulo subset, coloured by the sum of renda_per_capita per pixel.
# NOTE(review): the national map uses ds.max for the same column; summing a
# per-capita value also scales with how many points share a pixel - confirm
# that ds.sum is the intended reduction here.
agg_name = 'renda_per_capita'
plot_map(
    data=sp_geo,
    label='Renda Per Capita (R$)',
    agg_data=ds.sum(agg_name),
    agg_name=agg_name,
    cmap=rainbow,
)