In [1]:
import os
import sys
import numpy as np
import pandas as pd
import geopandas as gpd
import osmnx as ox

import seaborn as sns
import matplotlib.pyplot as plt

from pathlib import Path 

current_path = Path().resolve()
abs_path = str(current_path.parent)
sys.path.append(abs_path)

In [2]:
from h3 import h3
import folium
import json
from geojson.feature import *

In [3]:
import scripts.pot_holes_cost as phc
from scripts.h3_explorer import *

In [4]:
RAW_PATH = current_path.parent / 'data' / 'raw'
OUTPUT_PATH = current_path.parent / 'data' / 'output'

# Importing Data

In [5]:
df_miraflores = pd.read_csv(RAW_PATH/'Miraflores'/'Athena-flood-Miraflores.csv')
df_montevideo = pd.read_csv(RAW_PATH/'Montevideo'/'Athena-flood-Montevideo.csv')
df_rio = pd.read_csv(RAW_PATH/'Rio de Janeiro'/'Athena-flood-Rio.csv')

In [6]:
dfs_dict = {"montevideo": df_montevideo, "miraflores": df_miraflores, "rio de janeiro": df_rio}

In [7]:
df_miraflores.head()

Unnamed: 0,uuid,latitude,longitude,interactions,street,reliability,start_time,end_time,share,cum_share
0,5f8a9ee4-cbfd-3b1e-b10f-861d0c1af10b,-12.132533,-77.031029,25,Vía Expresa Costa Verde,10,2019-02-03 03:25:00.000,2019-02-03 04:51:00.000,0.0229,0.022915
1,c94d5b4a-974f-3550-ab60-39a507c0bf5c,-12.133135,-77.030389,23,Vía Expresa Costa Verde,10,2019-02-09 00:56:00.000,2019-02-09 01:42:00.000,0.0211,0.043996
2,a388e8ae-f3d9-3658-9f14-2e9db42892a4,-12.131729,-77.029708,21,Av. Armendáriz,10,2019-05-14 20:34:00.000,2019-05-14 21:52:00.000,0.0192,0.063245
3,96a3dd01-3d0d-325a-8cb3-c5025e79db20,-12.132819,-77.030705,20,Vía Expresa Costa Verde,10,2019-03-01 03:53:00.000,2019-03-01 04:40:00.000,0.0183,0.081577
4,3a936507-35e4-3efb-99d4-a94dda397ac5,-12.132326,-77.031355,18,Vía Expresa Costa Verde,10,2019-02-09 02:32:00.000,2019-02-09 04:04:00.000,0.0165,0.098075


In [8]:
df_montevideo.head()

Unnamed: 0,uuid,latitude,longitude,interactions,street,reliability,start_time,end_time,share,cum_share
0,b336ef4a-a85c-3cac-b448-75e50cc23d7c,-34.878758,-56.071303,36,Av. Italia,10,2019-01-07 01:56:00.000,2019-01-07 04:16:00.000,0.0849,0.084906
1,2606c07f-96a3-3fef-817d-e5b087574ef8,-34.863587,-56.169005,10,,10,2019-01-02 14:05:00.000,2019-01-02 15:42:00.000,0.0236,0.108491
2,8f5b9166-c54f-3cdf-9318-8f64a40d2a72,-34.878743,-56.071234,10,Av. Italia,10,2019-07-26 12:10:00.000,2019-07-26 14:14:00.000,0.0236,0.132075
3,171f4421-d891-3155-976f-25f06cf32b61,-34.877649,-56.06314,7,Av. Italia,10,2019-03-12 19:31:00.000,2019-03-12 21:03:00.000,0.0165,0.148585
4,f57f07ee-a5f4-3d70-927b-2846630af0c7,-34.876446,-56.203718,7,Eduardo D. Carbajal,10,2019-03-15 11:17:00.000,2019-03-15 12:20:00.000,0.0165,0.165094


In [9]:
print("INFO:\n")
for name,df in dfs_dict.items():
    print(name.upper(), '\n')
    print(df.info(), '\n')

INFO:

MONTEVIDEO 

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 219 entries, 0 to 218
Data columns (total 10 columns):
uuid            219 non-null object
latitude        219 non-null float64
longitude       219 non-null float64
interactions    219 non-null int64
street          210 non-null object
reliability     219 non-null int64
start_time      219 non-null object
end_time        219 non-null object
share           219 non-null float64
cum_share       219 non-null float64
dtypes: float64(4), int64(2), object(4)
memory usage: 17.2+ KB
None 

MIRAFLORES 

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 214 entries, 0 to 213
Data columns (total 10 columns):
uuid            214 non-null object
latitude        214 non-null float64
longitude       214 non-null float64
interactions    214 non-null int64
street          212 non-null object
reliability     214 non-null int64
start_time      214 non-null object
end_time        214 non-null object
share           214 non-null float64


In [10]:
print("DESCRIPTION:\n")
for name,df in dfs_dict.items():
    print(name.upper(), '\n')
    print(df.drop(columns=['share', 'cum_share']).describe(), '\n')

DESCRIPTION:

MONTEVIDEO 

         latitude   longitude  interactions  reliability
count  219.000000  219.000000    219.000000   219.000000
mean   -34.869907  -56.140832      1.936073     6.050228
std      0.028122    0.057178      2.781908     1.475251
min    -34.927089  -56.270215      1.000000     5.000000
25%    -34.886625  -56.184055      1.000000     5.000000
50%    -34.875342  -56.153690      1.000000     5.000000
75%    -34.857507  -56.090933      2.000000     6.000000
max    -34.728678  -56.031172     36.000000    10.000000 

MIRAFLORES 

         latitude   longitude  interactions  reliability
count  214.000000  214.000000    214.000000   214.000000
mean   -12.123125  -77.028181      5.098131     8.037383
std      0.008000    0.011277      4.688884     1.913261
min    -12.138272  -77.055023      1.000000     5.000000
25%    -12.128512  -77.033952      2.000000     6.000000
50%    -12.124747  -77.028142      4.000000     8.000000
75%    -12.118563  -77.020136      6.750000   

In [11]:
print("VARIABLE CATEGORIES:\n")
for name,df in dfs_dict.items():
    print(name.upper(), '\n')
    for cat in df.columns:
        print("Number of levels in category '{0}': \b {1:2.2f} ".format(cat, df_miraflores[cat].unique().size))
    print("\n")

VARIABLE CATEGORIES:

MONTEVIDEO 

Number of levels in category 'uuid': 214.00 
Number of levels in category 'latitude': 213.00 
Number of levels in category 'longitude': 213.00 
Number of levels in category 'interactions': 21.00 
Number of levels in category 'street': 67.00 
Number of levels in category 'reliability': 6.00 
Number of levels in category 'start_time': 211.00 
Number of levels in category 'end_time': 214.00 
Number of levels in category 'share': 21.00 
Number of levels in category 'cum_share': 214.00 


MIRAFLORES 

Number of levels in category 'uuid': 214.00 
Number of levels in category 'latitude': 213.00 
Number of levels in category 'longitude': 213.00 
Number of levels in category 'interactions': 21.00 
Number of levels in category 'street': 67.00 
Number of levels in category 'reliability': 6.00 
Number of levels in category 'start_time': 211.00 
Number of levels in category 'end_time': 214.00 
Number of levels in category 'share': 21.00 
Number of levels in catego

In [12]:
print("MISSING DATA:\n")
for name,df in dfs_dict.items():
    print(name.upper(), '\n')
    total = df.isnull().sum().sort_values(ascending=False)
    percent = (df.isnull().sum()/df.isnull().count()).sort_values(ascending=False)
    missing_data = pd.concat([total, percent], axis=1, keys=['Total', 'Percent'])
    print(missing_data.head(10))

MISSING DATA:

MONTEVIDEO 

              Total   Percent
street            9  0.041096
cum_share         0  0.000000
share             0  0.000000
end_time          0  0.000000
start_time        0  0.000000
reliability       0  0.000000
interactions      0  0.000000
longitude         0  0.000000
latitude          0  0.000000
uuid              0  0.000000
MIRAFLORES 

              Total   Percent
street            2  0.009346
cum_share         0  0.000000
share             0  0.000000
end_time          0  0.000000
start_time        0  0.000000
reliability       0  0.000000
interactions      0  0.000000
longitude         0  0.000000
latitude          0  0.000000
uuid              0  0.000000
RIO DE JANEIRO 

              Total   Percent
street          636  0.050174
cum_share         0  0.000000
share             0  0.000000
end_time          0  0.000000
start_time        0  0.000000
reliability       0  0.000000
interactions      0  0.000000
longitude         0  0.000000
latitude    

# visualization

In [13]:
df_miraflores = phc.treat_alerts_points(df_miraflores)
df_montevideo = phc.treat_alerts_points(df_montevideo)
df_rio =  phc.treat_alerts_points(df_rio)

## Miraflores

In [14]:
osm_place = "Distrito de Miraflores"
which_result=1
lat_centr_point, lon_centr_point = -12.1162673,-77.0306933

In [15]:
pol = ox.gdf_from_place(osm_place, which_result=which_result)

In [16]:
geojson_subzone = pol['geometry'].to_json()

In [17]:
m_hex = choropleth_map(df_aggreg=counts_by_hexagon(df = df_miraflores, resolution = 10), zoom_start=14,
                       coords=[lat_centr_point, lon_centr_point], with_legend=True)

GeoJson(
        geojson_subzone,
        style_function=lambda feature: {
            'fillColor': None,
            'color': 'blue',
            'weight': 2,
            'fillOpacity': 0
        }, 
        name = "Subzone" 
    ).add_to(m_hex)
m_hex

Comparing Hex Resolutions

In [18]:
coords=[lat_centr_point, lon_centr_point]

df_aggreg_8 = counts_by_hexagon(df = df_miraflores, resolution = 8)
df_aggreg_9 = counts_by_hexagon(df = df_miraflores, resolution = 9)
df_aggreg_10 = counts_by_hexagon(df = df_miraflores, resolution = 10)
df_aggreg_11 = counts_by_hexagon(df = df_miraflores, resolution = 11)

m_hex_11 = choropleth_map(df_aggreg = df_aggreg_11, with_legend = False, zoom_start=14, coords=coords)
m_hex_10 = choropleth_map(df_aggreg = df_aggreg_10, initial_map = m_hex_11, with_legend = False, zoom_start=14, coords=coords)
m_hex_9 = choropleth_map(df_aggreg = df_aggreg_9, initial_map = m_hex_10, with_legend = False, zoom_start=14, coords=coords)
m_hex_8 = choropleth_map(df_aggreg = df_aggreg_8, initial_map = m_hex_9, with_legend = False, zoom_start=14, coords=coords)

GeoJson(
        geojson_subzone,
        style_function=lambda feature: {
            'fillColor': None,
            'color': 'blue',
            'weight': 2,
            'fillOpacity': 0
        }, 
        name = "Subzone" 
    ).add_to(m_hex_8)

folium.map.LayerControl('bottomright', collapsed=False).add_to(m_hex_8)

m_hex_8

## Montevideo

In [19]:
osm_place = {'city' : 'Montevideo',
             'state' : 'Montevideo',
             'country' : 'Uruguay'}
which_result=1

lat_centr_point, lon_centr_point = -34.8305833,-56.236415

In [20]:
pol = ox.gdf_from_place(osm_place, which_result=which_result)

In [21]:
geojson_subzone = pol['geometry'].to_json()

In [22]:
m_hex = choropleth_map(df_aggreg=counts_by_hexagon(df = df_montevideo, resolution = 8), zoom_start=11,
                       coords=[lat_centr_point, lon_centr_point], with_legend=True)
            
GeoJson(
        geojson_subzone,
        style_function=lambda feature: {
            'fillColor': None,
            'color': 'blue',
            'weight': 2,
            'fillOpacity': 0
        }, 
        name = "Subzone" 
    ).add_to(m_hex)
m_hex

Comparing Hex Resolutions

In [23]:
coords=[lat_centr_point, lon_centr_point]

df_aggreg_7 = counts_by_hexagon(df = df_montevideo, resolution = 7)
df_aggreg_8 = counts_by_hexagon(df = df_montevideo, resolution = 8)
df_aggreg_9 = counts_by_hexagon(df = df_montevideo, resolution = 9)

m_hex_9 = choropleth_map(df_aggreg = df_aggreg_9, with_legend = False, zoom_start=11, coords=coords)
m_hex_8 = choropleth_map(df_aggreg = df_aggreg_8, initial_map = m_hex_9, with_legend = False, zoom_start=11, coords=coords)
m_hex_7 = choropleth_map(df_aggreg = df_aggreg_7, initial_map = m_hex_8, with_legend = False, zoom_start=11, coords=coords)

GeoJson(
        geojson_subzone,
        style_function=lambda feature: {
            'fillColor': None,
            'color': 'blue',
            'weight': 2,
            'fillOpacity': 0
        }, 
        name = "Subzone" 
    ).add_to(m_hex_7)

folium.map.LayerControl('bottomright', collapsed=False).add_to(m_hex_7)

m_hex_7

## Rio de Janeiro

In [24]:
osm_place = {'city' : 'Rio de Janeiro',
             'state' : 'Rio de Janeiro',
             'country' : 'Brasil'}
which_result=1

lat_centr_point, lon_centr_point = -22.9395526,-43.4305995

In [25]:
pol = ox.gdf_from_place(osm_place, which_result=which_result)

In [26]:
geojson_subzone = pol['geometry'].to_json()

In [27]:
m_hex = choropleth_map(df_aggreg=counts_by_hexagon(df = df_rio, resolution = 8), zoom_start=11,
                       coords=[lat_centr_point, lon_centr_point], with_legend=True)
            
GeoJson(
        geojson_subzone,
        style_function=lambda feature: {
            'fillColor': None,
            'color': 'blue',
            'weight': 2,
            'fillOpacity': 0
        }, 
        name = "Subzone" 
    ).add_to(m_hex)
m_hex

Comparing Hex Resolutions

In [28]:
coords=[lat_centr_point, lon_centr_point]

df_aggreg_7 = counts_by_hexagon(df = df_rio, resolution = 7)
df_aggreg_8 = counts_by_hexagon(df = df_rio, resolution = 8)
df_aggreg_9 = counts_by_hexagon(df = df_rio, resolution = 9)

# m_hex_9 = choropleth_map(df_aggreg = df_aggreg_9, with_legend = False, zoom_start=11, coords=coords)
# m_hex_8 = choropleth_map(df_aggreg = df_aggreg_8, initial_map = m_hex_9, with_legend = False, zoom_start=11, coords=coords)
m_hex_8 = choropleth_map(df_aggreg = df_aggreg_8, with_legend = False, zoom_start=11, coords=coords)
m_hex_7 = choropleth_map(df_aggreg = df_aggreg_7, initial_map = m_hex_8, with_legend = False, zoom_start=11, coords=coords)

GeoJson(
        geojson_subzone,
        style_function=lambda feature: {
            'fillColor': None,
            'color': 'blue',
            'weight': 2,
            'fillOpacity': 0
        }, 
        name = "Subzone" 
    ).add_to(m_hex_7)

folium.map.LayerControl('bottomright', collapsed=False).add_to(m_hex_7)

m_hex_7