In [1]:
# Load the `sql` IPython extension (provides the %sql magic used by the
# disabled DB cells in this notebook).
%load_ext sql
# Turn off the SqlMagic feedback setting.
%config SqlMagic.feedback = False
# Render matplotlib figures inline in the cell output.
%matplotlib inline

import os
import pandas as pd
import numpy as np
from mapboxgl.utils import create_color_stops, df_to_geojson
from mapboxgl.viz import CircleViz

# Read the DB connection settings and the Mapbox token from the environment
# (populated from local.env).
#
# os.environ.get is used instead of the `%env` magic: `x = %env VAR` aborts
# the cell when VAR is unset, which made the whole notebook depend on DB
# credentials even though the %sql connection below is disabled. A missing
# variable now simply yields None.
host = os.environ.get('DB_HOSTNAME')
user = os.environ.get('DB_USERNAME')
password = os.environ.get('DB_PASSWORD')
db = os.environ.get('DB_NAME')
token = os.environ.get('MAPBOX_API_KEY')

if not token:
    # Report the missing token here, where the cause is obvious, rather than
    # only when the map is built.
    print('MAPBOX_API_KEY is not set; the map cell needs a Mapbox access token.')

# Connection URL to our local MySQL DB
# NOTE: before re-enabling, make sure all four DB_* variables are set --
# unset ones are None and would be interpolated into the URL as "None".
#%sql mysql+mysqldb://{user}:{password}@{host}/{db}?charset=utf8

In [2]:
# Disabled: alternative data path that reads the from_mlab = 0 submissions
# from the MySQL DB via %sql instead of from the CSV file. It needs the
# (also disabled) %sql connection line to be enabled first.
# NOTE(review): the two dropna(inplace=True) calls act on a single-column
# selection, so they presumably would not remove rows from `subs` itself --
# confirm before re-enabling.
#subs = %sql SELECT * FROM submissions WHERE from_mlab = 0
#subs = subs.DataFrame()
#subs['latitude'].dropna(inplace=True)
#subs['longitude'].dropna(inplace=True)
#subs.describe()

In [3]:
# Column names for the CSV file, which is read without a header row.
columns = [
    'id', 'testing_for', 'address', 'zip_code',
    'provider', 'connected_with', 'monthly_price', 'provider_down_speed',
    'provider_price', 'actual_down_speed', 'actual_price', 'rating',
    'completed', 'created_at', 'updated_at', 'latitude',
    'longitude', 'ping', 'actual_upload_speed', 'test_id',
    'ip_address', 'hostname', 'from_mlab', 'area_code',
    'test_type', 'census_code', 'upload_median', 'download_median',
    'census_status', 'test_date', 'country_code', 'region',
    'county', 'accuracy', 'location', 'census_block',
]

# Stream the CSV in 1000-row chunks and keep only the rows where
# from_mlab == 0 (SUA submissions). Chunked reading keeps memory low;
# loading the full dataset at once requires about 5GB.
iter_csv = pd.read_csv(
    './data/9035f7b8-2d2f-4de0-a816-4067e1ae8fd8.csv',
    header=None,
    names=columns,
    iterator=True,
    chunksize=1000,
)
sua_parts = [part.loc[part['from_mlab'].eq(0)] for part in iter_csv]
sua = pd.concat(sua_parts)

# Cleanup: a rating of 0.0 is replaced with NaN.
sua['rating'] = sua['rating'].replace(to_replace=0.0, value=np.nan)

In [18]:
# Start from a copy of all SUA submissions; the county == 41039 filter is
# kept below for reference but is disabled.
#lane = sua[sua['county'] == 41039].copy()
lane = sua.copy().fillna(value=np.nan)

# Keep only the rows that have both a latitude and a longitude
# (the variant that also requires `accuracy` is disabled).
#lane = lane[lane.accuracy.notnull() & lane.latitude.notnull() & lane.longitude.notnull()]
lane = lane.dropna(subset=['latitude', 'longitude'])

In [19]:
# Summary statistics for the coordinate-filtered frame. With the county filter
# disabled, `lane` holds every SUA submission that has coordinates, so the
# latitude/longitude ranges in the output extend beyond a single county.
lane.describe()

Unnamed: 0,zip_code,monthly_price,provider_down_speed,provider_price,actual_down_speed,actual_price,rating,completed,latitude,longitude,...,actual_upload_speed,from_mlab,area_code,census_code,upload_median,download_median,county,accuracy,location,census_block
count,1035.0,556.0,487.0,461.0,1038.0,461.0,657.0,1038.0,1038.0,1038.0,...,1038.0,1038.0,0.0,1014.0,0.0,0.0,1037.0,271.0,0.0,934.0
mean,95060.711111,75.444245,139.765914,5.615965,57.573911,27.309241,4.225266,0.982659,43.747552,-121.367168,...,27.516599,0.0,,40541410000.0,,,40057.395371,14266.43,,410741900000000.0
std,10964.223158,38.922822,489.760627,12.642658,83.934222,111.440545,1.949304,0.130602,2.133904,7.974292,...,82.677206,0.0,,4470706000.0,,,5606.976028,117677.1,,15556550000000.0
min,6473.0,0.0,0.0,0.01,0.03,0.04,1.0,0.0,20.9077,-157.9925,...,0.0,0.0,,6037462000.0,,,4019.0,6.0,,160119500000000.0
25%,97401.0,50.0,12.0,0.54,7.73,1.15,3.0,1.0,44.020125,-123.1607,...,1.9275,0.0,,41039000000.0,,,41039.0,50.0,,410390000000000.0
50%,97405.0,65.0,60.0,1.35,29.69,3.02,4.0,1.0,44.05,-123.0927,...,5.82,0.0,,41039000000.0,,,41039.0,50.0,,410390000000000.0
75%,97448.0,90.0,150.0,4.5,72.4575,10.89,6.0,1.0,44.088475,-123.0215,...,11.5975,0.0,,41039000000.0,,,41039.0,1062.0,,410390000000000.0
max,99352.0,200.0,10000.0,120.0,828.2,1428.57,7.0,1.0,47.548,-72.8397,...,918.31,0.0,,55025000000.0,,,55025.0,1488703.0,,530630100000000.0


In [21]:
# Write the points in `lane` to points.geojson. The properties list is empty,
# so no feature properties are requested (the variant that exports `accuracy`
# is kept below, disabled).
# NOTE(review): precision=3 presumably limits the decimals written for each
# coordinate -- confirm against the mapboxgl docs.
#df_to_geojson(lane, filename='points.geojson', properties=['accuracy'], lat='latitude', lon='longitude', precision=3)
df_to_geojson(
    lane,
    filename='points.geojson',
    properties=[],
    lat='latitude',
    lon='longitude',
    precision=3,
)

{'type': 'file', 'filename': 'points.geojson', 'feature_count': 1038}

In [22]:
# Generate data breaks and color stops from colorBrewer
color_breaks = [0, 10, 25, 50, 100, 1000]
color_stops = create_color_stops(color_breaks, colors='YlGnBu')

# Create the viz from the dataframe
viz = CircleViz('points.geojson',
    access_token=token,
    height='800px',
#    color_property = "accuracy",
    color_stops = color_stops,
    center = (-123.09, 44.082),
    zoom = 8,
    below_layer = 'waterway-label'
)
viz.show()