In [1]:
%matplotlib inline

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
sns.set_style("whitegrid", {'legend.frameon':True})

columns = [
    'id', 'testing_for', 'address', 'zip_code', 'provider',
    'connected_with', 'monthly_price', 'provider_down_speed',
    'provider_price', 'actual_down_speed', 'actual_price', 'rating',
    'completed', 'created_at', 'updated_at', 'latitude', 'longitude',
    'ping', 'actual_upload_speed', 'test_id', 'ip_address', 'hostname',
    'from_mlab', 'area_code', 'test_type', 'census_code',
    'upload_median', 'download_median', 'census_status', 'test_date',
    'country_code', 'region', 'county', 'accuracy', 'location',
    'census_block'
]

# Load and filter CSV in chunks (less memory, require 5GB to load full dataset) to just from_mlab = 0 (SUA submissions)
iter_csv = pd.read_csv('./data/5d164a6e-6099-4824-9540-5bbc91327517.csv', header=None, names=columns, iterator=True, chunksize=1000)
sua = pd.concat([chunk[chunk['from_mlab'] == 0] for chunk in iter_csv])

# Cleanup
sua['rating'] = sua['rating'].replace(0.0,  np.nan)
sua['census_code'].dropna(inplace=True)
sua['census_block'].dropna(inplace=True)

In [2]:
lane = sua[sua['county'] == 41039].copy()
lane = lane[lane['test_type'] != 'duplicate']
lane = lane[lane['census_code'].notnull()]
lane = lane[lane['census_block'].notnull()]

# Convert census_codes to integer from float64
lane['census_code'] = lane['census_code'].astype(np.int64)
lane['census_block'] = lane['census_block'].astype(np.int64)

In [3]:
lane.describe()

Unnamed: 0,zip_code,monthly_price,provider_down_speed,provider_price,actual_down_speed,actual_price,rating,completed,latitude,longitude,...,actual_upload_speed,from_mlab,area_code,census_code,upload_median,download_median,county,accuracy,location,census_block
count,638.0,369.0,307.0,290.0,639.0,290.0,410.0,639.0,639.0,639.0,...,639.0,639.0,0.0,639.0,0.0,0.0,639.0,167.0,0.0,639.0
mean,97422.982759,77.051491,109.687296,5.817931,51.959828,27.946828,4.034146,0.981221,44.042221,-123.113959,...,20.215102,0.0,,41039000000.0,,,41039.0,1799.538922,,410390000000000.0
std,30.214222,39.361135,171.615926,13.421831,71.280502,125.701847,1.870843,0.135851,0.079901,0.226643,...,67.347328,0.0,,1592.675,,,0.0,10184.125963,,15926640.0
min,97401.0,0.0,1.0,0.01,0.09,0.04,1.0,0.0,43.6868,-124.1305,...,0.0,0.0,,41039000000.0,,,41039.0,6.0,,410390000000000.0
25%,97402.0,50.0,13.0,0.6,7.325,1.17,3.0,1.0,44.0283,-123.14705,...,1.415,0.0,,41039000000.0,,,41039.0,48.5,,410390000000000.0
50%,97405.0,70.0,60.0,1.355,26.58,3.035,4.0,1.0,44.05,-123.0941,...,5.72,0.0,,41039000000.0,,,41039.0,58.0,,410390000000000.0
75%,97439.0,90.0,120.0,4.5,68.52,11.225,5.0,1.0,44.0755,-123.0606,...,11.33,0.0,,41039000000.0,,,41039.0,1062.0,,410390000000000.0
max,97490.0,200.0,1000.0,120.0,591.84,1428.57,7.0,1.0,44.2622,-122.1631,...,864.82,0.0,,41039010000.0,,,41039.0,118584.0,,410390100000000.0


In [4]:
lane_tracts = lane['census_code'].unique()
len(lane_tracts)

85

## Census Tract assignments

In [5]:
# Hand built by clicking around on speedupamerica.com's results page
# Next time I will use https://tigerweb.geo.census.gov/tigerweb/
eugene_tracts = [
    41039002201, 41039002202,
    41039002301,
    41039002401, 41039002302, 41039002403, 41039002404,
    41039002501, 41039002503, 41039002504,
    41039002600,
    41039002700,
    41039002800,
    41039002902, 41039002903, 41039002904,
    41039003000,
    41039003101, 41039003102,
    41039003600, # Glenwood and LCC
    41039003700, # UofO
    41039003800,
    41039003900,
    41039004000,
    41039004100,
    41039004200,
    41039004300,
    41039004401, 41039004403, 41039004404, 41039004405,
    41039004501, 41039004502,
    41039004600,
    41039004700,
    41039004800,
    41039004900,
    41039005000,
    41039005100,
    41039005300,
    41039005400
]
springfield_tracts = [
    41039001801, 41039001803, 41039001804, # Thurston
    41039001902, 41039001903, 41039001904, # Centeral Springfield
    41039002001, 41039002002, # North Springfield
    41039002101, 41039002102, # Gateway
    41039003201, 41039003202, # West Springfield (Centenial)
    41039003301, 41039003302, # Springfield
    41039003400, # East of Mowhawl
    41039003500 # Jasper Rd and Dorris Ranch
]

## Eugene

In [6]:
eugene = lane[lane['census_code'].isin(eugene_tracts)]
eugene.describe()

Unnamed: 0,zip_code,monthly_price,provider_down_speed,provider_price,actual_down_speed,actual_price,rating,completed,latitude,longitude,...,actual_upload_speed,from_mlab,area_code,census_code,upload_median,download_median,county,accuracy,location,census_block
count,360.0,179.0,158.0,145.0,360.0,145.0,222.0,360.0,360.0,360.0,...,360.0,360.0,0.0,360.0,0.0,0.0,360.0,106.0,0.0,360.0
mean,97402.811111,77.005587,155.525316,3.707103,69.661556,5.871448,4.666667,0.969444,44.056434,-123.106562,...,30.795639,0.0,,41039000000.0,,,41039.0,2486.311321,,410390000000000.0
std,1.836761,37.046357,211.005651,11.610709,83.393229,14.285555,1.692856,0.17235,0.026602,0.031986,...,87.28175,0.0,,905.4884,,,0.0,12695.264035,,9055002.0
min,97401.0,0.0,1.0,0.01,0.23,0.04,1.0,0.0,43.9933,-123.2005,...,0.1,0.0,,41039000000.0,,,41039.0,6.0,,410390000000000.0
25%,97401.0,50.0,50.0,0.5,16.0775,0.92,4.0,1.0,44.03975,-123.126675,...,4.4925,0.0,,41039000000.0,,,41039.0,40.5,,410390000000000.0
50%,97402.0,70.0,80.0,1.0,43.97,1.72,5.0,1.0,44.05,-123.09865,...,5.975,0.0,,41039000000.0,,,41039.0,65.0,,410390000000000.0
75%,97405.0,92.5,200.0,2.0,83.79,4.11,6.0,1.0,44.074525,-123.0863,...,11.7325,0.0,,41039000000.0,,,41039.0,1089.0,,410390000000000.0
max,97408.0,200.0,1000.0,120.0,591.84,134.85,7.0,1.0,44.1314,-123.0241,...,864.82,0.0,,41039010000.0,,,41039.0,118584.0,,410390100000000.0


In [7]:
eugene.groupby('provider').agg({
    "id": ["count"],
    "rating":  ["mean", "median", "count"],
    "actual_down_speed": ["mean", "median", "count"],
    "monthly_price": ["mean", "median", "count"],
    "actual_price": ["mean", "median", "count"]
}).sort_values(('id', 'count'), ascending=False).head(15)

Unnamed: 0_level_0,id,rating,rating,rating,actual_down_speed,actual_down_speed,actual_down_speed,monthly_price,monthly_price,monthly_price,actual_price,actual_price,actual_price
Unnamed: 0_level_1,count,mean,median,count,mean,median,count,mean,median,count,mean,median,count
provider,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
"Comcast Cable Communications, LLC",228,4.442857,4.0,140,72.337982,58.76,228,81.216783,75.0,143,3.137456,1.345,114
XS Media,53,5.833333,6.0,42,114.63566,63.87,53,81.666667,90.0,3,2.725,2.725,2
"CenturyLink Communications, LLC",40,3.416667,4.0,24,14.503,7.7,40,53.75,49.5,28,17.297083,7.14,24
Network for Education and Research in Oregon (NERO),10,6.0,6.0,2,55.042,26.12,10,,,0,,,0
"Emerald Broadband, LLC",5,5.4,7.0,5,29.784,28.82,5,55.0,50.0,4,5.325,2.47,4
AT&T Mobility LLC,4,4.0,4.0,2,9.0225,7.205,4,,,0,,,0
University of Oregon,4,7.0,7.0,1,44.88,44.885,4,,,0,,,0
Verizon Wireless,4,3.0,3.0,1,15.92,13.88,4,200.0,200.0,1,51.81,51.81,1
ATT,2,,,0,86.675,86.675,2,,,0,,,0
Douglas FastNet,2,7.0,7.0,2,145.64,145.64,2,,,0,,,0


In [8]:
eugene.groupby('census_code').agg({
    "id": ["count"],
    "rating":  ["mean", "median", "count"],
    "actual_down_speed": ["mean", "median", "count"],
    "monthly_price": ["mean", "median", "count"],
    "actual_price": ["mean", "median", "count"]
})

Unnamed: 0_level_0,id,rating,rating,rating,actual_down_speed,actual_down_speed,actual_down_speed,monthly_price,monthly_price,monthly_price,actual_price,actual_price,actual_price
Unnamed: 0_level_1,count,mean,median,count,mean,median,count,mean,median,count,mean,median,count
census_code,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
41039002201,1,5.0,5.0,1,1.57,1.57,1,120.0,120.0,1,,,0
41039002202,10,3.75,4.0,4,58.993,41.81,10,89.0,70.0,5,7.315,1.345,4
41039002301,2,3.5,3.5,2,81.89,81.89,2,50.0,50.0,1,,,0
41039002302,8,5.2,6.0,5,38.3975,33.625,8,66.25,70.0,4,1.6325,1.32,4
41039002401,3,4.666667,4.0,3,32.286667,24.33,3,74.0,74.0,2,,,0
41039002403,4,4.0,4.0,2,70.3375,49.365,4,103.333333,100.0,3,1.215,1.215,2
41039002404,6,3.5,3.5,4,44.775,35.0,6,122.0,100.0,5,11.842,1.94,5
41039002501,1,,,0,309.34,309.34,1,,,0,,,0
41039002503,13,4.142857,4.0,7,117.247692,45.96,13,64.0,62.5,8,1.7775,1.825,4
41039002504,1,,,0,57.36,57.36,1,,,0,,,0


## Springfield

In [9]:
springfield = lane[lane['census_code'].isin(springfield_tracts)]
springfield.describe()

Unnamed: 0,zip_code,monthly_price,provider_down_speed,provider_price,actual_down_speed,actual_price,rating,completed,latitude,longitude,...,actual_upload_speed,from_mlab,area_code,census_code,upload_median,download_median,county,accuracy,location,census_block
count,59.0,40.0,37.0,37.0,59.0,37.0,43.0,59.0,59.0,59.0,...,59.0,59.0,0.0,59.0,0.0,0.0,59.0,20.0,0.0,59.0
mean,97477.288136,73.225,116.945946,3.412162,58.373051,9.137297,3.883721,0.983051,44.056025,-122.989878,...,7.899322,0.0,,41039000000.0,,,41039.0,498.0,,410390000000000.0
std,0.456782,31.323365,137.507161,5.597081,58.81681,22.465579,1.802238,0.130189,0.014284,0.045462,...,8.094987,0.0,,665.351,,,0.0,1464.151775,,6653677.0
min,97477.0,13.0,3.0,0.03,0.44,0.52,1.0,0.0,44.0117,-123.043,...,0.26,0.0,,41039000000.0,,,41039.0,11.0,,410390000000000.0
25%,97477.0,45.75,24.0,0.57,16.665,0.83,2.5,1.0,44.04645,-123.022,...,4.39,0.0,,41039000000.0,,,41039.0,29.0,,410390000000000.0
50%,97477.0,70.0,60.0,0.8,33.97,2.11,4.0,1.0,44.0527,-123.0,...,5.75,0.0,,41039000000.0,,,41039.0,50.0,,410390000000000.0
75%,97478.0,89.25,150.0,2.0,73.94,5.82,5.0,1.0,44.0681,-122.9674,...,11.305,0.0,,41039000000.0,,,41039.0,65.0,,410390000000000.0
max,97478.0,161.0,500.0,26.67,260.39,125.0,7.0,1.0,44.0784,-122.8743,...,40.51,0.0,,41039000000.0,,,41039.0,5985.0,,410390000000000.0


In [10]:
springfield.groupby('provider').agg({
    "id": ["count"],
    "rating":  ["mean", "median", "count"],
    "actual_down_speed": ["mean", "median", "count"],
    "monthly_price": ["mean", "median", "count"],
    "actual_price": ["mean", "median", "count"]
}).sort_values(('id', 'count'), ascending=False).head(15)

Unnamed: 0_level_0,id,rating,rating,rating,actual_down_speed,actual_down_speed,actual_down_speed,monthly_price,monthly_price,monthly_price,actual_price,actual_price,actual_price
Unnamed: 0_level_1,count,mean,median,count,mean,median,count,mean,median,count,mean,median,count
provider,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
"Comcast Cable Communications, LLC",44,4.375,4.5,32,73.038409,61.26,44,72.892857,70.0,28,2.281111,1.33,27
"CenturyLink Communications, LLC",10,2.714286,2.0,7,9.147,6.66,10,77.222222,75.0,9,30.06875,10.94,8
AVAST Software s.r.o.,1,2.0,2.0,1,31.72,31.72,1,60.0,60.0,1,,,0
SoftLayer Technologies Inc.,1,1.0,1.0,1,23.9,23.9,1,13.0,13.0,1,0.54,0.54,1
Sprint,1,3.0,3.0,1,76.38,76.38,1,,,0,,,0
Verizon Wireless,1,,,0,3.46,3.46,1,,,0,,,0
"ViaSat,Inc.",1,2.0,2.0,1,3.39,3.39,1,120.0,120.0,1,35.4,35.4,1


In [11]:
springfield.groupby('census_code').agg({
    "id": ["count"],
    "rating":  ["mean", "median", "count"],
    "actual_down_speed": ["mean", "median", "count"],
    "monthly_price": ["mean", "median", "count"],
    "actual_price": ["mean", "median", "count"]
}).sort_values(('id', 'count'), ascending=False).head(15)

Unnamed: 0_level_0,id,rating,rating,rating,actual_down_speed,actual_down_speed,actual_down_speed,monthly_price,monthly_price,monthly_price,actual_price,actual_price,actual_price
Unnamed: 0_level_1,count,mean,median,count,mean,median,count,mean,median,count,mean,median,count
census_code,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
41039002002,9,4.0,3.0,9,51.726667,56.03,9,53.875,45.0,8,1.57,0.73,8
41039003302,7,3.428571,3.0,7,37.878571,26.58,7,85.666667,87.0,6,13.014,3.2,5
41039001801,5,4.25,5.0,4,40.952,8.38,5,97.0,75.0,3,47.733333,9.25,3
41039002102,5,4.5,4.5,2,56.196,33.97,5,68.25,74.0,4,4.5825,2.585,4
41039002001,4,3.25,3.0,4,55.325,38.185,4,76.333333,69.0,3,2.15,2.15,1
41039003201,4,3.0,3.0,1,28.4675,28.27,4,41.666667,50.0,3,1.476667,1.49,3
41039003202,4,2.75,2.0,4,112.51,82.875,4,94.333333,130.0,3,0.843333,0.54,3
41039001803,3,5.0,5.0,2,83.553333,78.81,3,90.0,90.0,1,7.82,7.82,1
41039001804,3,2.5,2.5,2,81.046667,10.78,3,82.5,82.5,2,28.385,28.385,2
41039002101,3,7.0,7.0,1,115.613333,151.46,3,70.0,70.0,1,4.34,4.34,1


## Lane County

In [12]:
county_tracts = np.setdiff1d(lane_tracts, eugene_tracts)
county_tracts = np.setdiff1d(county_tracts, springfield_tracts)

county = lane[lane['census_code'].isin(county_tracts)]
county.describe()

Unnamed: 0,zip_code,monthly_price,provider_down_speed,provider_price,actual_down_speed,actual_price,rating,completed,latitude,longitude,...,actual_upload_speed,from_mlab,area_code,census_code,upload_median,download_median,county,accuracy,location,census_block
count,219.0,150.0,112.0,108.0,220.0,108.0,145.0,220.0,220.0,220.0,...,220.0,220.0,0.0,220.0,0.0,0.0,220.0,41.0,0.0,220.0
mean,97441.511416,78.126667,42.625,9.476111,21.273455,64.029074,3.110345,1.0,44.015263,-123.159341,...,6.204364,0.0,,41039000000.0,,,41039.0,658.878049,,410390000000000.0
std,29.10206,43.874791,67.37165,16.524226,31.834111,200.339871,1.764296,0.0,0.127558,0.375651,...,16.909151,0.0,,726.957,,,0.0,1704.056545,,7269375.0
min,97402.0,20.0,1.0,0.2,0.09,0.35,1.0,1.0,43.6868,-124.1305,...,0.0,0.0,,41039000000.0,,,41039.0,10.0,,410390000000000.0
25%,97419.0,49.25,10.0,1.3975,2.645,3.945,1.0,1.0,43.942725,-123.33915,...,0.6,0.0,,41039000000.0,,,41039.0,50.0,,410390000000000.0
50%,97437.0,65.0,17.5,3.775,8.11,10.7,3.0,1.0,44.03135,-123.1618,...,1.66,0.0,,41039000000.0,,,41039.0,50.0,,410390000000000.0
75%,97463.0,90.0,60.0,8.3725,22.1025,26.125,4.0,1.0,44.0933,-123.0065,...,5.9625,0.0,,41039000000.0,,,41039.0,144.0,,410390000000000.0
max,97490.0,200.0,400.0,90.0,176.52,1428.57,7.0,1.0,44.2622,-122.1631,...,203.65,0.0,,41039010000.0,,,41039.0,9752.0,,410390100000000.0


In [13]:
county.groupby('provider').agg({
    "id": ["count"],
    "rating":  ["mean", "median", "count"],
    "actual_down_speed": ["mean", "median", "count"],
    "monthly_price": ["mean", "median", "count"],
    "actual_price": ["mean", "median", "count"]
}).sort_values(('id', 'count'), ascending=False).head(15)

Unnamed: 0_level_0,id,rating,rating,rating,actual_down_speed,actual_down_speed,actual_down_speed,monthly_price,monthly_price,monthly_price,actual_price,actual_price,actual_price
Unnamed: 0_level_1,count,mean,median,count,mean,median,count,mean,median,count,mean,median,count
provider,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
"CenturyLink Communications, LLC",89,2.53125,2.0,64,6.569213,3.99,89,62.603175,55.0,63,63.801458,16.495,48
Charter Communications,45,4.444444,5.0,27,43.865556,33.02,45,61.111111,64.0,27,3.844737,1.38,19
Verizon Wireless,21,2.272727,2.0,11,18.085238,7.73,21,117.333333,107.5,12,350.596667,165.015,6
"Comcast Cable Communications, LLC",13,4.5,4.5,8,62.991538,53.91,13,73.0,80.0,9,2.194286,0.8,7
"ViaSat,Inc.",13,2.111111,2.0,9,7.788462,5.86,13,121.384615,105.0,13,112.580909,17.06,11
Hughes Network Systems,9,2.4,3.0,5,5.138889,2.1,9,90.0,92.5,8,42.216667,30.58,6
"Emerald Broadband, LLC",8,3.142857,3.0,7,18.51125,6.115,8,114.285714,50.0,7,21.75,21.75,2
XS Media,8,4.125,4.0,8,19.925,19.79,8,118.8,119.0,5,7.186667,6.84,3
Hunter Communications,6,2.75,3.0,4,36.315,5.71,6,71.5,62.5,4,19.07,19.875,4
AT&T Mobility LLC,3,,,0,12.403333,6.71,3,,,0,,,0


In [14]:
county.groupby('census_code').agg({
    "id": ["count"],
    "rating":  ["mean", "median", "count"],
    "actual_down_speed": ["mean", "median", "count"],
    "monthly_price": ["mean", "median", "count"],
    "actual_price": ["mean", "median", "count"]
}).sort_values(('id', 'count'), ascending=False).head(15)

Unnamed: 0_level_0,id,rating,rating,rating,actual_down_speed,actual_down_speed,actual_down_speed,monthly_price,monthly_price,monthly_price,actual_price,actual_price,actual_price
Unnamed: 0_level_1,count,mean,median,count,mean,median,count,mean,median,count,mean,median,count
census_code,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
41039000100,21,2.857143,2.5,14,9.718571,6.65,21,53.0,48.5,16,12.733077,15.38,13
41039001002,20,2.0,1.0,15,9.3875,5.395,20,124.411765,119.0,17,198.44125,31.335,8
41039000902,17,4.25,4.5,12,17.071765,4.42,17,87.142857,85.0,14,106.293,19.61,10
41039000903,16,3.727273,3.0,11,11.71875,2.715,16,60.181818,58.0,11,79.232857,10.63,7
41039001101,13,2.8,3.0,5,23.373846,10.13,13,78.5,60.0,4,19.386667,16.67,3
41039000200,13,2.1,2.0,10,10.287692,5.82,13,87.222222,70.0,9,13.66,5.45,7
41039000904,12,2.75,2.0,8,22.580833,10.07,12,82.1,72.5,10,7.67375,5.96,8
41039000402,10,2.666667,3.0,9,6.893,3.49,10,91.6,77.5,10,198.4925,18.415,8
41039000403,9,4.0,4.0,4,36.667778,7.51,9,66.666667,50.0,3,19.875,19.875,2
41039000800,8,3.0,3.0,7,6.18375,2.185,8,80.714286,80.0,7,224.89,145.255,4
