In [2]:
%matplotlib inline

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# Plot styling: seaborn "whitegrid" theme, with the legend frame switched on.
sns.set_style("whitegrid", rc={"legend.frameon": True})

# Column names, in file order, passed to pd.read_csv(names=...) below
# (the CSV is read with header=None, so every column is named here).
columns = [
    "id", "testing_for", "address", "zip_code",
    "provider", "connected_with", "monthly_price", "provider_down_speed",
    "provider_price", "actual_down_speed", "actual_price", "rating",
    "completed", "created_at", "updated_at", "latitude",
    "longitude", "ping", "actual_upload_speed", "test_id",
    "ip_address", "hostname", "from_mlab", "area_code",
    "test_type", "census_code", "upload_median", "download_median",
    "census_status", "test_date", "country_code", "region",
    "county", "accuracy", "location", "census_block",
]

# Stream the CSV in 1000-row chunks and keep only the rows with from_mlab == 0
# (SUA submissions). Reading chunk by chunk uses less memory; loading the full
# dataset at once requires about 5GB.
iter_csv = pd.read_csv(
    './data/9035f7b8-2d2f-4de0-a816-4067e1ae8fd8.csv',
    header=None,  # no row is treated as a header; names come from `columns`
    names=columns,
    iterator=True,
    chunksize=1000,
)
sua_chunks = []
for chunk in iter_csv:
    is_sua = chunk['from_mlab'] == 0
    sua_chunks.append(chunk[is_sua])
sua = pd.concat(sua_chunks)

# Turn 0.0 ratings into NaN so pandas skips them in mean/median/count.
# NOTE(review): presumably 0.0 means "no rating given" — confirm against the data source.
sua['rating'] = sua['rating'].replace(0.0, np.nan)

In [29]:
# Keep only the SUA rows whose county code is 41039 (the subset this notebook
# calls "lane").
in_lane = sua['county'] == 41039
lane = sua[in_lane]

In [30]:
# Summary statistics (count, mean, std, quartiles, min/max) for the `lane` subset.
lane.describe()

Unnamed: 0,zip_code,monthly_price,provider_down_speed,provider_price,actual_down_speed,actual_price,rating,completed,latitude,longitude,...,actual_upload_speed,from_mlab,area_code,census_code,upload_median,download_median,county,accuracy,location,census_block
count,785.0,430.0,368.0,348.0,786.0,348.0,500.0,786.0,786.0,786.0,...,786.0,786.0,0.0,786.0,0.0,0.0,786.0,182.0,0.0,752.0
mean,97423.163057,77.755814,149.3125,5.335374,58.126794,28.249511,4.164,0.978372,44.044947,-123.115524,...,23.806489,0.0,,41039000000.0,,,41039.0,1984.379121,,410390000000000.0
std,30.281358,40.597156,548.745811,12.423948,77.764096,122.794332,1.887917,0.14556,0.074965,0.215943,...,75.134598,0.0,,1574.336,,,0.0,10593.162882,,15780510.0
min,97401.0,0.0,0.0,0.01,0.03,0.04,1.0,0.0,43.6868,-124.1305,...,0.0,0.0,,41039000000.0,,,41039.0,6.0,,410390000000000.0
25%,97402.0,50.0,12.0,0.54,7.7225,1.085,3.0,1.0,44.034,-123.1471,...,1.7625,0.0,,41039000000.0,,,41039.0,39.25,,410390000000000.0
50%,97405.0,70.0,60.0,1.33,30.665,2.78,4.0,1.0,44.05,-123.0941,...,5.77,0.0,,41039000000.0,,,41039.0,52.0,,410390000000000.0
75%,97439.0,93.75,150.0,4.26,74.46,10.9175,6.0,1.0,44.076175,-123.06125,...,11.4275,0.0,,41039000000.0,,,41039.0,1062.0,,410390000000000.0
max,97490.0,200.0,10000.0,120.0,631.46,1428.57,7.0,1.0,44.2622,-122.1631,...,864.82,0.0,,41039010000.0,,,41039.0,118584.0,,410390100000000.0


In [21]:
# Distinct census_code values present in the `lane` rows, followed by how many
# there are (displayed as the cell output).
lane_tracts = pd.unique(lane['census_code'])
len(lane_tracts)

85

## Census Tract assignments

In [31]:
# census_code values assigned to Eugene. Order is not significant to the
# `isin` / `setdiff1d` lookups that consume this list.
# NOTE(review): 41039002302 is listed among the 24xx codes — confirm it is the
# intended tract and not a typo (e.g. for 41039002402).
eugene_tracts = [
    # 22xx - 25xx
    41039002201, 41039002202, 41039002301,
    41039002401, 41039002302, 41039002403, 41039002404,
    41039002501, 41039002503, 41039002504,
    # 26xx - 31xx
    41039002600, 41039002700, 41039002800,
    41039002902, 41039002903, 41039002904,
    41039003000, 41039003101, 41039003102,
    # 36xx - 43xx
    41039003600,  # Glenwood and LCC
    41039003700,  # UofO
    41039003800, 41039003900, 41039004000,
    41039004100, 41039004200, 41039004300,
    # 44xx - 54xx
    41039004401, 41039004403, 41039004404, 41039004405,
    41039004501, 41039004502,
    41039004600, 41039004700, 41039004800, 41039004900,
    41039005000, 41039005100, 41039005300, 41039005400,
]

# census_code values assigned to Springfield, grouped by neighbourhood.
springfield_tracts = [
    # Thurston
    41039001801, 41039001803, 41039001804,
    # Central Springfield
    41039001902, 41039001903, 41039001904,
    # North Springfield
    41039002001, 41039002002,
    # Gateway
    41039002101, 41039002102,
    # West Springfield (Centennial)
    41039003201, 41039003202,
    # Springfield
    41039003301, 41039003302,
    # East of Mohawk
    41039003400,
    # Jasper Rd and Dorris Ranch
    41039003500,
]

## Eugene

In [37]:
# SUA rows whose census_code is one of the Eugene tracts, then their summary
# statistics as the cell output.
in_eugene = sua['census_code'].isin(eugene_tracts)
eugene = sua[in_eugene]
eugene.describe()

Unnamed: 0,zip_code,monthly_price,provider_down_speed,provider_price,actual_down_speed,actual_price,rating,completed,latitude,longitude,...,actual_upload_speed,from_mlab,area_code,census_code,upload_median,download_median,county,accuracy,location,census_block
count,451.0,214.0,193.0,179.0,451.0,179.0,275.0,451.0,451.0,451.0,...,451.0,451.0,0.0,451.0,0.0,0.0,451.0,122.0,0.0,440.0
mean,97402.838137,76.17757,168.378238,3.195531,77.65643,5.25,4.825455,0.966741,44.057102,-123.107488,...,36.512173,0.0,,41039000000.0,,,41039.0,2640.434426,,410390000000000.0
std,1.854235,36.544092,215.405158,10.519312,90.737394,13.049122,1.654039,0.179512,0.026685,0.032094,...,96.44129,0.0,,901.3008,,,0.0,12852.707539,,8843496.0
min,97401.0,0.0,1.0,0.01,0.23,0.04,1.0,0.0,43.9933,-123.2005,...,0.1,0.0,,41039000000.0,,,41039.0,6.0,,410390000000000.0
25%,97401.0,50.0,50.0,0.34,16.865,0.79,4.0,1.0,44.03985,-123.1297,...,4.55,0.0,,41039000000.0,,,41039.0,39.25,,410390000000000.0
50%,97402.0,70.0,80.0,0.83,53.99,1.66,5.0,1.0,44.05,-123.099,...,8.35,0.0,,41039000000.0,,,41039.0,65.0,,410390000000000.0
75%,97405.0,90.0,200.0,1.715,98.705,3.97,6.0,1.0,44.07475,-123.0864,...,11.895,0.0,,41039000000.0,,,41039.0,1062.0,,410390000000000.0
max,97408.0,200.0,1000.0,120.0,631.46,134.85,7.0,1.0,44.1314,-123.0241,...,864.82,0.0,,41039010000.0,,,41039.0,118584.0,,410390100000000.0


In [38]:
# Per-provider summary of the Eugene rows: number of ids, plus mean / median /
# non-null count of rating, download speed and prices. Shows the 15 providers
# with the most ids.
summary_stats = ["mean", "median", "count"]
eugene_by_provider = eugene.groupby('provider').agg({
    "id": ["count"],
    "rating": summary_stats,
    "actual_down_speed": summary_stats,
    "monthly_price": summary_stats,
    "actual_price": summary_stats,
})
eugene_by_provider.sort_values(('id', 'count'), ascending=False).head(15)

Unnamed: 0_level_0,id,rating,rating,rating,actual_down_speed,actual_down_speed,actual_down_speed,monthly_price,monthly_price,monthly_price,actual_price,actual_price,actual_price
Unnamed: 0_level_1,count,mean,median,count,mean,median,count,mean,median,count,mean,median,count
provider,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
"Comcast Cable Communications, LLC",284,4.546512,5.0,172,79.011444,63.78,284,79.69186,70.5,172,2.99007,1.24,142
XS Media,70,6.087719,6.0,57,133.306571,99.025,70,76.0,74.5,4,1.916667,0.87,3
"CenturyLink Communications, LLC",45,3.36,4.0,25,13.631778,7.72,45,52.451613,47.0,31,15.974815,6.8,27
Network for Education and Research in Oregon (NERO),12,6.0,6.0,2,55.5325,26.12,12,,,0,,,0
University of Oregon,6,6.0,6.0,3,30.76,9.16,6,,,0,,,0
Verizon Wireless,6,3.5,3.5,2,43.838333,26.68,6,200.0,200.0,2,26.52,26.52,2
"Emerald Broadband, LLC",5,5.4,7.0,5,29.784,28.82,5,55.0,50.0,4,5.325,2.47,4
AT&T Mobility LLC,4,4.0,4.0,2,9.0225,7.205,4,,,0,,,0
ATT,2,,,0,86.675,86.675,2,,,0,,,0
Douglas FastNet,2,7.0,7.0,2,145.64,145.64,2,,,0,,,0


## Springfield

In [39]:
# SUA rows whose census_code is one of the Springfield tracts, then their
# summary statistics as the cell output.
in_springfield = sua['census_code'].isin(springfield_tracts)
springfield = sua[in_springfield]
springfield.describe()

Unnamed: 0,zip_code,monthly_price,provider_down_speed,provider_price,actual_down_speed,actual_price,rating,completed,latitude,longitude,...,actual_upload_speed,from_mlab,area_code,census_code,upload_median,download_median,county,accuracy,location,census_block
count,73.0,46.0,44.0,42.0,73.0,42.0,53.0,73.0,73.0,73.0,...,73.0,73.0,0.0,73.0,0.0,0.0,73.0,22.0,0.0,66.0
mean,97477.260274,75.304348,119.022727,3.551429,65.382055,8.264762,4.0,0.972603,44.056262,-122.991363,...,7.984384,0.0,,41039000000.0,,,41039.0,599.727273,,410390000000000.0
std,0.441821,29.714097,146.374081,5.469011,58.441767,21.193785,1.82925,0.164368,0.015006,0.044957,...,7.374372,0.0,,650.1787,,,0.0,1512.805819,,6441427.0
min,97477.0,13.0,0.0,0.03,0.44,0.38,1.0,0.0,44.0117,-123.043,...,0.26,0.0,,41039000000.0,,,41039.0,11.0,,410390000000000.0
25%,97477.0,51.25,18.75,0.585,23.9,0.8375,3.0,1.0,44.0462,-123.022,...,5.08,0.0,,41039000000.0,,,41039.0,22.25,,410390000000000.0
50%,97477.0,70.0,60.0,1.0,49.86,1.705,4.0,1.0,44.0582,-123.0016,...,5.77,0.0,,41039000000.0,,,41039.0,50.0,,410390000000000.0
75%,97478.0,88.0,150.0,2.8,89.37,4.3925,5.0,1.0,44.0683,-122.9726,...,11.38,0.0,,41039000000.0,,,41039.0,65.0,,410390000000000.0
max,97478.0,150.0,500.0,26.67,260.39,125.0,7.0,1.0,44.0784,-122.8743,...,40.51,0.0,,41039000000.0,,,41039.0,5985.0,,410390000000000.0


In [41]:
# Per-provider summary of the Springfield rows: number of ids, plus mean /
# median / non-null count of rating, download speed and prices. Shows the 15
# providers with the most ids.
summary_stats = ["mean", "median", "count"]
springfield_by_provider = springfield.groupby('provider').agg({
    "id": ["count"],
    "rating": summary_stats,
    "actual_down_speed": summary_stats,
    "monthly_price": summary_stats,
    "actual_price": summary_stats,
})
springfield_by_provider.sort_values(('id', 'count'), ascending=False).head(15)

Unnamed: 0_level_0,id,rating,rating,rating,actual_down_speed,actual_down_speed,actual_down_speed,monthly_price,monthly_price,monthly_price,actual_price,actual_price,actual_price
Unnamed: 0_level_1,count,mean,median,count,mean,median,count,mean,median,count,mean,median,count
provider,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
"Comcast Cable Communications, LLC",58,4.5,5.0,42,78.518448,67.16,58,78.882353,71.5,34,2.205313,1.415,32
"CenturyLink Communications, LLC",10,2.142857,1.0,7,7.997,5.855,10,65.444444,55.0,9,30.07625,10.97,8
AVAST Software s.r.o.,1,2.0,2.0,1,31.72,31.72,1,60.0,60.0,1,,,0
SoftLayer Technologies Inc.,1,1.0,1.0,1,23.9,23.9,1,13.0,13.0,1,0.54,0.54,1
Sprint,1,3.0,3.0,1,76.38,76.38,1,,,0,,,0
Verizon Wireless,1,,,0,3.46,3.46,1,,,0,,,0
"ViaSat,Inc.",1,2.0,2.0,1,3.39,3.39,1,120.0,120.0,1,35.4,35.4,1


## Lane County (outside Eugene and Springfield)

In [42]:
# Tracts seen in the `lane` rows that are assigned to neither Eugene nor
# Springfield, i.e. the rest of the county.
city_tracts = eugene_tracts + springfield_tracts
county_tracts = np.setdiff1d(lane_tracts, city_tracts)

# SUA rows in those remaining tracts, then their summary statistics.
in_county = sua['census_code'].isin(county_tracts)
county = sua[in_county]
county.describe()

Unnamed: 0,zip_code,monthly_price,provider_down_speed,provider_price,actual_down_speed,actual_price,rating,completed,latitude,longitude,...,actual_upload_speed,from_mlab,area_code,census_code,upload_median,download_median,county,accuracy,location,census_block
count,261.0,170.0,131.0,127.0,262.0,127.0,172.0,262.0,262.0,262.0,...,262.0,262.0,0.0,262.0,0.0,0.0,262.0,38.0,0.0,246.0
mean,97443.153257,80.405882,131.396947,8.941339,22.487481,67.275276,3.156977,1.0,44.020871,-123.16395,...,6.343702,0.0,,41039000000.0,,,41039.0,679.736842,,410390000000000.0
std,28.628514,47.472009,879.375729,15.438889,33.193409,196.771799,1.807654,0.0,0.121406,0.362133,...,16.514166,0.0,,688.784,,,0.0,1764.98054,,7010074.0
min,97402.0,20.0,1.0,0.02,0.03,0.35,1.0,1.0,43.6868,-124.1305,...,0.0,0.0,,41039000000.0,,,41039.0,10.0,,410390000000000.0
25%,97424.0,45.0,10.0,1.46,2.67,3.88,1.0,1.0,43.9467,-123.3415,...,0.6,0.0,,41039000000.0,,,41039.0,50.0,,410390000000000.0
50%,97438.0,65.0,15.0,3.8,8.11,10.89,3.0,1.0,44.03875,-123.1673,...,1.66,0.0,,41039000000.0,,,41039.0,50.0,,410390000000000.0
75%,97478.0,100.0,42.5,8.415,25.5,25.915,4.0,1.0,44.1032,-123.0032,...,5.895,0.0,,41039000000.0,,,41039.0,138.0,,410390000000000.0
max,97490.0,200.0,10000.0,90.0,176.52,1428.57,7.0,1.0,44.2622,-122.1631,...,203.65,0.0,,41039010000.0,,,41039.0,9752.0,,410390100000000.0


In [44]:
# Per-provider summary of the remaining-county rows: number of ids, plus mean /
# median / non-null count of rating, download speed and prices. Shows the 15
# providers with the most ids.
summary_stats = ["mean", "median", "count"]
county_by_provider = county.groupby('provider').agg({
    "id": ["count"],
    "rating": summary_stats,
    "actual_down_speed": summary_stats,
    "monthly_price": summary_stats,
    "actual_price": summary_stats,
})
county_by_provider.sort_values(('id', 'count'), ascending=False).head(15)

Unnamed: 0_level_0,id,rating,rating,rating,actual_down_speed,actual_down_speed,actual_down_speed,monthly_price,monthly_price,monthly_price,actual_price,actual_price,actual_price
Unnamed: 0_level_1,count,mean,median,count,mean,median,count,mean,median,count,mean,median,count
provider,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
"CenturyLink Communications, LLC",104,2.571429,2.0,70,6.571923,3.71,104,60.652174,54.0,69,58.635094,16.22,53
Charter Communications,53,4.636364,5.0,33,49.042075,37.11,53,64.333333,64.5,30,3.446818,1.195,22
Verizon Wireless,25,2.2,2.0,15,17.2652,8.25,25,117.333333,107.5,12,350.596667,165.015,6
"ViaSat,Inc.",16,1.833333,1.5,12,7.43875,5.78,16,144.875,169.0,16,154.854286,20.655,14
"Comcast Cable Communications, LLC",15,4.666667,5.0,9,65.039333,63.12,15,71.7,70.0,10,2.03375,0.855,8
Hughes Network Systems,13,2.333333,3.0,9,4.800769,2.1,13,88.75,92.5,12,86.595,30.58,10
"Emerald Broadband, LLC",9,3.625,3.5,8,24.635556,7.14,9,125.0,125.0,8,15.406667,7.0,3
XS Media,9,4.111111,4.0,9,19.324444,17.54,9,118.8,119.0,5,7.186667,6.84,3
Hunter Communications,8,2.75,3.0,4,46.235,13.315,8,71.5,62.5,4,19.07,19.875,4
LS Networks,4,5.0,4.0,3,6.9375,2.425,4,41.5,42.0,4,15.6625,17.5,4
