In [1]:
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, func, inspect, Column, Integer, String, Float, DateTime

import pandas as pd

from flask import Flask, jsonify

In [2]:
# SQLite database file under Data/; the three-slash URL form is a path
# relative to the current working directory.
DATABASE_URL = 'sqlite:///Data/isp.sqlite'
engine = create_engine(DATABASE_URL)

In [3]:
# Names of the 26 columns of the Minneapolis_Centurylink table, in table order.
column_names = [
    'address_full', 'major_city', 'state', 'lat', 'lon', 'block_group',
    'collection_datetime', 'provider', 'price',
    'speed_down', 'speed_up', 'speed_unit', 'technology', 'package',
    'fastest_speed_down', 'fastest_speed_price', 'speed_down_bins',
    'redlining_grade', 'race_perc_non_white', 'race_quantile',
    'median_household_income', 'income_dollars_below_median', 'income_level',
    'ppl_per_sq_mile', 'n_providers', 'internet_perc_broadband',
]

In [4]:
# Reflect the existing database schema into mapped classes via automap.
Base = automap_base()
Base.prepare(autoload_with=engine)

# Mapped class for the Minneapolis_Centurylink table; the queries below select
# its column attributes. (A stray `Base.classes.keys()` expression used to sit
# here; it was never the last line of the cell, so it displayed nothing.)
minneapolis_centurylink = Base.classes.Minneapolis_Centurylink

In [5]:
# ORM session bound to the SQLite engine; the queries below run through it.
session = Session(bind=engine)

In [25]:
# Print each column of the table together with its reflected SQL type.
inspector = inspect(engine)
columns = inspector.get_columns('Minneapolis_Centurylink')

for col_info in columns:
    print(col_info["name"], col_info["type"])

address_full VARCHAR(100)
major_city VARCHAR(50)
state VARCHAR(4)
lat FLOAT
lon FLOAT
block_group BIGINT
collection_datetime DATETIME
provider VARCHAR(20)
price FLOAT
speed_down FLOAT
speed_up FLOAT
speed_unit VARCHAR(10)
technology VARCHAR(20)
package VARCHAR(100)
fastest_speed_down FLOAT
fastest_speed_price FLOAT
speed_down_bins VARCHAR(50)
redlining_grade VARCHAR(2)
race_perc_non_white FLOAT
race_quantile VARCHAR(50)
median_household_income FLOAT
income_dollars_below_median FLOAT
income_level VARCHAR(50)
ppl_per_sq_mile FLOAT
n_providers INTEGER
internet_perc_broadband FLOAT


In [43]:
# Fetch every column listed in `column_names` for all rows of the table.
# The select list is built from `column_names` so the query and the dict keys
# cannot drift apart (the names were previously spelled out three times).
selected_columns = [getattr(minneapolis_centurylink, name) for name in column_names]
data = session.query(*selected_columns).all()

# One plain dict per result row, keyed by column name in `column_names` order.
dataArray = [dict(zip(column_names, row)) for row in data]

In [45]:
# Show the first record to check the shape of the per-row dicts.
dataArray[0]

{'address_full': '3739 SNELLING AVE,MINNEAPOLIS,MN 55406,USA',
 'major_city': 'minneapolis',
 'state': 'MN',
 'lat': 44.9348799,
 'lon': -93.2257266,
 'block_group': 270531088001,
 'collection_datetime': datetime.datetime(2022, 4, 16, 22, 8, 27),
 'provider': 'CenturyLink',
 'price': 50.0,
 'speed_down': 200.0,
 'speed_up': 200.0,
 'speed_unit': 'Mbps',
 'technology': 'Fiber',
 'package': 'HSI Upto 200 Mbps/200 Mbps Prepaid',
 'fastest_speed_down': 940.0,
 'fastest_speed_price': 65.0,
 'speed_down_bins': 'Blazing (≥200)',
 'redlining_grade': 'D',
 'race_perc_non_white': 0.6405750798722045,
 'race_quantile': 'least white',
 'median_household_income': 60417.0,
 'income_dollars_below_median': 5651.0,
 'income_level': 'Middle-Lower',
 'ppl_per_sq_mile': 3329.255969792054,
 'n_providers': 3,
 'internet_perc_broadband': 0.6080246913580247}

In [11]:
# Subset of the table's columns kept for this query; the same list provides
# the dict keys below, so the two stay in sync.
grouped_column_names = [
    'lat', 'lon', 'block_group', 'price', 'speed_down', 'speed_up', 'speed_unit',
    'technology', 'package', 'fastest_speed_down', 'fastest_speed_price',
    'speed_down_bins', 'redlining_grade', 'median_household_income',
    'income_level', 'n_providers', 'internet_perc_broadband',
]
grouped_columns = [getattr(minneapolis_centurylink, name) for name in grouped_column_names]

# One result row per distinct (block_group, lat, lon).
# NOTE(review): the other selected columns have no aggregate function. SQLite
# accepts this and takes their values from a single row of each group; confirm
# that is intended if several rows can share the same coordinates.
data = (
    session.query(*grouped_columns)
    .group_by(
        minneapolis_centurylink.block_group,
        minneapolis_centurylink.lat,
        minneapolis_centurylink.lon,
    )
    .all()
)

# One plain dict per result row, keyed in `grouped_column_names` order.
dataArray = [dict(zip(grouped_column_names, row)) for row in data]

In [13]:
# Preview the first five records.
dataArray[:5]

[{'lat': 45.0476859,
  'lon': -93.302072,
  'block_group': 270530001011,
  'price': 50.0,
  'speed_down': 200.0,
  'speed_up': 200.0,
  'speed_unit': 'Mbps',
  'technology': 'Fiber',
  'package': 'HSI Upto 200 Mbps/200 Mbps Prepaid',
  'fastest_speed_down': 940.0,
  'fastest_speed_price': 65.0,
  'speed_down_bins': 'Blazing (≥200)',
  'redlining_grade': 'D',
  'median_household_income': 58011.0,
  'income_level': 'Middle-Lower',
  'n_providers': 2,
  'internet_perc_broadband': 0.7493606138107417},
 {'lat': 45.047731,
  'lon': -93.3046207,
  'block_group': 270530001011,
  'price': 50.0,
  'speed_down': 200.0,
  'speed_up': 200.0,
  'speed_unit': 'Mbps',
  'technology': 'Fiber',
  'package': 'HSI Upto 200 Mbps/200 Mbps Prepaid',
  'fastest_speed_down': 940.0,
  'fastest_speed_price': 65.0,
  'speed_down_bins': 'Blazing (≥200)',
  'redlining_grade': 'D',
  'median_household_income': 58011.0,
  'income_level': 'Middle-Lower',
  'n_providers': 2,
  'internet_perc_broadband': 0.7493606138107

In [37]:
# Tabular view of the record dicts: one row per dict, one column per key.
test_df = pd.DataFrame(data=dataArray)

In [38]:
# Preview the first five rows.
test_df.head(n=5)

Unnamed: 0,lat,lon,block_group,price,speed_down,speed_up,speed_unit,technology,package,fastest_speed_down,fastest_speed_price,speed_down_bins,redlining_grade,median_household_income,income_level,n_providers,internet_perc_broadband
0,45.047686,-93.302072,270530001011,50.0,200.0,200.0,Mbps,Fiber,HSI Upto 200 Mbps/200 Mbps Prepaid,940.0,65.0,Blazing (≥200),D,58011.0,Middle-Lower,2,0.749361
1,45.047731,-93.304621,270530001011,50.0,200.0,200.0,Mbps,Fiber,HSI Upto 200 Mbps/200 Mbps Prepaid,940.0,65.0,Blazing (≥200),D,58011.0,Middle-Lower,2,0.749361
2,45.047734,-93.300795,270530001011,50.0,80.0,10.0,Mbps,Not Fiber,HSI Upto 80 Mbps/10 Mbps Prepaid,80.0,50.0,Medium (25-99),D,58011.0,Middle-Lower,2,0.749361
3,45.048006,-93.300095,270530001011,50.0,80.0,10.0,Mbps,Not Fiber,HSI Upto 80 Mbps/10 Mbps Prepaid,80.0,50.0,Medium (25-99),D,58011.0,Middle-Lower,2,0.749361
4,45.048029,-93.299572,270530001011,50.0,80.0,10.0,Mbps,Not Fiber,HSI Upto 80 Mbps/10 Mbps Prepaid,80.0,50.0,Medium (25-99),D,58011.0,Middle-Lower,2,0.749361


In [80]:
# Collect, per block group, the distinct latitudes and the distinct longitudes.
# The columns are selected with a list (`[['lat', 'lon']]`); indexing a groupby
# with a bare tuple of keys is deprecated and rejected by newer pandas.
# NOTE(review): 'unique' is applied to each column independently, so `lats` and
# `lons` can differ in length and are not position-paired coordinates.
groupedCoords_df = test_df.groupby('block_group')[['lat', 'lon']].agg(
    lats=('lat', 'unique'),
    lons=('lon', 'unique'),
)

groupedCoords_df.head()


  groupedCoords_df = test_df.groupby('block_group')['lat', 'lon'].agg(lats=('lat', 'unique'),


Unnamed: 0_level_0,lats,lons
block_group,Unnamed: 1_level_1,Unnamed: 2_level_1
270530001011,"[45.0476859, 45.047731, 45.0477342, 45.0480062...","[-93.302072, -93.3046207, -93.3007946, -93.300..."
270530001012,"[45.0440775, 45.0442266, 45.0444263, 45.044520...","[-93.3083394, -93.3108759, -93.3063478, -93.31..."
270530001013,"[45.0390705, 45.0433993, 45.0443602, 45.044369...","[-93.2995775, -93.3061898, -93.3172353, -93.31..."
270530001014,"[45.0477908, 45.0478452, 45.0478498, 45.047851...","[-93.3146669, -93.3096769, -93.3122009, -93.31..."
270530001021,"[45.0348174, 45.0349987, 45.0365521, 45.037753...","[-93.2876657, -93.2875191, -93.2870677, -93.28..."


In [81]:
# Check the column labels produced by the named aggregation.
groupedCoords_df.columns

Index(['lats', 'lons'], dtype='object')

In [39]:
# Non-null count per column; a count below the others marks missing values.
test_df.count()

lat                        10875
lon                        10875
block_group                10875
price                      10875
speed_down                 10875
speed_up                   10875
speed_unit                 10875
technology                 10875
package                    10875
fastest_speed_down         10875
fastest_speed_price        10875
speed_down_bins            10875
redlining_grade             8811
median_household_income    10470
income_level               10470
n_providers                10875
internet_perc_broadband    10875
dtype: int64

In [51]:
# Replace missing values with placeholder strings, column by column.
# Note: filling the numeric median_household_income column with a string
# makes that column object-typed.
placeholder_by_column = {
    'redlining_grade': 'None',
    'median_household_income': 'Unknown',
    'income_level': 'Unknown',
}

for column_label, placeholder in placeholder_by_column.items():
    test_df[column_label] = test_df[column_label].fillna(placeholder)

In [53]:
# Re-check the non-null counts after filling the missing values.
test_df.count()

lat                        10875
lon                        10875
block_group                10875
price                      10875
speed_down                 10875
speed_up                   10875
speed_unit                 10875
technology                 10875
package                    10875
fastest_speed_down         10875
fastest_speed_price        10875
speed_down_bins            10875
redlining_grade            10875
median_household_income    10875
income_level               10875
n_providers                10875
internet_perc_broadband    10875
dtype: int64

In [59]:
# Every column except lat/lon; dropping the coordinates lets rows that differ
# only by location collapse into a single row.
non_coordinate_columns = [
    'block_group', 'price', 'speed_down', 'speed_up', 'speed_unit',
    'technology', 'package', 'fastest_speed_down', 'fastest_speed_price',
    'speed_down_bins', 'redlining_grade', 'median_household_income',
    'income_level', 'n_providers', 'internet_perc_broadband',
]
unique_df = test_df[non_coordinate_columns].drop_duplicates()

unique_df.count()

block_group                1529
price                      1529
speed_down                 1529
speed_up                   1529
speed_unit                 1529
technology                 1529
package                    1529
fastest_speed_down         1529
fastest_speed_price        1529
speed_down_bins            1529
redlining_grade            1529
median_household_income    1529
income_level               1529
n_providers                1529
internet_perc_broadband    1529
dtype: int64

In [82]:
# Attach each block group's coordinate arrays to its de-duplicated rows.
# `block_group` is a column of unique_df and the index of groupedCoords_df.
merged_df = pd.merge(unique_df, groupedCoords_df, on='block_group')

merged_df.head()

Unnamed: 0,block_group,price,speed_down,speed_up,speed_unit,technology,package,fastest_speed_down,fastest_speed_price,speed_down_bins,redlining_grade,median_household_income,income_level,n_providers,internet_perc_broadband,lats,lons
0,270530001011,50.0,200.0,200.0,Mbps,Fiber,HSI Upto 200 Mbps/200 Mbps Prepaid,940.0,65.0,Blazing (≥200),D,58011.0,Middle-Lower,2,0.749361,"[45.0476859, 45.047731, 45.0477342, 45.0480062...","[-93.302072, -93.3046207, -93.3007946, -93.300..."
1,270530001011,50.0,80.0,10.0,Mbps,Not Fiber,HSI Upto 80 Mbps/10 Mbps Prepaid,80.0,50.0,Medium (25-99),D,58011.0,Middle-Lower,2,0.749361,"[45.0476859, 45.047731, 45.0477342, 45.0480062...","[-93.302072, -93.3046207, -93.3007946, -93.300..."
2,270530001011,50.0,40.0,3.0,Mbps,Not Fiber,HSI Upto 40 Mbps/3 Mbps Prepaid,40.0,50.0,Medium (25-99),D,58011.0,Middle-Lower,2,0.749361,"[45.0476859, 45.047731, 45.0477342, 45.0480062...","[-93.302072, -93.3046207, -93.3007946, -93.300..."
3,270530001011,50.0,10.0,0.75,Mbps,Not Fiber,HSI Upto 10 Mbps/750 Kbps Prepaid,10.0,50.0,Slow (<25 Mbps),D,58011.0,Middle-Lower,2,0.749361,"[45.0476859, 45.047731, 45.0477342, 45.0480062...","[-93.302072, -93.3046207, -93.3007946, -93.300..."
4,270530001011,50.0,140.0,20.0,Mbps,Not Fiber,HSI Upto 140 Mbps/20 Mbps Prepaid,140.0,50.0,Fast (100-199),D,58011.0,Middle-Lower,2,0.749361,"[45.0476859, 45.047731, 45.0477342, 45.0480062...","[-93.302072, -93.3046207, -93.3007946, -93.300..."


In [84]:
# Inspect the column dtypes of the merged frame.
merged_df.dtypes

block_group                  int64
price                      float64
speed_down                 float64
speed_up                   float64
speed_unit                  object
technology                  object
package                     object
fastest_speed_down         float64
fastest_speed_price        float64
speed_down_bins             object
redlining_grade             object
median_household_income     object
income_level                object
n_providers                  int64
internet_perc_broadband    float64
lats                        object
lons                        object
dtype: object