In [1]:
import sqlite3
from sqlite3 import Error

import pandas as pd

from sqlalchemy import create_engine, Column, Integer, String

import datetime as dt

In [2]:
# Open the SQLite database file used by every cell below
# (sqlite3.connect creates the file if it does not exist yet).
conn = sqlite3.connect('Data/isp.sqlite')

In [3]:
# Per-address plan table, keyed on the full address string.
# IF NOT EXISTS keeps this cell re-runnable: the database lives on disk, so a
# plain CREATE TABLE raises "table already exists" on every run after the first.
conn.execute('''CREATE TABLE IF NOT EXISTS Minneapolis_Centurylink
    (address_full varchar(100) PRIMARY KEY,
    major_city varchar(50),
    state varchar(4),
    lat float,
    lon float,
    block_group bigint,
    collection_datetime datetime,
    provider varchar(20),
    price double,
    speed_down float,
    speed_up float,
    speed_unit varchar(10),
    technology varchar(20),
    package varchar(100),
    fastest_speed_down float,
    fastest_speed_price double,
    speed_down_bins varchar(50),
    redlining_grade varchar(2),
    race_perc_non_white double,
    race_quantile varchar(50),
    median_household_income float,
    income_dollars_below_median float,
    income_level varchar(50),
    ppl_per_sq_mile double,
    n_providers int,
    internet_perc_broadband double
    );''')

<sqlite3.Cursor at 0x16c71373b90>

In [19]:
# Block-group level table holding plan attributes plus the coordinates of the
# addresses in each block group (lats / lons are stored as blobs).
#
# IF NOT EXISTS keeps this cell re-runnable against the on-disk database.
# block_group is deliberately NOT a PRIMARY KEY: the frame written to this
# table carries several rows per block group (one per distinct combination of
# plan / redlining grade), so a uniqueness constraint on it would reject them.
conn.execute('''CREATE TABLE IF NOT EXISTS Minneapolis_Centurylink_Grouped
    (block_group bigint,
    price double,
    speed_down float,
    speed_up float,
    speed_unit varchar(10),
    technology varchar(20),
    package varchar(100),
    fastest_speed_down float,
    fastest_speed_price double,
    speed_down_bins varchar(50),
    redlining_grade varchar(2),
    median_household_income float,
    income_dollars_below_median float,
    income_level varchar(50),
    ppl_per_sq_mile double,
    n_providers int,
    internet_perc_broadband double,
    lats blob,
    lons blob
    );''')

<sqlite3.Cursor at 0x192d7453180>

In [18]:
# Utility cell: uncomment the statement below to drop the grouped table before
# re-creating it (IF EXISTS makes the drop safe when the table is absent).

# conn.execute('''DROP TABLE IF EXISTS Minneapolis_Centurylink_Grouped;''')

<sqlite3.Cursor at 0x192d7453110>

In [4]:
# Load the per-address CenturyLink plan data for Minneapolis from CSV.
minneapolis_df = pd.read_csv('Data/minneapolis_centurylink_plans.csv')

In [5]:
# Preview the first rows to confirm the columns loaded as expected.
minneapolis_df.head()

Unnamed: 0,address_full,major_city,state,lat,lon,block_group,collection_datetime,provider,price,speed_down,...,speed_down_bins,redlining_grade,race_perc_non_white,race_quantile,median_household_income,income_dollars_below_median,income_level,ppl_per_sq_mile,n_providers,internet_perc_broadband
0,"3739 SNELLING AVE,MINNEAPOLIS,MN 55406,USA",minneapolis,MN,44.93488,-93.225727,270531088001,1650164907,CenturyLink,50.0,200.0,...,Blazing (≥200),D,0.640575,least white,60417.0,5651.0,Middle-Lower,3329.25597,3.0,0.608025
1,"3636 34TH AVE S,MINNEAPOLIS,MN 55406,USA",minneapolis,MN,44.936622,-93.223331,270531088001,1650164906,CenturyLink,50.0,200.0,...,Blazing (≥200),B,0.640575,least white,60417.0,5651.0,Middle-Lower,3329.25597,3.0,0.608025
2,"3740 SNELLING AVE,MINNEAPOLIS,MN 55406,USA",minneapolis,MN,44.934692,-93.226591,270531088001,1650164915,CenturyLink,50.0,200.0,...,Blazing (≥200),D,0.640575,least white,60417.0,5651.0,Middle-Lower,3329.25597,3.0,0.608025
3,"3759 1/2 SNELLING AVE,MINNEAPOLIS,MN 55406,USA",minneapolis,MN,44.934468,-93.22521,270531088001,1650164918,CenturyLink,50.0,30.0,...,Medium (25-99),D,0.640575,least white,60417.0,5651.0,Middle-Lower,3329.25597,3.0,0.608025
4,"3741 MINNEHAHA AVE,MINNEAPOLIS,MN 55406,USA",minneapolis,MN,44.934691,-93.22382,270531088001,1650164916,CenturyLink,50.0,200.0,...,Blazing (≥200),C,0.640575,least white,60417.0,5651.0,Middle-Lower,3329.25597,3.0,0.608025


In [6]:
# Convert collection_datetime from epoch seconds to timezone-aware timestamps
# in US Central time. The values are parsed as UTC first, then shifted with the
# vectorized .dt accessor, which converts the whole column at once instead of
# calling tz_convert on each element through a Python lambda.
minneapolis_df['collection_datetime'] = (
    pd.to_datetime(minneapolis_df['collection_datetime'], unit='s', utc=True)
    .dt.tz_convert('America/Chicago')
)

In [7]:
# Non-null count per column; any column below the row total has missing values.
minneapolis_df.count()

address_full                   11583
major_city                     11583
state                          11583
lat                            11583
lon                            11583
block_group                    11583
collection_datetime            11583
provider                       11583
price                          11583
speed_down                     11583
speed_up                       11583
speed_unit                     11583
technology                     11583
package                        11583
fastest_speed_down             11583
fastest_speed_price            11583
speed_down_bins                11583
redlining_grade                 9279
race_perc_non_white            11583
race_quantile                  11583
median_household_income        11093
income_dollars_below_median    11093
income_level                   11093
ppl_per_sq_mile                11583
n_providers                    11583
internet_perc_broadband        11583
dtype: int64

In [8]:
# Replace missing values with a placeholder label, column by column.
# NOTE(review): 'Unknown' is a string going into median_household_income, which
# is otherwise numeric and is declared as float in the SQL schema -- confirm
# downstream consumers expect that. income_dollars_below_median is left unfilled.
placeholder_by_column = {
    'redlining_grade': 'None',
    'median_household_income': 'Unknown',
    'income_level': 'Unknown',
}

for column_name, placeholder in placeholder_by_column.items():
    minneapolis_df[column_name] = minneapolis_df[column_name].fillna(placeholder)

In [9]:
# Re-check non-null counts to confirm the placeholder fill took effect.
minneapolis_df.count()

address_full                   11583
major_city                     11583
state                          11583
lat                            11583
lon                            11583
block_group                    11583
collection_datetime            11583
provider                       11583
price                          11583
speed_down                     11583
speed_up                       11583
speed_unit                     11583
technology                     11583
package                        11583
fastest_speed_down             11583
fastest_speed_price            11583
speed_down_bins                11583
redlining_grade                11583
race_perc_non_white            11583
race_quantile                  11583
median_household_income        11583
income_dollars_below_median    11093
income_level                   11583
ppl_per_sq_mile                11583
n_providers                    11583
internet_perc_broadband        11583
dtype: int64

In [10]:
# Append the per-address rows to the existing table; the DataFrame index is
# not written. to_sql returns the number of rows inserted.
minneapolis_df.to_sql(
    name='Minneapolis_Centurylink',
    con=conn,
    if_exists='append',
    index=False,
)

11583

In [12]:
# Collect the distinct latitudes and longitudes observed in each block group.
# The columns are selected with a list ([['lat', 'lon']]): selecting several
# columns with a bare tuple (['lat', 'lon']) is deprecated in pandas and is an
# error from pandas 2.0 onward.
groupedCoords_df = (
    minneapolis_df
    .groupby('block_group')[['lat', 'lon']]
    .agg(lats=('lat', 'unique'), lons=('lon', 'unique'))
)

groupedCoords_df.head()


  groupedCoords_df = minneapolis_df.groupby('block_group')['lat', 'lon'].agg(lats=('lat', 'unique'),


Unnamed: 0_level_0,lats,lons
block_group,Unnamed: 1_level_1,Unnamed: 2_level_1
270530001011,"[45.0506428, 45.0495296, 45.0496982, 45.050298...","[-93.3020727, -93.3038677, -93.304622, -93.307..."
270530001012,"[45.0464171, 45.0468724, 45.0465789, 45.044749...","[-93.308919, -93.3114655, -93.3083802, -93.303..."
270530001013,"[45.0460791, 45.0445234, 45.0468721, 45.046750...","[-93.3147175, -93.3121481, -93.3127335, -93.31..."
270530001014,"[45.0478498, 45.0503846, 45.049679, 45.0500047...","[-93.3122009, -93.318589, -93.3172903, -93.316..."
270530001021,"[45.0390938, 45.0418507, 45.0423416, 45.041465...","[-93.28848, -93.2885091, -93.2879572, -93.2869..."


In [13]:
# Columns kept for the block-group level table (address-level fields dropped).
grouped_columns = [
    'block_group',
    'price',
    'speed_down',
    'speed_up',
    'speed_unit',
    'technology',
    'package',
    'fastest_speed_down',
    'fastest_speed_price',
    'speed_down_bins',
    'redlining_grade',
    'median_household_income',
    'income_level',
    'n_providers',
    'internet_perc_broadband',
]

# Keep one row per distinct combination of the selected columns.
unique_df = minneapolis_df[grouped_columns].drop_duplicates()

unique_df.count()

block_group                1553
price                      1553
speed_down                 1553
speed_up                   1553
speed_unit                 1553
technology                 1553
package                    1553
fastest_speed_down         1553
fastest_speed_price        1553
speed_down_bins            1553
redlining_grade            1553
median_household_income    1553
income_level               1553
n_providers                1553
internet_perc_broadband    1553
dtype: int64

In [14]:
# Attach the per-block-group coordinate arrays to every plan row; the right
# frame is matched through its 'block_group' index level.
merged_df = pd.merge(unique_df, groupedCoords_df, on='block_group')

merged_df.head()

Unnamed: 0,block_group,price,speed_down,speed_up,speed_unit,technology,package,fastest_speed_down,fastest_speed_price,speed_down_bins,redlining_grade,median_household_income,income_level,n_providers,internet_perc_broadband,lats,lons
0,270531088001,50.0,200.0,200.0,Mbps,Fiber,HSI Upto 200 Mbps/200 Mbps Prepaid,940.0,65.0,Blazing (≥200),D,60417.0,Middle-Lower,3.0,0.608025,"[44.9348799, 44.9366218, 44.9346924, 44.934467...","[-93.2257266, -93.2233313, -93.2265906, -93.22..."
1,270531088001,50.0,200.0,200.0,Mbps,Fiber,HSI Upto 200 Mbps/200 Mbps Prepaid,940.0,65.0,Blazing (≥200),B,60417.0,Middle-Lower,3.0,0.608025,"[44.9348799, 44.9366218, 44.9346924, 44.934467...","[-93.2257266, -93.2233313, -93.2265906, -93.22..."
2,270531088001,50.0,30.0,1.5,Mbps,Not Fiber,HSI Upto 30 Mbps/1.5 Mbps Prepaid,30.0,50.0,Medium (25-99),D,60417.0,Middle-Lower,3.0,0.608025,"[44.9348799, 44.9366218, 44.9346924, 44.934467...","[-93.2257266, -93.2233313, -93.2265906, -93.22..."
3,270531088001,50.0,200.0,200.0,Mbps,Fiber,HSI Upto 200 Mbps/200 Mbps Prepaid,940.0,65.0,Blazing (≥200),C,60417.0,Middle-Lower,3.0,0.608025,"[44.9348799, 44.9366218, 44.9346924, 44.934467...","[-93.2257266, -93.2233313, -93.2265906, -93.22..."
4,270530095002,50.0,200.0,200.0,Mbps,Fiber,HSI Upto 200 Mbps/200 Mbps Prepaid,940.0,65.0,Blazing (≥200),B,50278.0,Middle-Lower,3.0,0.869919,"[44.9348538, 44.9365321, 44.9346378, 44.936396...","[-93.2646029, -93.2646121, -93.2628871, -93.26..."


In [21]:
# Non-null count per column of the merged frame.
merged_df.count()

block_group                1553
price                      1553
speed_down                 1553
speed_up                   1553
speed_unit                 1553
technology                 1553
package                    1553
fastest_speed_down         1553
fastest_speed_price        1553
speed_down_bins            1553
redlining_grade            1553
median_household_income    1553
income_level               1553
n_providers                1553
internet_perc_broadband    1553
lats                       1553
lons                       1553
dtype: int64

In [23]:
# Write the merged frame to SQLite without its index.
# if_exists='replace' drops any existing table of this name and lets pandas
# recreate it from the frame, so a hand-written schema for this table does not
# survive this call.
merged_df.to_sql(
    name='Minneapolis_Centurylink_Grouped',
    con=conn,
    if_exists='replace',
    index=False,
)

1553

In [6]:
# Read every row of the per-address table back out of SQLite.
# SQLite matches table names case-insensitively, so this spelling resolves to
# the table created as Minneapolis_Centurylink.
c = conn.cursor()
c.execute('''SELECT * from Minneapolis_CenturyLink''')
results = c.fetchall()

# Show the first row as a sanity check on the stored values.
results[0]

('3739 SNELLING AVE,MINNEAPOLIS,MN 55406,USA',
 'minneapolis',
 'MN',
 44.9348799,
 -93.2257266,
 270531088001,
 1650164907,
 'CenturyLink',
 50.0,
 200.0,
 200.0,
 'Mbps',
 'Fiber',
 'HSI Upto 200 Mbps/200 Mbps Prepaid',
 940.0,
 65.0,
 'Blazing (≥200)',
 'D',
 0.6405750798722045,
 'least white',
 60417.0,
 5651.0,
 'Middle-Lower',
 3329.255969792054,
 3,
 0.6080246913580247)

In [7]:
# The column name is the first field of each entry in cursor.description.
names = [column_info[0] for column_info in c.description]

# Rebuild a DataFrame from the fetched rows, labelled with those column names.
data = pd.DataFrame(data=results, columns=names)

In [8]:
# Preview the frame rebuilt from the database rows.
data.head()

Unnamed: 0,address_full,major_city,state,lat,lon,block_group,collection_datetime,provider,price,speed_down,...,speed_down_bins,redlining_grade,race_perc_non_white,race_quantile,median_household_income,income_dollars_below_median,income_level,ppl_per_sq_mile,n_providers,internet_perc_broadband
0,"3739 SNELLING AVE,MINNEAPOLIS,MN 55406,USA",minneapolis,MN,44.93488,-93.225727,270531088001,1650164907,CenturyLink,50.0,200.0,...,Blazing (≥200),D,0.640575,least white,60417.0,5651.0,Middle-Lower,3329.25597,3,0.608025
1,"3636 34TH AVE S,MINNEAPOLIS,MN 55406,USA",minneapolis,MN,44.936622,-93.223331,270531088001,1650164906,CenturyLink,50.0,200.0,...,Blazing (≥200),B,0.640575,least white,60417.0,5651.0,Middle-Lower,3329.25597,3,0.608025
2,"3740 SNELLING AVE,MINNEAPOLIS,MN 55406,USA",minneapolis,MN,44.934692,-93.226591,270531088001,1650164915,CenturyLink,50.0,200.0,...,Blazing (≥200),D,0.640575,least white,60417.0,5651.0,Middle-Lower,3329.25597,3,0.608025
3,"3759 1/2 SNELLING AVE,MINNEAPOLIS,MN 55406,USA",minneapolis,MN,44.934468,-93.22521,270531088001,1650164918,CenturyLink,50.0,30.0,...,Medium (25-99),D,0.640575,least white,60417.0,5651.0,Middle-Lower,3329.25597,3,0.608025
4,"3741 MINNEHAHA AVE,MINNEAPOLIS,MN 55406,USA",minneapolis,MN,44.934691,-93.22382,270531088001,1650164916,CenturyLink,50.0,200.0,...,Blazing (≥200),C,0.640575,least white,60417.0,5651.0,Middle-Lower,3329.25597,3,0.608025


In [13]:
# Release the database connection once all reads and writes are finished.
conn.close()

In [10]:
# Display the column names taken from the cursor description.
names

['address_full',
 'major_city',
 'state',
 'lat',
 'lon',
 'block_group',
 'collection_datetime',
 'provider',
 'price',
 'speed_down',
 'speed_up',
 'speed_unit',
 'technology',
 'package',
 'fastest_speed_down',
 'fastest_speed_price',
 'speed_down_bins',
 'redlining_grade',
 'race_perc_non_white',
 'race_quantile',
 'median_household_income',
 'income_dollars_below_median',
 'income_level',
 'ppl_per_sq_mile',
 'n_providers',
 'internet_perc_broadband']