In [382]:
import pandas as pd
import geopandas as gpd
import requests
from shapely.geometry import Point
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import copy

In [383]:
## FCC fixed-broadband deployment data for Tennessee, June 2019 (most current)
fcc_bbnd_june19 = pd.read_csv('../data/FCC_Broadband/TN-Fixed-Jun2019-v1.csv')

## For details on original columns: https://www.fcc.gov/general/explanation-broadband-deployment-data
# Code  Technology of Transmission
# 10    Asymmetric xDSL
# 11    ADSL2, ADSL2+
# 12    VDSL
# 20    Symmetric xDSL*
# 30    Other Copper Wireline (all copper-wire based technologies other than xDSL; Ethernet over copper and T-1 are examples)
# 40    Cable Modem other than DOCSIS 1, 1.1, 2.0, 3.0, or 3.1
# 41    Cable Modem - DOCSIS 1, 1.1 or 2.0
# 42    Cable Modem - DOCSIS 3.0
# 43    Cable Modem - DOCSIS 3.1
# 50    Optical Carrier / Fiber to the end user (Fiber to the home or business end user, does not include "fiber to the curb")
# 60    Satellite
# 70    Terrestrial Fixed Wireless
# 90    Electric Power Line
# 0     All Other

## Urban Area definitions from 2018 Census Geographies
## 'R' = Rural 'C' = Urban Cluster (2,500-50,000) 'U' = Urbanized Area (50,000+)
urban_area_shape = gpd.read_file('../data/UrbanAreaShape2018/tl_2018_us_uac10.shp')

## Define central point of TN census tracts from 2010 Census Geographies.
## TRACTCE and COUNTYFP are read as strings so the codes keep any leading zeros.
centroid_df = pd.read_csv('../data/CenPop2010_Mean_TR47.txt', converters={'TRACTCE': str, 'COUNTYFP': str})

## Structural summary of each loaded table.
## DataFrame.info() prints its own report and returns None, so it is called
## directly; wrapping it in print() only appended a stray "None" line.
print(fcc_bbnd_june19.shape)
fcc_bbnd_june19.info()

print(urban_area_shape.shape)
urban_area_shape.info()

print(centroid_df.shape)
centroid_df.info()

(1484085, 17)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1484085 entries, 0 to 1484084
Data columns (total 17 columns):
 #   Column              Non-Null Count    Dtype  
---  ------              --------------    -----  
 0   LogRecNo            1484085 non-null  int64  
 1   Provider_Id         1484085 non-null  int64  
 2   FRN                 1484085 non-null  int64  
 3   ProviderName        1484085 non-null  object 
 4   DBAName             1484085 non-null  object 
 5   HoldingCompanyName  1484085 non-null  object 
 6   HocoNum             1484085 non-null  int64  
 7   HocoFinal           1484085 non-null  object 
 8   StateAbbr           1484085 non-null  object 
 9   BlockCode           1484085 non-null  int64  
 10  TechCode            1484085 non-null  int64  
 11  Consumer            1484085 non-null  int64  
 12  MaxAdDown           1484085 non-null  float64
 13  MaxAdUp             1484085 non-null  float64
 14  Business            1484085 non-null  int64  
 15  M

In [384]:
## TN tracts shape: read the tl_2018_47_tract shapefile into a GeoDataFrame
tract_shape = gpd.read_file('../data/shapefiles/tl_2018_47_tract.shp')

In [385]:
## Keep only the tract identifier (GEOID) and the geometry column.
tract_shape = tract_shape[['GEOID', 'geometry']]

In [386]:
## Keep only the urban-area id, name, type code (UATYP10), land area and geometry columns.
urban_area_shape = urban_area_shape[['GEOID10', 'NAME10', 'UATYP10', 'ALAND10', 'geometry']]

In [387]:
## Row counts per technology code, per Consumer flag, per Business flag,
## and per Consumer/Business combination, printed with blank lines between tables.
size_tables = [
    fcc_bbnd_june19.groupby(group_keys).size()
    for group_keys in ('TechCode', 'Consumer', 'Business', ['Consumer', 'Business'])
]
print(*size_tables, sep='\n\n\n')

TechCode
10     66864
11    112629
12     67315
20       834
30      9860
40       334
41        61
42     15317
43    121866
50     68334
60    944385
70     76286
dtype: int64


Consumer
0     261604
1    1222481
dtype: int64


Business
0     258113
1    1225972
dtype: int64


Consumer  Business
0         1           261604
1         0           258113
          1           964368
dtype: int64


In [388]:
## Add column for census tract: the first 11 digits of the 15-digit block code
## (2 state + 3 county + 6 tract digits).
## BlockCode is stored as an integer, so a state whose FIPS code starts with 0
## would lose its leading digit; zfill(15) restores it before slicing.
## For Tennessee (47) the padding is a no-op.
fcc_bbnd_june19['TractCode'] = fcc_bbnd_june19['BlockCode'].astype(str).str.zfill(15).str[:11]

In [389]:
## Copy of the FCC table without the registration / company-name / state columns.
## The source frame itself is left untouched.
all_june19 = fcc_bbnd_june19.drop(
    columns=['FRN', 'DBAName', 'HoldingCompanyName', 'HocoFinal', 'StateAbbr']
)

In [390]:
## Split the FCC rows into satellite-only (TechCode 60) and everything else.
only_sat_june19 = fcc_bbnd_june19[fcc_bbnd_june19['TechCode'].eq(60)]
no_sat_june19 = fcc_bbnd_june19[fcc_bbnd_june19['TechCode'].ne(60)]

In [391]:
## Count the distinct census blocks per tract that appear in the non-satellite rows.
## NOTE(review): this counts blocks present in the FCC filings, which may be fewer
## than all blocks in the tract -- confirm this is the intended denominator.
count_blocks = (
    no_sat_june19
    .groupby('TractCode')['BlockCode']
    .nunique()
    .rename('CountBlocks')
    .reset_index()
)

In [392]:
## Attach each tract's block count to every non-satellite FCC row.
no_sat_june19_countd = pd.merge(no_sat_june19, count_blocks, on='TractCode', how='inner')

In [393]:
## Subsets of the non-satellite rows: consumer service, business service, and both.
consumer_june19 = no_sat_june19_countd.loc[no_sat_june19_countd['Consumer'].eq(1)]
business_june19 = no_sat_june19_countd.loc[no_sat_june19_countd['Business'].eq(1)]
both_cnsm_biz_june19 = consumer_june19.loc[consumer_june19['Business'].eq(1)]

## Consumer rows advertising at least 25 mbps download.
consumer_june19_25mbps = consumer_june19.loc[consumer_june19['MaxAdDown'].ge(25.0)]

In [394]:
## Best advertised consumer download speed in each block, keeping only blocks
## whose best speed is above zero.
top_down_per_block = (
    consumer_june19
    .groupby(['BlockCode', 'TractCode'])['MaxAdDown']
    .max()
    .reset_index()
    .loc[lambda per_block: per_block['MaxAdDown'] > 0.0]
)

## Tract-level mean of those per-block best speeds, written out for later use.
avg_servable_speed = (
    top_down_per_block
    .groupby('TractCode')['MaxAdDown']
    .mean()
    .reset_index()
)
avg_servable_speed.to_csv('../out/avg_servable_speed.csv')

In [395]:
## Determine how many blocks in a tract have a *choice of services* at levels of any / 3 / 12 / 25 mbps.
## A block counts as having a choice when at least two consumer rows were filed for it.
## NOTE(review): rows are counted, not distinct providers, so one provider filing two
## technologies in the same block also counts as a choice -- confirm this is intended.


def _offers_per_block(consumer_offers):
    """Return one row per (BlockCode, TractCode) with the summed 'Consumer' flag.

    Every input row is expected to have Consumer == 1, so the sum is the number
    of consumer offerings filed for the block.
    """
    return consumer_offers.groupby(['BlockCode', 'TractCode'])['Consumer'].sum().reset_index()


def _count_blocks_by_tract(blocks_with_choice, count_column):
    """Count the rows (blocks) of `blocks_with_choice` per tract.

    Returns a frame with 'TractCode' and the count stored under `count_column`.
    """
    counted = blocks_with_choice.groupby(['TractCode']).count().reset_index()
    return counted.drop('BlockCode', axis=1).rename(columns={'Consumer': count_column})


## Any speed
provider_block_choice = _offers_per_block(consumer_june19)
provider_block_choices = provider_block_choice[provider_block_choice['Consumer'] >= 2]
block_choices_counted = _count_blocks_by_tract(provider_block_choices, 'Blocks With Choice')

## 3 mbps and 12 mbps. These two frames were referenced in the list below but never
## built, which raised NameError on a fresh kernel.
## NOTE(review): the column names follow the pattern of the existing ones -- confirm
## they match what downstream consumers of choice_levels.csv expect.
provider_block_choice_3mbps = _offers_per_block(consumer_june19[consumer_june19['MaxAdDown'] >= 3.0])
provider_block_choices_3mbps = provider_block_choice_3mbps[provider_block_choice_3mbps['Consumer'] >= 2]
block_choices_counted_3mbps = _count_blocks_by_tract(provider_block_choices_3mbps, 'Blocks With Choice 3mbps')

provider_block_choice_12mbps = _offers_per_block(consumer_june19[consumer_june19['MaxAdDown'] >= 12.0])
provider_block_choices_12mbps = provider_block_choice_12mbps[provider_block_choice_12mbps['Consumer'] >= 2]
block_choices_counted_12mbps = _count_blocks_by_tract(provider_block_choices_12mbps, 'Blocks With Choice 12mbps')

## 25 mbps
provider_block_choice_25mbps = _offers_per_block(consumer_june19_25mbps)
provider_block_choices_25mbps = provider_block_choice_25mbps[provider_block_choice_25mbps['Consumer'] >= 2]
block_choices_counted_25mbps = _count_blocks_by_tract(provider_block_choices_25mbps, 'Blocks With Choice 25mbps')

consumer_choice_dfs = [block_choices_counted, block_choices_counted_3mbps, block_choices_counted_12mbps, block_choices_counted_25mbps]

In [396]:
## Build the per-tract table of blocks-with-choice counts, then express the
## any-speed and 25 mbps counts as a percentage of the tract's counted blocks.
choice_levels_add = count_blocks.copy()
for choice_counts in consumer_choice_dfs:
    choice_levels_add = pd.merge(choice_levels_add, choice_counts, how='left', on='TractCode')

choice_levels_add['Prop With Choice'] = (
    choice_levels_add['Blocks With Choice'].mul(100).div(choice_levels_add['CountBlocks']).round(2)
)
choice_levels_add['Prop With Choice 25mbps'] = (
    choice_levels_add['Blocks With Choice 25mbps'].mul(100).div(choice_levels_add['CountBlocks']).round(2)
)

In [397]:
## Replace every remaining NaN with 0 (the left merges leave NaN for tracts that
## have no row in a merged table), then write the table to CSV.
choice_levels_add = choice_levels_add.fillna(0)
choice_levels_add.to_csv('../out/choice_levels.csv')

In [398]:
## Determine how many blocks in a tract are served *by a given provider*.
## First collapse to one row per provider and block, then count blocks per provider and tract.
provider_block_served_consumer = (
    consumer_june19
    .groupby(['ProviderName', 'BlockCode', 'TractCode'])['Consumer']
    .max()
    .reset_index()
)

sum_blocks_by_tract_consumer = (
    provider_block_served_consumer
    .groupby(['ProviderName', 'TractCode'])['Consumer']
    .sum()
    .reset_index()
    .rename(columns={'Consumer': 'Consumer Blocks Served'})
)

In [399]:
## Same per-provider block count, restricted to rows advertising at least 25 mbps.
provider_block_served_consumer_25mbps = (
    consumer_june19_25mbps
    .groupby(['ProviderName', 'BlockCode', 'TractCode'])['Consumer']
    .max()
    .reset_index()
)
sum_blocks_by_tract_consumer_25mbps = (
    provider_block_served_consumer_25mbps
    .groupby(['ProviderName', 'TractCode'])['Consumer']
    .sum()
    .reset_index()
    .rename(columns={'Consumer': 'Consumer Blocks Served 25mbps'})
)

In [400]:
## Determine how many blocks in a tract are served *by anyone*.
## One row per block first, then the number of such blocks per tract.
block_served_consumer = (
    consumer_june19
    .groupby(['BlockCode', 'TractCode'])['Consumer']
    .max()
    .reset_index()
)
blocks_served_any_pro_consumer = (
    block_served_consumer
    .groupby(['TractCode'])['Consumer']
    .sum()
    .reset_index()
    .rename(columns={'Consumer': 'Consumer Blocks Served Any Pro'})
)

In [401]:
## Blocks per tract served by any provider at 25 mbps or more.
block_served_consumer_25mbps = (
    consumer_june19_25mbps
    .groupby(['BlockCode', 'TractCode'])['Consumer']
    .max()
    .reset_index()
)
blocks_served_any_pro_consumer_25mbps = (
    block_served_consumer_25mbps
    .groupby(['TractCode'])['Consumer']
    .sum()
    .reset_index()
    .rename(columns={'Consumer': 'Consumer Blocks Served Any Pro 25mbps'})
)

In [402]:
## Per-tract table of blocks served by any provider (any speed and 25 mbps),
## with each count also expressed as a percentage of the tract's counted blocks.
consumer_any_pro_dfs = [blocks_served_any_pro_consumer, blocks_served_any_pro_consumer_25mbps]
consumer_count_any_pro = count_blocks.copy()
for served_counts in consumer_any_pro_dfs:
    consumer_count_any_pro = pd.merge(consumer_count_any_pro, served_counts, how='left', on='TractCode')

consumer_count_any_pro['Prop Served By Any'] = (
    consumer_count_any_pro['Consumer Blocks Served Any Pro']
    .mul(100)
    .div(consumer_count_any_pro['CountBlocks'])
    .round(2)
)
consumer_count_any_pro['Prop Served By Any 25mbps'] = (
    consumer_count_any_pro['Consumer Blocks Served Any Pro 25mbps']
    .mul(100)
    .div(consumer_count_any_pro['CountBlocks'])
    .round(2)
)

In [403]:
## Replace every remaining NaN with 0 (left merges leave NaN where a tract has
## no row in a merged table), then write the table to CSV.
consumer_count_any_pro = consumer_count_any_pro.fillna(0)
consumer_count_any_pro.to_csv('../out/consumers_any_provider.csv')

In [404]:
## Slim view of the consumer rows: provider, technology, tract and the tract's block count.
short_consumer_june19 = consumer_june19[['ProviderName', 'TechCode', 'TractCode', 'CountBlocks']]

## Attach the per-provider block counts (any speed, then 25 mbps), collapse repeated
## rows, and treat a provider with no 25 mbps blocks in a tract as serving 0 of them.
short_consumer_combo = (
    short_consumer_june19
    .merge(sum_blocks_by_tract_consumer, how='left', on=['ProviderName', 'TractCode'])
    .merge(sum_blocks_by_tract_consumer_25mbps, how='left', on=['ProviderName', 'TractCode'])
    .drop_duplicates()
    .fillna(0)
    .astype({'Consumer Blocks Served 25mbps': 'int64'}, copy=False)
)
short_consumer_combo.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 9436 entries, 0 to 512740
Data columns (total 6 columns):
 #   Column                         Non-Null Count  Dtype 
---  ------                         --------------  ----- 
 0   ProviderName                   9436 non-null   object
 1   TechCode                       9436 non-null   int64 
 2   TractCode                      9436 non-null   object
 3   CountBlocks                    9436 non-null   int64 
 4   Consumer Blocks Served         9436 non-null   int64 
 5   Consumer Blocks Served 25mbps  9436 non-null   int64 
dtypes: int64(4), object(2)
memory usage: 516.0+ KB


In [405]:
## In each tract that is served by a provider, how many blocks do they serve?
## Consumer:

##block_sum_consumers = consumer_june19.groupby(['TractCode', 'ProviderName', 'CountBlocks', 'TechCode'])['Consumer'].sum().reset_index()
## Business
##block_sum_businesses = business_june19.groupby(['TractCode', 'ProviderName', 'CountBlocks', 'TechCode'])['Business'].sum().reset_index()

## Percentage of the tract's counted blocks that the provider serves (any speed / 25 mbps).
short_consumer_combo['Proportion Served'] = (
    short_consumer_combo['Consumer Blocks Served']
    .mul(100)
    .div(short_consumer_combo['CountBlocks'])
    .round(2)
)
short_consumer_combo['Proportion Served 25mbps'] = (
    short_consumer_combo['Consumer Blocks Served 25mbps']
    .mul(100)
    .div(short_consumer_combo['CountBlocks'])
    .round(2)
)

## A provider "substantially serves" a tract when it covers at least 30 percent of its blocks.
short_consumer_combo['Substantially Served'] = short_consumer_combo['Proportion Served'].ge(30.0)
short_consumer_combo['Substantially Served 25mbps'] = short_consumer_combo['Proportion Served 25mbps'].ge(30.0)

In [406]:
## Independent copy of the provider/technology/tract table.
short_consumer_for_set = short_consumer_combo.copy()

In [407]:
## Collapse to one row per provider and tract, then attach the set of technology
## codes that provider reports in that tract.
consumer_combo_no_code = short_consumer_combo.drop(columns='TechCode').drop_duplicates()
set_codes = (
    short_consumer_for_set
    .groupby(['ProviderName', 'TractCode'])['TechCode']
    .agg(lambda codes: set(codes))
    .reset_index()
)
consumer_set_codes = pd.merge(
    consumer_combo_no_code,
    set_codes,
    how='left',
    on=['ProviderName', 'TractCode'],
)

In [408]:
## Flag tracts by characters 5-6 of TractCode (the first two digits after the
## 2-digit state and 3-digit county codes): '98' and '99'.
## The two flags can never both be true, so OR gives the same result as XOR.
tract_prefix = consumer_set_codes['TractCode'].str[5:7]
consumer_set_codes['Minimally Residential'] = tract_prefix == '98'
consumer_set_codes['Water Area'] = tract_prefix == '99'
consumer_set_codes['Consumer Service N/A'] = (
    consumer_set_codes['Water Area'] | consumer_set_codes['Minimally Residential']
)

In [409]:
## TODO: Expand the set into one column per technology code
## Rename the set-valued column in place, then write the provider/tract table to CSV.
consumer_set_codes.rename({'TechCode': 'TechCodes Set'}, axis=1, inplace=True)
consumer_set_codes.to_csv('../out/consumers_served.csv')

In [411]:
## Number of provider rows per tract flagged as substantially serving it
## (any speed / 25 mbps); summing the boolean flags counts the True values.
providers_serving_tract = (
    consumer_set_codes
    .groupby('TractCode')[['Substantially Served', 'Substantially Served 25mbps']]
    .sum()
    .reset_index()
)
providers_serving_tract.to_csv('../out/providers_serving_tract.csv')

In [412]:
## Pull tract-level commuting variables for Tennessee (state 47) from the
## Census API, ACS 5-year 2018.
census_url = 'https://api.census.gov/data'
year = '2018'
dataset = 'acs/acs5'
base_url = '/'.join([census_url, year, dataset])

## B08303_001E .. B08303_013E (mapped below to the commute-length total and its 12 bins)
get_commute_vars = ['B08303_' + str(i).zfill(3) + 'E' for i in range(1, 14)]
## Mapped below to 'Workers - Commute Method' and 'Work From Home'
get_wfh_vars = ['B08301_001E', 'B08301_021E']
get_tract_vars = ['NAME'] + get_wfh_vars + get_commute_vars

predicates = {}
predicates['get'] = ','.join(get_tract_vars)
predicates['for'] = 'tract:*'
predicates['in'] = 'state:47'
request = requests.get(base_url, params=predicates)
## Fail here with a clear HTTP error instead of a confusing JSON/shape error below.
request.raise_for_status()

## Column labels in the order the API returns them: the requested variables,
## followed by the state / county / tract geography codes.
col_names = ['NAME', 'Workers - Commute Method', 'Work From Home',
             'Workers - Commute Length', 'Less Than 5', '5 to 9', '10 to 14', '15 to 19', '20 to 24',
             '25 to 29', '30 to 34', '35 to 39', '40 to 44', '45 to 59', '60 to 89', '90 plus',
             'state', 'county', 'tract']
## The first returned row is the API's own header, so it is skipped.
wfh_commute_df = pd.DataFrame(data=request.json()[1:], columns=col_names)

In [413]:
## Minutes assigned to each commute-length bin, in column order: the bin's upper
## bound, with 120 used for the open-ended '90 plus' bin.
commute_times = [4, 9, 14, 19, 24, 29, 34, 39, 44, 59, 89, 120]

## Keep only the 12 per-bin worker counts and make them numeric.
just_commute = wfh_commute_df.drop(['NAME', 'Workers - Commute Method', 'Work From Home', 'Workers - Commute Length', 'state', 'county', 'tract'], axis='columns')
just_commute = just_commute.astype('int64')
mul_commute = just_commute * commute_times

## NOTE: despite its name this is a worker-weighted *mean* of the bin values above,
## not a median. The column name is kept because it is written to the output CSV.
## Tracts with no commuters divide 0 by 0 and are set to 0.
sr = round((mul_commute.sum(axis=1) / just_commute.sum(axis=1)), 2).rename('Median Commute Length').fillna(0)

wfh_commute_simple = wfh_commute_df.merge(sr, left_index=True, right_index=True)
wfh_commute_simple = wfh_commute_simple.astype({'Workers - Commute Method': 'int64', 'Work From Home': 'int64'})
## 11-character tract identifier: state + county + tract codes.
wfh_commute_simple['TractCode'] = wfh_commute_simple['state'] + wfh_commute_simple['county'] + wfh_commute_simple['tract']
wfh_commute_simple['Proportion Work From Home'] = round(100 * (wfh_commute_simple['Work From Home'] / wfh_commute_simple['Workers - Commute Method']), 2)
wfh_commute_reduced = wfh_commute_simple[['NAME', 'Workers - Commute Method',
                                          'Work From Home', 'Proportion Work From Home',
                                          'Workers - Commute Length', 'Median Commute Length', 'TractCode']]
wfh_commute_reduced.head()

<class 'pandas.core.series.Series'>


Unnamed: 0,NAME,Workers - Commute Method,Work From Home,Proportion Work From Home,Workers - Commute Length,Median Commute Length,TractCode
0,"Census Tract 114.13, Hamilton County, Tennessee",4078,201,4.93,3877,29.59,47065011413
1,"Census Tract 9502, Hardeman County, Tennessee",921,38,4.13,883,37.32,47069950200
2,"Census Tract 9504, Hardeman County, Tennessee",2077,61,2.94,2016,33.13,47069950400
3,"Census Tract 9501, Hardeman County, Tennessee",1431,77,5.38,1354,38.87,47069950100
4,"Census Tract 9505, Hardeman County, Tennessee",1476,94,6.37,1382,39.44,47069950500


In [414]:
## Write the work-from-home / commute summary per tract to CSV.
wfh_commute_reduced.to_csv('../out/census_wfh_commute.csv')

In [415]:
## Pull variables B15003_001E .. B15003_025E for every Tennessee tract from the
## same Census API endpoint (base_url) and summarise them into four attainment groups.
get_vars_ed = ['B15003_' + str(i).zfill(3) + 'E' for i in range(1, 26)]
get_vars_ed = ['NAME'] + get_vars_ed
predicates_ed = {}
predicates_ed['get'] = ','.join(get_vars_ed)
predicates_ed['for'] = 'tract:*'
predicates_ed['in'] = 'state:47'  ##State of Tennessee
requests_ed = requests.get(base_url, params=predicates_ed)
## Fail here with a clear HTTP error instead of a confusing JSON/shape error below.
requests_ed.raise_for_status()

## 'Total' is variable 001; '002'..'025' keep the variable's numeric suffix as the label.
col_names = ['NAME', 'Total', '002', '003', '004', '005', '006', '007', '008', '009', '010', '011', '012', '013', '014',
             '015', '016', '017', '018', '019', '020', '021', '022', '023', '024', '025', 'state', 'county', 'tract']
## The first returned row is the API's own header, so it is skipped.
ed_df = pd.DataFrame(columns=col_names, data=requests_ed.json()[1:])

## Change measures to be ints so they can be calculated. 'int64' is used, as in the
## commute cells, because plain 'int' is platform dependent.
int_cols = [e for e in col_names if e not in ('NAME', 'state', 'county', 'tract')]
ints_dict = {measure: 'int64' for measure in int_cols}
ed_df = ed_df.astype(ints_dict)

## NOTE: this GEOID is state + county only (5 characters), i.e. a county-level key,
## not a tract key.
ed_df['GEOID'] = ed_df['state'] + ed_df['county']
ed_df = ed_df.astype({'GEOID': 'object'})

##Add Summary columns: each group is the row-wise sum of its source columns.
attainment_groups = {
    'Less_Than_High_School': [str(i).zfill(3) for i in range(2, 17)],
    'High_School_Or_GED': ['017', '018'],
    'Some_College_Or_Associates': ['019', '020', '021'],
    'Bachelors_Or_More': ['022', '023', '024', '025'],
}
for summary_col, source_cols in attainment_groups.items():
    running = ed_df[source_cols]
    ed_df[summary_col] = running.sum(axis=1)

##Create Proportion columns: each group as a percentage of the tract total.
proportion_cols = {
    'Less_HS_Proportion': 'Less_Than_High_School',
    'HS_Proportion': 'High_School_Or_GED',
    'Some_College_Proportion': 'Some_College_Or_Associates',
    'Bachelors_Proportion': 'Bachelors_Or_More',
}
for proportion_col, summary_col in proportion_cols.items():
    ed_df[proportion_col] = round((100 * ed_df[summary_col] / ed_df['Total']), 2)
ed_df.head(50)

Unnamed: 0,NAME,Total,002,003,004,005,006,007,008,009,...,tract,GEOID,Less_Than_High_School,High_School_Or_GED,Some_College_Or_Associates,Bachelors_Or_More,Less_HS_Proportion,HS_Proportion,Some_College_Proportion,Bachelors_Proportion
0,"Census Tract 114.13, Hamilton County, Tennessee",5855,23,0,0,0,0,0,0,0,...,11413,47065,415,1843,2194,1403,7.09,31.48,37.47,23.96
1,"Census Tract 9502, Hardeman County, Tennessee",4712,45,0,0,0,3,15,18,10,...,950200,47069,1435,2202,761,314,30.45,46.73,16.15,6.66
2,"Census Tract 9504, Hardeman County, Tennessee",3514,15,0,0,0,0,0,0,8,...,950400,47069,440,1988,571,515,12.52,56.57,16.25,14.66
3,"Census Tract 9501, Hardeman County, Tennessee",2455,23,0,0,0,0,0,1,9,...,950100,47069,379,1157,600,319,15.44,47.13,24.44,12.99
4,"Census Tract 9505, Hardeman County, Tennessee",3017,17,0,0,0,0,9,0,6,...,950500,47069,600,1313,681,423,19.89,43.52,22.57,14.02
5,"Census Tract 9506, Hardeman County, Tennessee",1725,2,0,0,0,0,1,0,0,...,950600,47069,431,782,422,90,24.99,45.33,24.46,5.22
6,"Census Tract 1101, Morgan County, Tennessee",2041,18,0,0,0,0,0,0,0,...,110100,47129,309,1008,580,144,15.14,49.39,28.42,7.06
7,"Census Tract 1105, Morgan County, Tennessee",3584,91,0,0,0,0,0,0,7,...,110500,47129,642,1802,940,200,17.91,50.28,26.23,5.58
8,"Census Tract 1103, Morgan County, Tennessee",4676,37,0,0,0,0,11,11,7,...,110300,47129,1117,2442,807,310,23.89,52.22,17.26,6.63
9,"Census Tract 1104, Morgan County, Tennessee",2881,50,0,0,0,0,0,0,23,...,110400,47129,533,1257,879,212,18.5,43.63,30.51,7.36


In [416]:
## Keep the geography codes and the four proportion columns, set missing
## proportions to 0, add the combined tract identifier, and write to CSV.
ed_df = (
    ed_df[['state', 'county', 'tract', 'Less_HS_Proportion', 'HS_Proportion', 'Some_College_Proportion', 'Bachelors_Proportion']]
    .fillna(0)
    .assign(TractCode=lambda frame: frame['state'] + frame['county'] + frame['tract'])
)
ed_df.to_csv('../out/education_levels.csv')

In [None]:
# mappable_consumers = tract_shape.merge(consumer_set_codes, how='left', left_on='GEOID', right_on='TractCode')
# mappable_provider_tract = tract_shape.merge(providers_serving_tract, how='left', left_on='GEOID', right_on='TractCode')
# mappable_consumers.to_csv('../out/mappable_consumer_with_providers.csv')
# mappable_provider_tract.to_csv('../out/mappable_providers_serving_tract.csv')

In [None]:
# map_top_providers_dfs = []
# top_n_providers = list(consumer_set_codes.groupby('ProviderName').size().sort_values().tail(10).index)
# rp = copy.copy(cm.get_cmap('tab20c'))
# for x in top_n_providers:
#     provider_sub = consumer_set_codes[consumer_set_codes['ProviderName'] == x]
#     mappable = tract_shape.merge(provider_sub, how='left', left_on='GEOID', right_on='TractCode')
#     map_top_providers_dfs.append(mappable)
# for provider_map in map_top_providers_dfs:
#     provider_map.plot(column='Substantially Served', cmap=rp)

In [418]:
## Remove the POPULATION column in place.
## NOTE: not idempotent -- running this cell a second time raises KeyError.
centroid_df.drop('POPULATION', axis=1, inplace=True)

In [419]:
## Convert STATEFP to a string so it can be concatenated with the county and tract codes.
## NOTE(review): STATEFP was not read through a string converter, so a state code with a
## leading zero would presumably have lost it -- not an issue if all rows are state 47; confirm.
centroid_df = centroid_df.astype({'STATEFP': 'str'}, copy=False)

In [420]:
## Tract identifier: state + county + tract codes concatenated as strings.
centroid_df['TractCode'] = centroid_df['STATEFP'] + centroid_df['COUNTYFP'] + centroid_df['TRACTCE']

In [421]:
def _row_to_point(row):
    """Build a shapely Point from a row's LONGITUDE (x) and LATITUDE (y)."""
    return Point((float(row['LONGITUDE']), float(row['LATITUDE'])))


## One point geometry per tract centroid.
centroid_df['geometry'] = centroid_df.apply(_row_to_point, axis=1)

In [422]:
## Promote the centroid table to a GeoDataFrame, labelling its points with the
## urban-area layer's CRS, and keep only the columns used afterwards.
centroid_geo = gpd.GeoDataFrame(
    centroid_df,
    geometry=centroid_df['geometry'],
    crs=urban_area_shape.crs,
)[['LATITUDE', 'LONGITUDE', 'TractCode', 'geometry']]

In [424]:
## Attach to each tract centroid the urban area polygon it lies within.
## Centroids that fall inside no urban area get no row in the result.
## geopandas 0.10 renamed sjoin's `op` keyword to `predicate`, and later releases
## dropped `op`. Try the current spelling first and fall back to the old one so the
## cell runs on both old and new installs.
try:
    ua_of_census_tracts = gpd.sjoin(centroid_geo, urban_area_shape, predicate='within')
except TypeError:
    ua_of_census_tracts = gpd.sjoin(centroid_geo, urban_area_shape, op='within')

Unnamed: 0,LATITUDE,LONGITUDE,TractCode,geometry,index_right,GEOID10,NAME10,UATYP10,ALAND10
0,36.001828,-84.268796,47001020100,POINT (-84.26880 36.00183),865,45640,"Knoxville, TN",U,1134806494
1,36.017235,-84.213287,47001020201,POINT (-84.21329 36.01723),865,45640,"Knoxville, TN",U,1134806494
2,36.022426,-84.228827,47001020202,POINT (-84.22883 36.02243),865,45640,"Knoxville, TN",U,1134806494
3,36.041852,-84.226627,47001020300,POINT (-84.22663 36.04185),865,45640,"Knoxville, TN",U,1134806494
4,36.026023,-84.255785,47001020400,POINT (-84.25579 36.02602),865,45640,"Knoxville, TN",U,1134806494


In [425]:
## Remove the index_right column (added by the spatial join) in place.
## NOTE: not idempotent -- running this cell a second time raises KeyError.
ua_of_census_tracts.drop('index_right', axis=1, inplace=True)

In [426]:
##Find any rows where Tract is not in the spatially merged df
in_urban_area = centroid_geo['TractCode'].isin(ua_of_census_tracts['TractCode'].tolist())
centroid_geo_rural = centroid_geo[~in_urban_area]

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 606 entries, 7 to 1496
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype   
---  ------     --------------  -----   
 0   LATITUDE   606 non-null    float64 
 1   LONGITUDE  606 non-null    float64 
 2   TractCode  606 non-null    object  
 3   geometry   606 non-null    geometry
dtypes: float64(2), geometry(1), object(1)
memory usage: 23.7+ KB


In [427]:
## UATYP10 is 'R' for Rural, 'U' for Urbanized Area (50,000+), and 'C' for Urban Cluster (2,500 - 50,000).
## Give the tracts that matched no urban area the same columns as the spatial-join
## result, so the two frames can be stacked.

# Concatenating with an empty frame only serves to add the four urban-area columns (all missing).
centroid_rural_match_cols = pd.concat([centroid_geo_rural, pd.DataFrame(columns=['GEOID10', 'NAME10', 'UATYP10', 'ALAND10'])])
# Every one of these tracts is labelled rural.
centroid_rural_match_cols['UATYP10'] = 'R'
# NOTE(review): filling NaN with NaN changes nothing; ALAND10 stays missing for these tracts.
centroid_rural_match_cols['ALAND10'] = centroid_rural_match_cols['ALAND10'].fillna(np.nan)
# Missing urban-area names become the placeholder 'Rural Area'.
centroid_rural_match_cols['NAME10'] = centroid_rural_match_cols['NAME10'].fillna('Rural Area')

In [428]:
## Stack the rural tracts on top of the tracts matched to an urban area (row-wise concat).
tract_by_ua_w_rural = pd.concat([centroid_rural_match_cols, ua_of_census_tracts], axis=0)

In [429]:
## Write the tract-to-urban-area classification to CSV.
## (The former bare `.unique()` expression was not the last line of the cell, so it
## displayed nothing and has been removed.)
tract_by_ua_w_rural.to_csv('../out/tracts_urban_def.csv')

In [None]:
# census_url = 'https://api.census.gov/data'
# year = '2010'
# dataset = 'dec/sf1'
# base_url = '/'.join([census_url, year, dataset])
# get_tract_vars = ['P001001']
# get_tract_vars = ["NAME"] + get_tract_vars
# predicates = {}
# predicates['get'] = ','.join(get_tract_vars)
# predicates['for'] = 'urban area:*'  ## zctas
# ##predicates['in'] = 'state:47'
# request = requests.get(base_url, params=predicates)
# ##col_names = ['NAME', 'Population', 'zip']
# census_df = pd.DataFrame(data=request.json()[1:])

# ##https://api.census.gov/data/2010/dec/sf1?get=P001001,NAME&for=urban%20rural:*&in=state:01%20county:087%20county%20subdivision:93230%20place/remainder%20(or%20part):77304%20tract%20(or%20part):231900

In [None]:
# census_url = 'https://api.census.gov/data'
# year = '2018'
# dataset = 'acs/acs5'
# base_url = '/'.join([census_url, year, dataset])
# get_tract_vars = ['B01003_001E']
# get_tract_vars = ["NAME"] + get_tract_vars
# predicates = {}
# predicates['get'] = ','.join(get_tract_vars)
# predicates['for'] = 'urban area:*'  ## zctas
# ##predicates['in'] = 'urban area:*'

# request = requests.get(base_url, params=predicates)
# ##col_names = ['NAME', 'Population', 'zip']
# census_df = pd.DataFrame(data=request.json()[1:])