In [2]:
import pandas as pd
import os

In [3]:
location_raw = pd.read_csv('location_raw.csv', index_col='datetime', parse_dates=True, dtype={'County.FIPS':str})

In [4]:
main_fips_set = set(location_raw['County.FIPS'].unique())

In [5]:
from urllib.request import urlopen
import json
with urlopen('https://raw.githubusercontent.com/plotly/datasets/ma'
             'ster/geojson-counties-fips.json') as response:
    counties = json.load(response)

In [6]:
keys_to_extract = ['STATE', 'COUNTY']

In [7]:
records = []
for item in counties.get('features'):
    base = item.get('properties')
    res = dict(filter(lambda item: item[0] in keys_to_extract, base.items()))
    records.append(res)

In [8]:
counties_fips = pd.DataFrame.from_records(records).assign(fips=lambda df_: df_.STATE+df_.COUNTY)

In [10]:
counties_fips_set = set(counties_fips['fips'])

In [11]:
len(counties_fips_set)

3221

In [12]:
population_path = os.path.join(
    os.getcwd(),
    'pop_data/nhgis0002_csv/nhgis0002_ts_nominal_county.csv'
    )

In [14]:
population_df = pd.read_csv(
    population_path,
    dtype={'STATEFP':str, 'COUNTYFP':str})

In [15]:
pop_fips = (population_df
    # selecing rows that have both county and state codes present
    .loc[lambda df_: 
        (~df_['STATEFP'].isna()) & 
        (~df_['COUNTYFP'].isna())]
    # combining the codes into one fips code
    .assign(fips=lambda df_:
        df_['STATEFP']+df_['COUNTYFP'])
    # dropping District of Columbia(11) and Puerto Rico(72)
    .loc[lambda df_:
        ~df_['STATEFP'].isin(['11', '72'])]
    )


In [16]:
pop_fips_set = set(pop_fips['fips'].unique())

In [17]:
len(pop_fips_set.difference(main_fips_set))

408

In [18]:
# pop_fips_set
# main_fips_set
# counties_fips_set

In [19]:
# Counties in main dataset not contained in the map subset
# Counties that may have changed
main_fips_set.difference(counties_fips_set)

{'02063', '02066', '02158', '46102'}

In [20]:
# Fips codes from population dataset not in counties dataset
# Some of these may be from counties that no longer exist or
# whose fips code has changed
pop_fips_set.difference(counties_fips_set)

{'02010',
 '02030',
 '02040',
 '02062',
 '02063',
 '02066',
 '02080',
 '02120',
 '02140',
 '02158',
 '02160',
 '02190',
 '02200',
 '02201',
 '02210',
 '02231',
 '02232',
 '02250',
 '02260',
 '02280',
 '12025',
 '30113',
 '32025',
 '46001',
 '46102',
 '46131',
 '46133',
 '51055',
 '51123',
 '51129',
 '51151',
 '51189',
 '51560',
 '51780',
 '56047'}

In [21]:
location_raw.loc[location_raw['County.FIPS'].isin(main_fips_set.difference(counties_fips_set))]

Unnamed: 0_level_0,city,state,country,shape,latitude,comment_length,duration_seconds,longitude,query_string,json,selected_json,status,Block.FIPS,Block.bbox,County.FIPS,County.name,State.FIPS,State.code,State.name
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
1957-10-01 16:00:00,nelchina (near),ak,us,unknown,61.991944,24.0,180.0,-146.768611,https://geo.fcc.gov/api/census/block/find?lati...,"{'Block': {'FIPS': '020660001001261', 'bbox': ...","{'Block': {'FIPS': '020660001001261', 'bbox': ...",OK,20660001001261,"[-146.947138, 61.949935, -146.680945, 62.026792]",2066,Copper River Census Area,2,AK,Alaska
1969-07-05 10:53:00,valdez,ak,us,disk,61.130833,2.0,1020.0,-146.348333,https://geo.fcc.gov/api/census/block/find?lati...,"{'messages': [""FCC0001: The coordinate lies on...","{'Block': {'FIPS': '020630003001004', 'bbox': ...",OK,20630003001004,"[-146.349969, 61.129133, -146.344487, 61.130852]",2063,Chugach Census Area,2,AK,Alaska
1974-11-15 09:00:00,cordova,ak,us,disk,60.542778,13.0,,-145.7575,https://geo.fcc.gov/api/census/block/find?lati...,"{'Block': {'FIPS': '020630002002015', 'bbox': ...","{'Block': {'FIPS': '020630002002015', 'bbox': ...",OK,20630002002015,"[-145.7586, 60.542303, -145.757014, 60.543214]",2063,Chugach Census Area,2,AK,Alaska
1980-08-05 23:15:00,glennallen,ak,us,rectangle,62.109167,6.0,1500.0,-145.546389,https://geo.fcc.gov/api/census/block/find?lati...,"{'Block': {'FIPS': '020660001001236', 'bbox': ...","{'Block': {'FIPS': '020660001001236', 'bbox': ...",OK,20660001001236,"[-145.583228, 62.108621, -145.529862, 62.120348]",2066,Copper River Census Area,2,AK,Alaska
1989-10-15 22:00:00,glennallen,ak,us,unknown,62.109167,11.0,,-145.546389,https://geo.fcc.gov/api/census/block/find?lati...,"{'Block': {'FIPS': '020660001001236', 'bbox': ...","{'Block': {'FIPS': '020660001001236', 'bbox': ...",OK,20660001001236,"[-145.583228, 62.108621, -145.529862, 62.120348]",2066,Copper River Census Area,2,AK,Alaska
1995-03-28 00:36:00,cordova,ak,us,,60.542778,22.0,900.0,-145.7575,https://geo.fcc.gov/api/census/block/find?lati...,"{'Block': {'FIPS': '020630002002015', 'bbox': ...","{'Block': {'FIPS': '020630002002015', 'bbox': ...",OK,20630002002015,"[-145.7586, 60.542303, -145.757014, 60.543214]",2063,Chugach Census Area,2,AK,Alaska
1996-03-02 00:30:00,mountain village,ak,us,,62.085556,21.0,,-163.729444,https://geo.fcc.gov/api/census/block/find?lati...,"{'Block': {'FIPS': '021580001003091', 'bbox': ...","{'Block': {'FIPS': '021580001003091', 'bbox': ...",OK,21580001003091,"[-163.732804, 62.085033, -163.722817, 62.090731]",2158,Kusilvak Census Area,2,AK,Alaska
1999-07-04 21:00:00,arcadia,ca,us,,34.139722,25.0,480.0,-118.034444,https://geo.fcc.gov/api/census/block/find?lati...,"{'Block': {'FIPS': '060374307211011', 'bbox': ...","{'Block': {'FIPS': '060374307211011', 'bbox': ...",OK,461029410001139,"[-102.55642, 43.022061, -102.551761, 43.025903]",46102,Oglala Lakota County,46,SD,South Dakota
1999-07-04 21:00:00,pine ridge,sd,us,light,43.025556,20.0,20.0,-102.555833,https://geo.fcc.gov/api/census/block/find?lati...,"{'Block': {'FIPS': '461029410001139', 'bbox': ...","{'Block': {'FIPS': '461029410001139', 'bbox': ...",OK,461029410001139,"[-102.55642, 43.022061, -102.551761, 43.025903]",46102,Oglala Lakota County,46,SD,South Dakota
2000-05-01 23:00:00,red shirt (near),sd,us,light,43.6675,16.0,180.0,-102.898333,https://geo.fcc.gov/api/census/block/find?lati...,"{'Block': {'FIPS': '461029411001001', 'bbox': ...","{'Block': {'FIPS': '461029411001001', 'bbox': ...",OK,461029411001001,"[-102.911143, 43.66737, -102.892154, 43.674908]",46102,Oglala Lakota County,46,SD,South Dakota


In [22]:
location_raw.columns

Index(['city', 'state', 'country', 'shape', 'latitude', 'comment_length',
       'duration_seconds', 'longitude', 'query_string', 'json',
       'selected_json', 'status', 'Block.FIPS', 'Block.bbox', 'County.FIPS',
       'County.name', 'State.FIPS', 'State.code', 'State.name'],
      dtype='object')

In [23]:
# puerto rico
main_fips_set.difference(pop_fips_set)

{'11001',
 '72013',
 '72015',
 '72019',
 '72029',
 '72055',
 '72071',
 '72077',
 '72079',
 '72081',
 '72097',
 '72099',
 '72113',
 '72117',
 '72119',
 '72127',
 '72129',
 '72139',
 '72141',
 '72143',
 '72145'}

In [24]:
len(pop_fips_set.difference(main_fips_set))

408

In [25]:
(location_raw['County.FIPS']=='1101').sum()

0

In [26]:
location_raw[location_raw['County.FIPS'] =='11001']

Unnamed: 0_level_0,city,state,country,shape,latitude,comment_length,duration_seconds,longitude,query_string,json,selected_json,status,Block.FIPS,Block.bbox,County.FIPS,County.name,State.FIPS,State.code,State.name
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2010-08-28 22:06:00,washington,dc,us,fireball,38.895,11.0,120.0,-77.036667,https://geo.fcc.gov/api/census/block/find?lati...,"{'Block': {'FIPS': '110019800001037', 'bbox': ...","{'Block': {'FIPS': '110019800001037', 'bbox': ...",OK,110019800001037,"[-77.038489, 38.892609, -77.03462, 38.895201]",11001,District of Columbia,11,DC,District of Columbia
2011-06-15 20:19:00,georgetown,dc,us,disk,38.904722,15.0,6.0,-77.062778,https://geo.fcc.gov/api/census/block/find?lati...,"{'messages': [""FCC0001: The coordinate lies on...","{'Block': {'FIPS': '110010001023013', 'bbox': ...",OK,110010001023013,"[-77.062811, 38.904709, -77.061176, 38.90519]",11001,District of Columbia,11,DC,District of Columbia
2011-12-11 20:08:00,washington,dc,us,cylinder,38.895,7.0,4.0,-77.036667,https://geo.fcc.gov/api/census/block/find?lati...,"{'Block': {'FIPS': '110019800001037', 'bbox': ...","{'Block': {'FIPS': '110019800001037', 'bbox': ...",OK,110019800001037,"[-77.038489, 38.892609, -77.03462, 38.895201]",11001,District of Columbia,11,DC,District of Columbia
2012-01-29 19:20:00,washington,dc,us,other,38.895,17.0,10.0,-77.036667,https://geo.fcc.gov/api/census/block/find?lati...,"{'Block': {'FIPS': '110019800001037', 'bbox': ...","{'Block': {'FIPS': '110019800001037', 'bbox': ...",OK,110019800001037,"[-77.038489, 38.892609, -77.03462, 38.895201]",11001,District of Columbia,11,DC,District of Columbia
2012-07-04 21:06:00,washington,dc,us,sphere,38.895,9.0,5.5,-77.036667,https://geo.fcc.gov/api/census/block/find?lati...,"{'Block': {'FIPS': '110019800001037', 'bbox': ...","{'Block': {'FIPS': '110019800001037', 'bbox': ...",OK,110019800001037,"[-77.038489, 38.892609, -77.03462, 38.895201]",11001,District of Columbia,11,DC,District of Columbia
2012-07-25 21:30:00,washington,dc,us,changing,38.895,23.0,900.0,-77.036667,https://geo.fcc.gov/api/census/block/find?lati...,"{'Block': {'FIPS': '110019800001037', 'bbox': ...","{'Block': {'FIPS': '110019800001037', 'bbox': ...",OK,110019800001037,"[-77.038489, 38.892609, -77.03462, 38.895201]",11001,District of Columbia,11,DC,District of Columbia
2012-08-09 03:00:00,washington,dc,us,light,38.895,7.0,600.0,-77.036667,https://geo.fcc.gov/api/census/block/find?lati...,"{'Block': {'FIPS': '110019800001037', 'bbox': ...","{'Block': {'FIPS': '110019800001037', 'bbox': ...",OK,110019800001037,"[-77.038489, 38.892609, -77.03462, 38.895201]",11001,District of Columbia,11,DC,District of Columbia


In [27]:
counties_fips[counties_fips['fips']=='11001']

Unnamed: 0,STATE,COUNTY,fips
792,11,1,11001


In [28]:
counties_fips[counties_fips['STATE']=='11']

Unnamed: 0,STATE,COUNTY,fips
792,11,1,11001
