In [115]:
# Reference: https://jupyterbook.org/interactive/hiding.html
# Use {hide, remove}-{input, output, cell} tags to hide content

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
%matplotlib inline
import ipywidgets as widgets
from ipywidgets import interact, interactive, fixed, interact_manual
from IPython.display import display

# Apply seaborn's default theme first, then select the 'talk' plotting
# context. Order matters: set() also sets a context, which the second call
# overrides.
sns.set()
sns.set_context('talk')
# Keep printed NumPy arrays compact: summarize arrays with more than 20
# elements, show 2 digits of precision, and avoid scientific notation.
np.set_printoptions(threshold=20, precision=2, suppress=True)

# Limit how much of a DataFrame is rendered so cell outputs stay short.
pd.set_option('display.max_rows', 7)
pd.set_option('display.max_columns', 8)

# Use the fully qualified option name. The short pattern 'precision' can
# match more than one pandas option (e.g. the Styler precision option in
# newer releases), in which case set_option raises OptionError.
pd.set_option('display.precision', 2)
# This option stops scientific notation for pandas
# pd.set_option('display.float_format', '{:.2f}'.format)

def display_df(df, rows=pd.options.display.max_rows,
               cols=pd.options.display.max_columns):
    """Display `df` with temporary row/column display limits.

    The limits apply only while `df` is being displayed; the global pandas
    display options are restored afterwards.

    Args:
        df: Object to display (typically a DataFrame).
        rows: Value for 'display.max_rows' during the display. The default
            is whatever that option was when this function was defined.
        cols: Value for 'display.max_columns' during the display. The
            default is whatever that option was when this function was
            defined.
    """
    temporary_options = (
        'display.max_rows', rows,
        'display.max_columns', cols,
    )
    # option_context takes alternating (name, value) arguments and resets
    # each option when the block exits.
    with pd.option_context(*temporary_options):
        display(df)

# Co-located Sensors

In [166]:
from pathlib import Path

# Data files are read relative to this directory.
data_dir = Path('data')

# List the directory contents to see which files are available.
!ls data

aqs_06-067-0010.csv            list_of_purpleair_sensors.json
cleaned_purpleair_aqs          matched_pa_aqs.csv
list_of_aqs_sites.csv          purpleair_AMTS


In [145]:
# Read the AQS site list from the data directory into a DataFrame.
aqs_sites = pd.read_csv(data_dir.joinpath('list_of_aqs_sites.csv'))
aqs_sites

Unnamed: 0,State Code,County Code,Site Number,Latitude,...,City Name,CBSA Name,Tribe Name,Extraction Date
0,01,1,1,32.44,...,Prattville,"Montgomery, AL",,2021-05-18
1,01,1,2,32.43,...,Prattville,"Montgomery, AL",,2021-05-18
2,01,1,3,32.33,...,Not in a City,"Montgomery, AL",,2021-05-18
...,...,...,...,...,...,...,...,...,...
20727,CC,8,7004,45.00,...,Not in a city,,"St. Regis Mohawk Tribe, New York",2021-05-18
20728,CC,11,1,49.15,...,Not in a city,,,2021-05-18
20729,CC,11,2,49.14,...,Not in a city,,,2021-05-18


In [146]:
# Peek at the first lines of the raw JSON file, keeping only the first
# 70 characters of each line so long rows do not flood the output.
!head data/list_of_purpleair_sensors.json | cut -c 1-70

{"version":"7.0.30",
"fields":
["ID","pm","pm_cf_1","pm_atm","age","pm_0","pm_1","pm_2","pm_3","pm_4"
"data":[
[20,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,97,0.0,0.0,0.0,0.0,0.0,0
[47,null,null,null,4951,null,null,null,null,null,null,null,96,null,nul
[53,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,1.2,5.2,6.0,97,0.0,0.5,702.3,57.5,6.
[74,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,97,0.0,0.0,0.0,0.0,0.0,0
[77,9.8,9.8,9.8,1,9.8,10.7,11.0,11.2,13.8,15.1,15.5,97,9.7,9.8,6523.5,
[81,6.5,6.5,6.5,0,6.5,6.1,6.1,6.6,8.1,8.3,9.7,97,5.9,6.8,4058.9,346.1,


In [147]:
import json

# JSON text is UTF-8 by specification; pass the encoding explicitly so the
# read does not depend on the platform's default locale encoding.
with open(data_dir / 'list_of_purpleair_sensors.json', encoding='utf-8') as f:
    pa_json = json.load(f)

# Show the top-level keys to see how the file is organized.
list(pa_json.keys())

['version', 'fields', 'data', 'count']

In [148]:
# Build a DataFrame from the parsed JSON: 'data' holds the rows and
# 'fields' holds the matching column names.
pa_sites = pd.DataFrame(
    data=pa_json['data'],
    columns=pa_json['fields'],
)
pa_sites

Unnamed: 0,ID,pm,pm_cf_1,pm_atm,...,Voc,Ozone1,Adc,CH
0,20,0.0,0.0,0.0,...,,,0.01,1
1,47,,,,...,,0.72,0.72,0
2,53,0.0,0.0,0.0,...,,,0.00,1
...,...,...,...,...,...,...,...,...,...
23135,132237,5.3,5.3,5.3,...,,,0.00,3
23136,132431,3.2,3.2,3.2,...,,,0.03,3
23137,132471,0.5,0.5,0.5,...,,,0.05,3


Here, we would join the AQS and PurpleAir (PA) site tables on latitude and
longitude to find PA and AQS sensors that are within 50 meters of each other.
The paper authors then contacted each AQS site to confirm whether the sensors
were actually co-located. Rather than repeat that work, we'll use their list
of co-located sensors.

Final list of co-located sensors:

In [151]:
# Read the list of matched PurpleAir/AQS sensors from the data directory.
matched = pd.read_csv(data_dir.joinpath('matched_pa_aqs.csv'))
matched

Unnamed: 0,PA ID,PA name (not included for private sensors),AQS site,Start Date,End Date
0,DE1,Private,10-030-2004,7/27/2019,11/18/2019
1,AK2,Ncore 2,02-090-0034,11/7/2018,1/12/2019
2,AK4,Ncore 3,02-090-0034,1/15/2019,6/16/2019
...,...,...,...,...,...
47,WI2,Private,55-087-0009,1/6/2019,10/24/2019
48,WI3,Private,55-087-0009,3/30/2019,4/4/2019
49,WI6,Private,55-133-0027,1/1/2019,3/27/2019
