# PurpleAir Stations QAQC

## Import Packages

In [131]:
import datetime as dt
import getpass
import os

import arcpy
import numpy as np
import pandas as pd
import requests

## Set Working Environment

In [74]:
# Directory the notebook runs from. This is a global variable
# (must change if running in arcpro).
cwd = os.getcwd()

# Point the arcpy workspace at the QAQC file geodatabase, two levels up from cwd.
qaqc_gdb = os.path.join(cwd, os.pardir, os.pardir, 'data', 'QAQC.gdb')
arcpy.env.workspace = qaqc_gdb

# Overwriting existing layers is okay.
arcpy.env.overwriteOutput = True

## Set Bounds for PurpleAir Parameters

In [75]:
# Reproject "mpls_8km" from NAD 1983 UTM Zone 15N (input) to geographic WGS 1984 (output).
wgs84_wkt = 'GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]]'
nad83_utm15n_wkt = 'PROJCS["NAD_1983_UTM_Zone_15N",GEOGCS["GCS_North_American_1983",DATUM["D_North_American_1983",SPHEROID["GRS_1980",6378137.0,298.257222101]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Transverse_Mercator"],PARAMETER["False_Easting",500000.0],PARAMETER["False_Northing",0.0],PARAMETER["Central_Meridian",-93.0],PARAMETER["Scale_Factor",0.9996],PARAMETER["Latitude_Of_Origin",0.0],UNIT["Meter",1.0]]'

arcpy.management.Project(
    "mpls_8km",                        # input dataset
    "mpls_8km_Project",                # output dataset
    wgs84_wkt,                         # output coordinate system
    "WGS_1984_(ITRF00)_To_NAD_1983",   # geographic transformation
    nad83_utm15n_wkt,                  # input coordinate system
    "NO_PRESERVE_SHAPE",
    None,
    "NO_VERTICAL",
)

In [76]:
# Build the bounding rectangle (minimum-area rectangle) around the projected study area.
arcpy.management.MinimumBoundingGeometry(
    in_features="mpls_8km_Project",
    out_feature_class="mpls_8km_Proje_MinimumBoundi",
    geometry_type="RECTANGLE_BY_AREA",
)

In [77]:
# Corner coordinates for the PurpleAir API bounding-box parameters:
# north-west = (XMin, YMax), south-east = (XMax, YMin) of the bounding rectangle.
bbox_extent = arcpy.Describe("mpls_8km_Proje_MinimumBoundi").extent

nwlng, nwlat = bbox_extent.XMin, bbox_extent.YMax
selng, selat = bbox_extent.XMax, bbox_extent.YMin

## Importing PurpleAir Station Data from PurpleAir API

In [78]:
def getSensorsData(query='', api_read_key='', timeout=30):
    """Send a GET request to the PurpleAir ``/v1/sensors`` endpoint.

    Parameters
    ----------
    query : str
        Query string appended verbatim after ``?`` in the URL; it is not
        encoded or validated here.
    api_read_key : str
        PurpleAir API read key, sent in the ``X-API-Key`` request header.
    timeout : float or tuple, optional
        Passed to ``requests.get`` so an unresponsive server raises a
        timeout error instead of blocking the notebook forever.
        Defaults to 30 seconds.

    Returns
    -------
    requests.Response
        The response exactly as received; the HTTP status is not checked here.
    """
    # Full request URL: endpoint plus the caller-supplied query string.
    url = 'https://api.purpleair.com/v1/sensors?' + query

    print('Here is the full url for the API call:\n\n', url)

    # The read key travels in a header, so it never appears in the printed URL.
    my_headers = {'X-API-Key': api_read_key}

    # Send the request and hand the raw response back to the caller.
    response = requests.get(url, headers=my_headers, timeout=timeout)

    return response

In [79]:
#PurpleAir API 'read' key
# getpass does not echo what is typed, so the key is not stored in the
# notebook's saved output the way it is with input().
api = getpass.getpass('Please enter your Purple Air api key')

Please enter your Purple Air api key [REDACTED]


In [187]:
# Bounding-box parameters for the API, each rendered as "name=value".
bounds_strings = [
    f'{param}={coordinate}'
    for param, coordinate in (
        ('nwlng', nwlng),
        ('nwlat', nwlat),
        ('selng', selng),
        ('selat', selat),
    )
]

# Join the individual parameters into one query fragment.
bounds_string = '&'.join(bounds_strings)

print(bounds_string)

nwlng=-93.43083707299996&nwlat=45.12366876300007&selng=-93.09225748799997&selat=44.81791263300005


In [81]:
# Sensor attributes requested from the API.
fields = [
    'name',
    'firmware_version',
    'date_created',
    'last_modified',
    'last_seen',
    'uptime',
    'position_rating',
    'channel_state',
    'channel_flags',
    'altitude',
    'location_type',
    'latitude',
    'longitude',
]

# The names are joined with '%2C' (a URL-encoded comma) after the "fields=" key.
fields_string = f"fields={'%2C'.join(fields)}"

print(fields_string)

fields=name%2Cfirmware_version%2Cdate_created%2Clast_modified%2Clast_seen%2Cuptime%2Cposition_rating%2Cchannel_state%2Cchannel_flags%2Caltitude%2Clocation_type%2Clatitude%2Clongitude


In [82]:
# Final query for the API function: requested fields first, then the bounding box.
query_string = f'{fields_string}&{bounds_string}'

print(query_string)

fields=name%2Cfirmware_version%2Cdate_created%2Clast_modified%2Clast_seen%2Cuptime%2Cposition_rating%2Cchannel_state%2Cchannel_flags%2Caltitude%2Clocation_type%2Clatitude%2Clongitude&nwlng=-93.43083707299996&nwlat=45.12366876300007&selng=-93.09225748799997&selat=44.81791263300005


In [83]:
# Call the API with the assembled query and the user's read key.
response = getSensorsData(query=query_string, api_read_key=api)

Here is the full url for the API call:

 https://api.purpleair.com/v1/sensors?fields=name%2Cfirmware_version%2Cdate_created%2Clast_modified%2Clast_seen%2Cuptime%2Cposition_rating%2Cchannel_state%2Cchannel_flags%2Caltitude%2Clocation_type%2Clatitude%2Clongitude&nwlng=-93.43083707299996&nwlat=45.12366876300007&selng=-93.09225748799997&selat=44.81791263300005


In [180]:
# Stop here with a clear HTTP error (e.g. rejected API key) rather than
# failing later with a KeyError on a missing 'fields'/'data' entry.
response.raise_for_status()

response_dict = response.json() # Read response as a json (dictionary)

# 'fields' holds the column names and 'data' the rows.
col_names = response_dict['fields']
# NOTE: going through np.array leaves every column as object dtype (see the
# df.info() output); later cells rely on that, e.g. comparing location_type to '0'.
data = np.array(response_dict['data'])

df = pd.DataFrame(data, columns = col_names)

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 88 entries, 0 to 87
Data columns (total 14 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   sensor_index      88 non-null     object
 1   last_modified     88 non-null     object
 2   date_created      88 non-null     object
 3   last_seen         88 non-null     object
 4   name              88 non-null     object
 5   location_type     88 non-null     object
 6   firmware_version  88 non-null     object
 7   uptime            88 non-null     object
 8   position_rating   88 non-null     object
 9   latitude          88 non-null     object
 10  longitude         88 non-null     object
 11  altitude          88 non-null     object
 12  channel_state     88 non-null     object
 13  channel_flags     88 non-null     object
dtypes: object(14)
memory usage: 9.8+ KB


## Cleaning PurpleAir Station Data

In [181]:
# Preview the first five rows of the API response.
df.head(n=5)

Unnamed: 0,sensor_index,last_modified,date_created,last_seen,name,location_type,firmware_version,uptime,position_rating,latitude,longitude,altitude,channel_state,channel_flags
0,3088,1504993349,1504040633,1680215003,Howe Neighborhood,0,6.06b,5222,5,44.935818,-93.21752,833,3,0
1,5582,1660166545,1514335701,1680215014,Vircroft Ashnia,0,7.02,810,5,44.891655,-93.34291,899,3,0
2,137876,1637086469,1637082783,1680215021,King Field Indoors,1,7.02,29104,0,44.928917,-93.284706,886,3,0
3,11134,1529977499,1527023589,1680215020,Linden Hills,0,7.02,10036,5,44.92776,-93.32235,886,3,0
4,142718,1675359061,1642013869,1680214953,City of Minneapolis Community Air Monitoring P...,0,7.02,14557,5,44.995792,-93.295395,865,3,0


In [183]:
# Keep only the outside sensors: rows whose location_type is the string '0'.
df_outside = df.loc[df['location_type'] == '0']
len(df_outside)


80

In [184]:
# location_type is constant after the outdoor filter, so remove the column.
df_stations = df_outside.drop(columns='location_type')
df_stations

Unnamed: 0,sensor_index,last_modified,date_created,last_seen,name,firmware_version,uptime,position_rating,latitude,longitude,altitude,channel_state,channel_flags
0,3088,1504993349,1504040633,1680215003,Howe Neighborhood,6.06b,5222,5,44.935818,-93.21752,833,3,0
1,5582,1660166545,1514335701,1680215014,Vircroft Ashnia,7.02,810,5,44.891655,-93.34291,899,3,0
3,11134,1529977499,1527023589,1680215020,Linden Hills,7.02,10036,5,44.92776,-93.32235,886,3,0
4,142718,1675359061,1642013869,1680214953,City of Minneapolis Community Air Monitoring P...,7.02,14557,5,44.995792,-93.295395,865,3,0
5,142720,1675359105,1642013875,1680214975,City of Minneapolis community air monitoring p...,7.02,69160,5,44.95617,-93.25471,856,3,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
83,108366,1628110951,1623277596,1680215030,Dakota Trail,7.02,46201,5,44.87984,-93.38967,889,3,0
84,113486,1628274672,1625862371,1680215020,Bancroft,7.02,67860,5,44.928123,-93.25389,821,3,0
85,116413,1667066761,1627341088,1680215040,Seward,7.02,14793,5,44.961216,-93.2424,839,3,0
86,126487,1656379563,1631137483,1680214415,Northwoods Berry Clan,7.02,38415,5,45.040394,-93.21327,939,3,0


In [185]:
#Converting UNIX date/time to pd
# The timestamp columns show up as object dtype in df.info(), i.e. they are
# not numeric yet. Cast them to numbers before applying unit='s', because
# newer pandas versions deprecate parsing numeric strings together with `unit`.
for _ts_col in ('last_modified', 'date_created', 'last_seen'):
    if pd.api.types.is_datetime64_any_dtype(df_stations[_ts_col]):
        continue  # already converted, e.g. when this cell is re-run
    df_stations[_ts_col] = pd.to_datetime(pd.to_numeric(df_stations[_ts_col]), unit='s')

In [186]:
df_stations.head()

Unnamed: 0,sensor_index,last_modified,date_created,last_seen,name,firmware_version,uptime,position_rating,latitude,longitude,altitude,channel_state,channel_flags
0,3088,2017-09-09 21:42:29,2017-08-29 21:03:53,2023-03-30 22:23:23,Howe Neighborhood,6.06b,5222,5,44.935818,-93.21752,833,3,0
1,5582,2022-08-10 21:22:25,2017-12-27 00:48:21,2023-03-30 22:23:34,Vircroft Ashnia,7.02,810,5,44.891655,-93.34291,899,3,0
3,11134,2018-06-26 01:44:59,2018-05-22 21:13:09,2023-03-30 22:23:40,Linden Hills,7.02,10036,5,44.92776,-93.32235,886,3,0
4,142718,2023-02-02 17:31:01,2022-01-12 18:57:49,2023-03-30 22:22:33,City of Minneapolis Community Air Monitoring P...,7.02,14557,5,44.995792,-93.295395,865,3,0
5,142720,2023-02-02 17:31:45,2022-01-12 18:57:55,2023-03-30 22:22:55,City of Minneapolis community air monitoring p...,7.02,69160,5,44.95617,-93.25471,856,3,0


### Converting lat/long to WKT for SQL

In [155]:
# Build the WKT text of a point geometry (spatial reference 4326) for every station.
# latitude/longitude may still be strings at this point (object dtype in df.info()),
# so they are converted to float explicitly before being handed to arcpy.
wkt_list = [
    arcpy.PointGeometry(arcpy.Point(float(lon), float(lat)), 4326).WKT
    for lat, lon in zip(df_stations['latitude'], df_stations['longitude'])
]

# Add the WKT column to the DataFrame
df_stations['WKT'] = wkt_list

## Let the QAQC Begin!

In [175]:
#visual inspection that all the points fall within the boundary + 8KM buffer

### Altitude Check

In [152]:
# Parse altitude as a number; entries that cannot be parsed become NaN (errors='coerce').
df_stations['altitude'] = df_stations['altitude'].pipe(pd.to_numeric, errors='coerce')
def check_range(value, low=680, high=1120):
    """Classify a single altitude value against an accepted range (in feet).

    Parameters
    ----------
    value : number, None or NaN
        Altitude to check.
    low, high : number, optional
        Inclusive bounds of the accepted range; default 680-1120.

    Returns
    -------
    None
        When ``low <= value <= high``.
    str
        ``'no value given'`` when the value is missing (None or NaN),
        otherwise ``'out of range (<low>-<high>ft)'``.
    """
    # pd.isna covers both None and the NaN that pd.to_numeric(errors='coerce')
    # produces; an `is None` test alone would label NaN as out of range.
    if pd.isna(value):
        return 'no value given'
    if low <= value <= high:
        return None
    return f'out of range ({low}-{high}ft)'
    
# Reference for the 680 - 1120 range: https://en-us.topographic-map.com/map-bbpz4/Minneapolis/
altitude_flags = df_stations['altitude'].apply(check_range)
df_stations['altitude_error'] = altitude_flags

print(df_stations)

   sensor_index       last_modified        date_created           last_seen  \
0          3088 2017-09-09 21:42:29 2017-08-29 21:03:53 2023-03-30 22:23:23   
1          5582 2022-08-10 21:22:25 2017-12-27 00:48:21 2023-03-30 22:23:34   
2        137876 2021-11-16 18:14:29 2021-11-16 17:13:03 2023-03-30 22:23:41   
3         11134 2018-06-26 01:44:59 2018-05-22 21:13:09 2023-03-30 22:23:40   
4        142718 2023-02-02 17:31:01 2022-01-12 18:57:49 2023-03-30 22:22:33   
..          ...                 ...                 ...                 ...   
83       108366 2021-08-04 21:02:31 2021-06-09 22:26:36 2023-03-30 22:23:50   
84       113486 2021-08-06 18:31:12 2021-07-09 20:26:11 2023-03-30 22:23:40   
85       116413 2022-10-29 18:06:01 2021-07-26 23:11:28 2023-03-30 22:24:00   
86       126487 2022-06-28 01:26:03 2021-09-08 21:44:43 2023-03-30 22:13:35   
87       128195 2022-02-10 19:55:03 2021-09-16 22:40:18 2023-03-30 22:23:13   

                                                 na

## Connecting to the Server

In [63]:
import psycopg2
from psycopg2 import sql

In [64]:
# Credentials file: its first line holds the connection values separated by ', '.
cred_pth = os.path.join(os.getcwd(), '..', '..', 'database', 'db_credentials.txt')

with open(cred_pth, 'r') as f:
    first_line = f.readlines()[0]

creds = first_line.rstrip('\n').split(', ')

# Pair each value with its psycopg2 keyword, in the order they appear in the file.
credential_keys = ['dbname', 'user', 'password', 'port', 'host']
pg_connection_dict = dict(zip(credential_keys, creds))

# Connect to PostGIS Database
connection = psycopg2.connect(**pg_connection_dict)

0

## Insert Data into SQL Table

In [66]:
# DataFrame columns in the same order as the placeholders of the INSERT below.
insert_columns = ['sensor_index', 'last_modified', 'date_created', 'last_seen', 'name', 'firmware_version',
                  'uptime', 'position_rating', 'latitude', 'longitude', 'altitude',
                  'channel_state', 'channel_flags', 'WKT']

# Parameterized statement; the WKT text is turned into a geometry with SRID 4326 by the database.
insert_statement = '''
    INSERT INTO PURPLEAIR_STATIONS (sensor_index, last_modified, date_created, last_seen, name, firmware_version, uptime, 
                        position_rating, latitude, longitude, altitude, channel_state, channel_flags, WKT)
    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, ST_SetSRID(ST_GeomFromText(%s), 4326)::geometry)
    '''

#connect to the cursor
cur = connection.cursor()

try:
    # iterate over the dataframe and insert each row into the database
    for index, row in df_stations.iterrows():
        cur.execute(insert_statement, tuple(row[column] for column in insert_columns))

    # Commit once, after every row succeeded, so a failure part-way through
    # does not leave a partial load behind.
    connection.commit()
finally:
    # Close the cursor and connection even when an insert fails. Closing the
    # connection without a commit discards the pending inserts.
    cur.close()
    connection.close()