# PurpleAir Stations QAQC

## Set Working Environment

In [1]:
arcpy.env.workspace = r"C:\Users\tande\Documents\GitHub\QualityAirQualityCities\PurpleAirQAQC"

## Import Packages

In [2]:
import os # For working with Operating System
from sys import platform # Diagnose operating system
import requests 
import datetime as dt # Converting unix time
import json

In [3]:
import numpy as np # For working with Arrays
import pandas as pd # Data Manipulation
import arcpy

In [4]:
from pprint import pprint # Pretty Printing
import matplotlib.pyplot as plt # Basic Plotting

## Import Minneapolis Boundary

In [5]:
#Importing the GeoJSON of MPLS boundary
mpls_bndry_path = os.path.join(os.getcwd(), 'mpls_boundary.geojson')

In [6]:
#converting the GeoJSON to a Feature
arcpy.conversion.JSONToFeatures(r"C:\Users\tande\Documents\GitHub\QualityAirQualityCities\PurpleAirQAQC\mpls_boundary.geojson", r"C:\Users\tande\Documents\GitHub\QualityAirQualityCities\PurpleAirQAQC\mpls_boundary_JSONToFeatures.shp", "POLYGON")

## Buffering Boundary by 8km

In [7]:
arcpy.analysis.Buffer("mpls_boundary_JSONToFeatures", r"C:\Users\tande\Documents\GitHub\QualityAirQualityCities\PurpleAirQAQC\mpls_8km.shp", "8 Kilometers")

## Set Bounds for PurpleAir Parameters

In [8]:
#Changing the projection from UMT 15 to NAD 84
arcpy.management.Project(r"C:\Users\tande\Documents\GitHub\QualityAirQualityCities\PurpleAirQAQC\mpls_8km.shp", r"C:\Users\tande\Documents\GitHub\QualityAirQualityCities\PurpleAirQAQC\mpls_8km_Project", 'GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]]', "WGS_1984_(ITRF00)_To_NAD_1983", 'PROJCS["NAD_1983_UTM_Zone_15N",GEOGCS["GCS_North_American_1983",DATUM["D_North_American_1983",SPHEROID["GRS_1980",6378137.0,298.257222101]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Transverse_Mercator"],PARAMETER["False_Easting",500000.0],PARAMETER["False_Northing",0.0],PARAMETER["Central_Meridian",-93.0],PARAMETER["Scale_Factor",0.9996],PARAMETER["Latitude_Of_Origin",0.0],UNIT["Meter",1.0]]', "NO_PRESERVE_SHAPE", None, "NO_VERTICAL")

ExecuteError: ERROR 000601: Cannot delete C:\Users\tande\Documents\GitHub\QualityAirQualityCities\PurpleAirQAQC\mpls_8km_Project.shp.  May be locked by another application.
Failed to execute (Project).


In [221]:
#Create bounding box
arcpy.management.MinimumBoundingGeometry(r"C:\Users\tande\Documents\GitHub\QualityAirQualityCities\PurpleAirQAQC\mpls_8km_Project.shp", r"C:\Users\tande\Documents\GitHub\QualityAirQualityCities\PurpleAirQAQC\mpls_8km_Proje_MinimumBoundi.shp", "RECTANGLE_BY_AREA")

In [222]:
#Converting bounding box to feature class
arcpy.conversion.FeatureClassToFeatureClass("mpls_8km_Proje_MinimumBoundi", r"C:\Users\tande\Documents\GitHub\QualityAirQualityCities\PurpleAirQAQC", "mpls_bounds.shp", '', 'CTU_ID "CTU_ID" true true false 19 Double 0 0,First,#,mpls_8km_Proje_MinimumBoundi,CTU_ID,-1,-1;CTU_NAME "CTU_NAME" true true false 254 Text 0 0,First,#,mpls_8km_Proje_MinimumBoundi,CTU_NAME,0,254;CTU_CODE "CTU_CODE" true true false 254 Text 0 0,First,#,mpls_8km_Proje_MinimumBoundi,CTU_CODE,0,254;BUFF_DIST "BUFF_DIST" true true false 19 Double 0 0,First,#,mpls_8km_Proje_MinimumBoundi,BUFF_DIST,-1,-1;ORIG_FID "ORIG_FID" true true false 10 Long 0 10,First,#,mpls_8km_Proje_MinimumBoundi,ORIG_FID,-1,-1;ORIG_FID_1 "ORIG_FID_1" true true false 10 Long 0 10,First,#,mpls_8km_Proje_MinimumBoundi,ORIG_FID_1,-1,-1', '')

In [12]:
#Setting lat/long for PurpleAir API Parameters
nwlng = arcpy.Describe(r"C:\Users\tande\Documents\GitHub\QualityAirQualityCities\PurpleAirQAQC\mpls_bounds.shp").extent.XMin
nwlat = arcpy.Describe(r"C:\Users\tande\Documents\GitHub\QualityAirQualityCities\PurpleAirQAQC\mpls_bounds.shp").extent.YMax
selng = arcpy.Describe(r"C:\Users\tande\Documents\GitHub\QualityAirQualityCities\PurpleAirQAQC\mpls_bounds.shp").extent.XMax
selat = arcpy.Describe(r"C:\Users\tande\Documents\GitHub\QualityAirQualityCities\PurpleAirQAQC\mpls_bounds.shp").extent.YMin

## Importing PurpleAir STATION Data from PurpleAir API

In [9]:
def getSensorsData(query='', api_read_key=''):

    # my_url is assigned the URL we are going to send our request to.
    url = 'https://api.purpleair.com/v1/sensors?' + query
    
    print('Here is the full url for the API call:\n\n', url)

    # my_headers is assigned the context of our request we want to make. In this case
    # we will pass through our API read key using the variable created above.
    my_headers = {'X-API-Key':api_read_key}

    # This line creates and sends the request and then assigns its response to the
    # variable, r.
    response = requests.get(url, headers=my_headers)

    # We then return the response we received.
    return response

In [10]:
#PurpleAir API 'read' key
api = input('Please enter your Purple Air api key')

Please enter your Purple Air api key51592903-B445-11ED-B6F4-42010A800007


In [13]:
#Set bounding strings for API parameters
bounds_strings = [f'nwlng={nwlng}',
                  f'nwlat={nwlat}',
                  f'selng={selng}',
                  f'selat={selat}']

bounds_string = '&'.join(bounds_strings)

print(bounds_string)

nwlng=-93.43083716771733&nwlat=45.12366876357701&selng=-93.09225748816463&selat=44.81791263266678


In [14]:
#Setting parameters for API
fields = ['name', 'firmware_version','date_created','last_modified','last_seen','uptime','position_rating','channel_state','channel_flags','altitude',
          'location_type','latitude', 'longitude']


#ignore this - I am trying to pull both station params as well as real-time
#fields = ['time_stamp', 'humidity', 'temperature', 'pressure', 'pm2.5']

fields_string = 'fields=' + '%2C'.join(fields)

print(fields_string)

fields=name%2Cfirmware_version%2Cdate_created%2Clast_modified%2Clast_seen%2Cuptime%2Cposition_rating%2Cchannel_state%2Cchannel_flags%2Caltitude%2Clocation_type%2Clatitude%2Clongitude


In [15]:
#finalizing query for API function
query_string = '&'.join([fields_string, bounds_string])

print(query_string)

fields=name%2Cfirmware_version%2Cdate_created%2Clast_modified%2Clast_seen%2Cuptime%2Cposition_rating%2Cchannel_state%2Cchannel_flags%2Caltitude%2Clocation_type%2Clatitude%2Clongitude&nwlng=-93.43083716771733&nwlat=45.12366876357701&selng=-93.09225748816463&selat=44.81791263266678


In [16]:
#calling the API
response = getSensorsData(query_string, api)

Here is the full url for the API call:

 https://api.purpleair.com/v1/sensors?fields=name%2Cfirmware_version%2Cdate_created%2Clast_modified%2Clast_seen%2Cuptime%2Cposition_rating%2Cchannel_state%2Cchannel_flags%2Caltitude%2Clocation_type%2Clatitude%2Clongitude&nwlng=-93.43083716771733&nwlat=45.12366876357701&selng=-93.09225748816463&selat=44.81791263266678


In [17]:
response_dict = response.json() # Read response as a json (dictionary)

col_names = response_dict['fields']
data = np.array(response_dict['data'])

df_stations = pd.DataFrame(data, columns = col_names)

df_stations.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 88 entries, 0 to 87
Data columns (total 14 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   sensor_index      88 non-null     object
 1   last_modified     88 non-null     object
 2   date_created      88 non-null     object
 3   last_seen         88 non-null     object
 4   name              88 non-null     object
 5   location_type     88 non-null     object
 6   firmware_version  88 non-null     object
 7   uptime            88 non-null     object
 8   position_rating   88 non-null     object
 9   latitude          88 non-null     object
 10  longitude         88 non-null     object
 11  altitude          88 non-null     object
 12  channel_state     88 non-null     object
 13  channel_flags     88 non-null     object
dtypes: object(14)
memory usage: 9.8+ KB


## Cleaning PurpleAir Station Data

In [23]:
#visualizing API response
df_stations.head()

Unnamed: 0,sensor_index,last_modified,date_created,last_seen,name,location_type,firmware_version,uptime,position_rating,latitude,longitude,altitude,channel_state,channel_flags
0,3088,1504993349,1504040633,1679866346,Howe Neighborhood,0,6.06b,34880,5,44.935818,-93.21752,833,3,0
1,5582,1660166545,1514335701,1679866313,Vircroft Ashnia,0,7.02,636,5,44.891655,-93.34291,899,3,0
2,137876,1637086469,1637082783,1679866266,King Field Indoors,1,7.02,23292,0,44.928917,-93.284706,886,3,0
3,11134,1529977499,1527023589,1679866266,Linden Hills,0,7.02,4224,5,44.92776,-93.32235,886,3,0
4,142718,1675359061,1642013869,1679866315,City of Minneapolis Community Air Monitoring P...,0,7.02,8746,5,44.995792,-93.295395,865,3,0


In [28]:
#filtering for all 'outside' sensors and removing 'inside' sensors
outside_sensors = df_stations[df_stations.location_type=='0'] # 0 = outside

len(outside_sensors)

80

In [27]:
#Converting the UNIX dates to time stamps

#keeping commented for now because this is a new addition and I will need to update my SQL table to accommodate
#outside_sensors['last_seen'] = outside_sensors.last_seen.apply(lambda x: dt.datetime.fromtimestamp(int(x))).values
#outside_sensors['last_modified'] = outside_sensors.last_modified.apply(lambda x: dt.datetime.fromtimestamp(int(x))).values
#outside_sensors['date_created'] = outside_sensors.date_created.apply(lambda x: dt.datetime.fromtimestamp(int(x))).values

#print('Least recent sensor was seen: ', last_seen.min())
#print('Most recent sensor was seen: ', last_seen.max())

In [29]:
outside_sensors.head()

Unnamed: 0,sensor_index,last_modified,date_created,last_seen,name,location_type,firmware_version,uptime,position_rating,latitude,longitude,altitude,channel_state,channel_flags
0,3088,1504993349,1504040633,1679866346,Howe Neighborhood,0,6.06b,34880,5,44.935818,-93.21752,833,3,0
1,5582,1660166545,1514335701,1679866313,Vircroft Ashnia,0,7.02,636,5,44.891655,-93.34291,899,3,0
3,11134,1529977499,1527023589,1679866266,Linden Hills,0,7.02,4224,5,44.92776,-93.32235,886,3,0
4,142718,1675359061,1642013869,1679866315,City of Minneapolis Community Air Monitoring P...,0,7.02,8746,5,44.995792,-93.295395,865,3,0
5,142720,1675359105,1642013875,1679866332,City of Minneapolis community air monitoring p...,0,7.02,63349,5,44.95617,-93.25471,856,3,0


## Exporting cleaned data CSV to use with Arcpy

In [30]:
purpleair_stations_df = outside_sensors.to_csv(r'C:\Users\tande\Documents\GitHub\QualityAirQualityCities\PurpleAirQAQC\purpleair_stations_df.csv', index = False)
purpleair_stations_df = pd.read_csv('purpleair_stations_df.csv')

FileNotFoundError: [Errno 2] No such file or directory: 'purpleair_stations_df.csv'

## Let the QAQC Begin!

In [26]:
#visual inspection of points

arcpy.management.XYTableToPoint(r"C:\Users\tande\Documents\GitHub\QualityAirQualityCities\PurpleAirQAQC\purpleair_stations_df.csv", r"C:\Users\tande\Documents\ArcGIS\Projects\PurpleAir_PM_QAQC\PurpleAir_PM_QAQC.gdb\purpleair_stations_XYTableToPoint", "longitude", "latitude", None, 'GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]];-400 -400 1000000000;-100000 10000;-100000 10000;8.98315284119521E-09;0.001;0.001;IsHighPrecision')

In [27]:
#checking the geometry of lat/longs
result = arcpy.management.CheckGeometry("purpleair_stations_XYTableToPoint", r"C:\Users\tande\Documents\ArcGIS\Projects\PurpleAir_PM_QAQC\PurpleAir_PM_QAQC.gdb\purpleair_stat_CheckGeometry", "OGC")
if result[0] == 'true':
    print(result.getMessages())

else:
    # No problems found
    print("No problems found")

No problems found


In [162]:
#removing null values

bool_series = pd.notnull(purpleair_stations_df)
purpleair_stations_df = purpleair_stations_df[bool_series]
purpleair_stations_df.head()
#still 79 rows

Unnamed: 0,sensor_index,last_modified,date_created,last_seen,name,location_type,firmware_version,uptime,position_rating,latitude,longitude,altitude,channel_state,channel_flags
0,3088,1504993349,1504040633,1679705643,Howe Neighborhood,0,6.06b,32201,5,44.935818,-93.21752,833,3,0
1,5582,1660166545,1514335701,1679705566,Vircroft Ashnia,0,7.02,344,5,44.891655,-93.34291,899,3,0
2,11134,1529977499,1527023589,1679705568,Linden Hills,0,7.02,1545,5,44.92776,-93.32235,886,3,0
3,142718,1675359061,1642013869,1679705617,City of Minneapolis Community Air Monitoring P...,0,7.02,6068,5,44.995792,-93.295395,865,3,0
4,142720,1675359105,1642013875,1679705632,City of Minneapolis community air monitoring p...,0,7.02,60671,5,44.95617,-93.25471,856,3,0


In [163]:
# Checking altitude

# Commented out for now becasue not working properly
#Altitude should be between 687 (Minnehaha Falls) to 830

#outside_sensors['altitude'].astype(int)
#altitude_check = outside_sensors[outside_sensors['altitude'] > 686 and < 831]

SyntaxError: invalid syntax (<string>, line 6)

In [192]:
#converting lat/long to WKT for SQL

import csv
from shapely.geometry import Point

csv_path = r"C:\Users\tande\Documents\GitHub\QualityAirQualityCities\PurpleAirQAQC\purpleair_stations_df.csv"

# Load the input DataFrame from a CSV file
df = pd.read_csv(csv_path)

# Create a list to store the WKT values
wkt_list = []

# Iterate over each row and add the WKT representation of a point geometry
for index, row in df.iterrows():
    latitude, longitude = row['latitude'], row['longitude']
    point = Point(longitude, latitude)
    wkt = point.wkt
    wkt_list.append(wkt)

# Add the WKT column to the DataFrame
df['WKT'] = wkt_list

# Write the DataFrame to a CSV file
df.to_csv('output.csv', index=False)

## Connecting to the Server

In [31]:
import psycopg2
from psycopg2 import sql

In [212]:
connection = psycopg2.connect(host = '34.132.44.118',
                              database = 'lab1-2',
                              user = 'postgres',
                              password = 'password')
connection.closed

0

## Create SQL Table

In [213]:
cursor = connection.cursor()
cursor.execute("DROP TABLE IF EXISTS purpleair_stations;")
new_table = sql.SQL("CREATE TABLE purpleair_stations (sensor_index integer, last_modified integer, date_created integer, last_seen integer, name varchar(100), location_type integer, firmware_version varchar(30), uptime integer, position_rating integer, latitude float, longitude float, altitude integer, channel_state integer, channel_flags integer, WKT geometry);")

cursor.execute(new_table)
connection.commit()

## Insert Data into SQL Table

In [214]:
# Open the CSV file
csv_path = r"C:\Users\tande\Documents\ArcGIS\Projects\PurpleAir_PM_QAQC\output.csv"
with open(csv_path) as f:
    # Use the CSV reader to read the file
    reader = csv.reader(f)
    # Skip the header row
    next(reader)
    # Iterate over each row and insert into the table
    for row in reader:
        # Unpack the row fields into separate variables
        sensor_index, last_modified, date_created, last_seen, name, location_type, firmware_version, uptime, position_rating, latitude, longitude, altitude, channel_state, channel_flags, wkt = row
        # Construct the INSERT statement with explicit field names
        query = """INSERT INTO purpleair_stations (
                        sensor_index, last_modified, date_created, last_seen, name, location_type, firmware_version, uptime, 
                        position_rating, latitude, longitude, altitude, channel_state, channel_flags, WKT) 
                   VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"""
        # Execute the INSERT statement with the row fields as parameters
        cursor.execute(query, (sensor_index, last_modified, date_created, last_seen, name, location_type, firmware_version, 
                               uptime, position_rating, latitude, longitude, altitude, channel_state, channel_flags, wkt))

# Commit the changes to the database
connection.commit()

# Close the cursor and connection
cursor.close()
connection.close()