# PurpleAir Stations QAQC

## Import Packages

In [1]:
### Import Packages

# File manipulation

import os # For working with Operating System
import requests # Accessing the Web
import datetime as dt # Working with dates/times

# Database 

import psycopg2
from psycopg2 import sql

# Analysis

import numpy as np
import geopandas as gpd
import pandas as pd

In [2]:
# Load our Functions

script_path = os.path.join('..', '..', 'Scripts', 'python')

# Function definition - Please see Scripts/python/*
# The scripts are executed at the top level of the notebook (not inside a
# function) so that every name they define lands in the notebook's globals.
# 'Update_Alerts.py' is intentionally not loaded at the moment.
for _script_name in ['Get_spikes_df.py', 'Daily_Updates.py']:
    _script_file = os.path.join(script_path, _script_name)
    # Use a context manager so the file handle is closed right after reading
    with open(_script_file) as _script_handle:
        # compile() with the real path makes tracebacks point at the script file
        exec(compile(_script_handle.read(), _script_file, 'exec'))

## Definitions

In [3]:
# Read secrets from the .env file (PurpleAir API read key + database login)
from dotenv import load_dotenv

load_dotenv()

purpleAir_api = os.getenv('PURPLEAIR_API_TOKEN')

## Database credentials

# Environment variable names, listed in the same order as the
# psycopg2 connection keywords below
db_env_vars = ('DB_NAME', 'DB_USER', 'DB_PASS', 'DB_PORT', 'DB_HOST')
connection_keys = ('dbname', 'user', 'password', 'port', 'host')

creds = [os.getenv(env_var) for env_var in db_env_vars]

pg_connection_dict = {key: value for key, value in zip(connection_keys, creds)}

## Load Previous Information

### Extent of study

Get the latitude/longitude bounding box of the study area to use as PurpleAir API parameters.

In [4]:
# Connect to the database using the keyword arguments in pg_connection_dict

conn = psycopg2.connect(**pg_connection_dict) 
# Create cursor
cur = conn.cursor()

# Query: buffer the "Minneapolis Boundary" geometry by 100 (after transforming
# to EPSG:26915), transform the buffer back to EPSG:4326 and return its
# extent cast to text
cmd = sql.SQL('''
WITH buffer as
	(
	SELECT ST_BUFFER(ST_Transform(ST_SetSRID(geometry, 4326),
								  26915),
					 100) geom -- buff the geometry by 100 meters
	FROM "Minneapolis Boundary"
	), bbox as
	(
	SELECT ST_EXTENT(ST_Transform(geom, 4326)) b
	FROM buffer
	)
SELECT b::text
FROM bbox;
''')

cur.execute(cmd) # Execute the query; the result is fetched in the next cell
conn.commit() # Commit command

In [5]:
# The first column of the first returned row holds the extent as a string

first_row = cur.fetchall()[0]
response = first_row[0]

# Release the database resources: cursor first, then the connection
cur.close()
conn.close()

In [6]:
# Display the extent string fetched from the database
response

'BOX(-93.33037537752216 44.88968834134478,-93.19306250738248 45.05214646628739)'

In [7]:
# Drop the leading 'BOX(' and the trailing ')' so only the coordinates remain

num_string = response[len('BOX('):-len(')')]

num_string

'-93.33037537752216 44.88968834134478,-93.19306250738248 45.05214646628739'

In [8]:
# The string reads "xmin ymin,xmax ymax": splitting on the spaces gives
# ['xmin', 'ymin,xmax', 'ymax'], and the middle piece is split on the comma

bbox_pieces = num_string.split(' ')
middle_pieces = bbox_pieces[1].split(',')

xmin = bbox_pieces[0]
ymin = middle_pieces[0]
xmax = middle_pieces[1]
ymax = bbox_pieces[2]

# Convert into PurpleAir API notation
# (north-west corner = xmin/ymax, south-east corner = xmax/ymin)

nwlng = xmin
selat = ymin
selng = xmax
nwlat = ymax

# Importing New PurpleAir Station Data

In [9]:
# Assemble the bounding-box part of the API query as key=value pairs
bounds_pairs = (('nwlng', nwlng),
                ('nwlat', nwlat),
                ('selng', selng),
                ('selat', selat))

bounds_strings = [f'{key}={value}' for key, value in bounds_pairs]

# Pairs are chained with '&' like any URL query string
bounds_string = '&'.join(bounds_strings)

print(bounds_string)

nwlng=-93.33037537752216&nwlat=45.05214646628739&selng=-93.19306250738248&selat=44.88968834134478


In [10]:
# Build the query string for the PurpleAir API call

# Sensor attributes to request
fields = ['firmware_version', 'date_created', 'last_modified', 'last_seen',
          'name', 'uptime', 'position_rating', 'channel_state', 'channel_flags',
          'altitude', 'latitude', 'longitude', 'location_type']

# Field names are separated by a URL-encoded comma (%2C)
encoded_fields = '%2C'.join(fields)
fields_string = f'fields={encoded_fields}'

# Full query: the requested fields followed by the bounding box
query_string = f'{fields_string}&{bounds_string}'

In [11]:
# Call the PurpleAir API with the assembled query string and the read key.
# NOTE(review): getSensorsData is not defined in this notebook; presumably it
# comes from Get_spikes_df.py, which is exec'd in the "Load our Functions" cell - confirm.
# Note that this rebinds `response`, which previously held the extent string.
response = getSensorsData(query_string, purpleAir_api) # See Get_spikes_df.py

In [20]:
# Unpack response

response_dict = response.json() # Read response as a json (dictionary)

col_names = response_dict['fields']
data = np.array(response_dict['data'])

df = pd.DataFrame(data, columns = col_names) # Convert to dataframe

# Correct Last Seen/modified/date created into datetimes:
# the values are parsed as epoch seconds in UTC, then converted to Central time

for time_col in ['last_modified', 'date_created', 'last_seen']:
    df[time_col] = pd.to_datetime(df[time_col].astype(int),
                                  utc = True,
                                  unit = 's').dt.tz_convert('America/Chicago')

# Make sure sensor_index/location_type are numeric

df['sensor_index'] = pd.to_numeric(df['sensor_index'])
df['location_type'] = pd.to_numeric(df['location_type'])


# Filter for City of Minneapolis & outside sensors

# Case-insensitive literal match; na=False treats a missing name as "not a
# city sensor" instead of raising AttributeError on None.upper()
is_city = df['name'].str.upper().str.contains('CITY OF MINNEAPOLIS',
                                              regex = False,
                                              na = False)
is_outside = df.location_type == 0 # location_type 0 is used here as the "outside" flag

purpleAir_df = df[is_city & is_outside].copy()

In [28]:
# Preview the first rows of the filtered sensors
purpleAir_df.head()

Unnamed: 0,sensor_index,last_modified,date_created,last_seen,name,location_type,firmware_version,uptime,position_rating,latitude,longitude,altitude,channel_state,channel_flags
13,142718,2023-08-02 13:50:17-05:00,2022-01-12 12:57:49-06:00,2023-11-15 11:44:05-06:00,City of Minneapolis Community Air Monitoring P...,0,7.02,222,5,44.99631,-93.29565,857,3,0
14,142720,2023-08-02 13:06:36-05:00,2022-01-12 12:57:55-06:00,2023-11-15 11:44:38-06:00,City of Minneapolis Community Air Monitoring P...,0,7.02,47321,5,44.955555,-93.254974,850,3,0
15,142726,2023-02-02 11:31:06-06:00,2022-01-12 12:58:17-06:00,2023-11-15 11:45:03-06:00,City of Minneapolis Community Air Monitoring P...,0,7.02,2157,5,45.01507,-93.28903,889,3,0
16,142724,2023-08-02 11:12:05-05:00,2022-01-12 12:58:09-06:00,2023-11-15 11:43:58-06:00,City of Minneapolis Community Air Monitoring P...,0,7.02,21784,5,44.937733,-93.24356,859,3,0
17,142734,2023-08-02 11:05:23-05:00,2022-01-12 12:58:49-06:00,2023-11-15 11:44:33-06:00,City of Minneapolis Community Air Monitoring P...,0,7.02,9702,5,44.90432,-93.280624,847,3,0


In [23]:
# Turn the longitude/latitude columns into point geometries (EPSG:4326)

station_points = gpd.points_from_xy(x = purpleAir_df['longitude'],
                                    y = purpleAir_df['latitude'],
                                    crs = 'EPSG:4326')

# Attach the points to the sensor table as its geometry column
gdf = gpd.GeoDataFrame(purpleAir_df, geometry = station_points)

## Insert Data into SQL Table

In [26]:
# Insert into database

cols_for_db = ['sensor_index', 'firmware_version', 'date_created', 'last_modified', 'last_seen',
 'name', 'uptime', 'position_rating', 'channel_state', 'channel_flags', 'altitude', 'geometry'] 

# Get values ready for database

# Select the non-geometry columns in database order first, then copy only that
# subset (avoids copying the whole GeoDataFrame just to discard columns)
sorted_df = gdf[cols_for_db[:-1]].copy()

# Get Well Known Text of the geometry; it is appended as the last column so it
# lines up with the geometry placeholder of the INSERT statement

sorted_df['wkt'] = gdf.geometry.apply(lambda geom: geom.wkt)

# Format the times as plain 'YYYY-MM-DD HH:MM:SS' strings (vectorized)

for time_col in ['date_created', 'last_modified', 'last_seen']:
    sorted_df[time_col] = gdf[time_col].dt.strftime('%Y-%m-%d %H:%M:%S')

In [27]:
# Preview the rows as they will be sent to the database
sorted_df.head()

Unnamed: 0,sensor_index,firmware_version,date_created,last_modified,last_seen,name,uptime,position_rating,channel_state,channel_flags,altitude,wkt
13,142718,7.02,2022-01-12 12:57:49,2023-08-02 13:50:17,2023-11-15 11:44:05,City of Minneapolis Community Air Monitoring P...,222,5,3,0,857,POINT (-93.29565 44.99631)
14,142720,7.02,2022-01-12 12:57:55,2023-08-02 13:06:36,2023-11-15 11:44:38,City of Minneapolis Community Air Monitoring P...,47321,5,3,0,850,POINT (-93.254974 44.955555)
15,142726,7.02,2022-01-12 12:58:17,2023-02-02 11:31:06,2023-11-15 11:45:03,City of Minneapolis Community Air Monitoring P...,2157,5,3,0,889,POINT (-93.28903 45.01507)
16,142724,7.02,2022-01-12 12:58:09,2023-08-02 11:12:05,2023-11-15 11:43:58,City of Minneapolis Community Air Monitoring P...,21784,5,3,0,859,POINT (-93.24356 44.937733)
17,142734,7.02,2022-01-12 12:58:49,2023-08-02 11:05:23,2023-11-15 11:44:33,City of Minneapolis Community Air Monitoring P...,9702,5,3,0,847,POINT (-93.280624 44.90432)


In [212]:
# Connect to PostGIS Database

conn = psycopg2.connect(**pg_connection_dict)

# Build the INSERT statement once; it is identical for every row.
# All columns but the last get a plain placeholder; the last (geometry) value
# is supplied as WKT and converted to a geometry with SRID 4326 by the database.
q1 = sql.SQL('INSERT INTO "PurpleAir Stations" ({}) VALUES ({},{});').format(
    sql.SQL(', ').join(map(sql.Identifier, cols_for_db)),
    sql.SQL(', ').join(sql.Placeholder() * (len(cols_for_db)-1)),
    sql.SQL('ST_SetSRID(ST_GeomFromText(%s), 4326)::geometry'))

try:
    # The cursor context manager closes the cursor even if an insert fails
    with conn.cursor() as cur:

        # iterate over the dataframe and insert each row into the database
        # (column order of sorted_df matches cols_for_db, with wkt last)
        for index, row in sorted_df.iterrows():
            cur.execute(q1, list(row.values))

    # Commit commands - only reached when every row was inserted
    conn.commit()

finally:
    # Always close the connection; closing without a commit discards the
    # pending transaction, so a failed run leaves no partial insert behind
    conn.close()