## Prep

In [25]:
# File Manipulation

import os # For working with Operating System
import sys # System arguments
from io import StringIO # String input/output
from dotenv import load_dotenv # Loading .env info

# Web

import requests # Accessing the Web

# Time

import datetime as dt # Working with dates/times
import pytz # Timezones
import time # For Sleeping

# Database 

import psycopg2
from psycopg2 import sql

# Data Manipulation

import numpy as np
import geopandas as gpd
import pandas as pd

### Load Functions

In [27]:
script_path = os.path.join('..', '..', 'Scripts', 'python')

# Function definition - Please see Scripts/python/*
exec(open(os.path.join(script_path, 'Get_spikes_df.py')).read())
exec(open(os.path.join(script_path, 'Create_messages.py')).read())
exec(open(os.path.join(script_path, 'twilio_functions.py')).read())
exec(open(os.path.join(script_path, 'Update_Alerts.py')).read())
exec(open(os.path.join(script_path, 'Send_Alerts.py')).read())
exec(open(os.path.join(script_path, 'Daily_Updates.py')).read())

### Global Variables

In [28]:
load_dotenv() # Load .env file

## API Keys

purpleAir_api = os.getenv('PURPLEAIR_API_TOKEN') # PurpleAir API Read Key

redCap_token_signUp = os.getenv('REDCAP_TOKEN_SIGNUP') # Survey Token
redCap_token_report = os.getenv('REDCAP_TOKEN_REPORT') # Report Token

## Database credentials

creds = [os.getenv('DB_NAME'),
         os.getenv('DB_USER'),
         os.getenv('DB_PASS'),
         os.getenv('DB_PORT'),
         os.getenv('DB_HOST')
        ]

pg_connection_dict = dict(zip(['dbname', 'user', 'password', 'port', 'host'], creds))  

## Twilio Information

TWILIO_ACCOUNT_SID = os.getenv('TWILIO_ACCOUNT_SID')
TWILIO_AUTH_TOKEN = os.getenv('TWILIO_AUTH_TOKEN')
TWILIO_NUMBER = os.getenv('TWILIO_NUMBER')

### When to Update?

In [29]:
# When to stop the program? (datetime)
days_to_run = 1 # How many days will we run this?
starttime = dt.datetime.now(pytz.timezone('America/Chicago')) 
stoptime = starttime + dt.timedelta(days=days_to_run)

In [30]:
next_update_time = starttime.replace(hour=8, minute = 0, second = 0)

print(next_update_time)

2023-11-16 08:00:00.158833-06:00


In [31]:
# When was last update?

# Connect
conn = psycopg2.connect(**pg_connection_dict) 
# Create cursor
cur = conn.cursor()

cmd = sql.SQL('''SELECT MAX(last_seen)
FROM "PurpleAir Stations";
''')

cur.execute(cmd) # Execute
conn.commit() # Committ command

# Unpack response into timezone aware datetime

time = cur.fetchall()[0][0].replace(tzinfo=pytz.timezone('America/Chicago'))

# Close cursor
cur.close()
# Close connection
conn.close()

In [32]:
time

datetime.datetime(2023, 11, 16, 13, 41, 46, tzinfo=<DstTzInfo 'America/Chicago' LMT-1 day, 18:09:00 STD>)

In [33]:
time < next_update_time

False

# PurpleAir Stations

In [9]:
def Sensor_Information_Daily_Update(pg_connection_dict, purpleAir_api):
    '''
    This is the full workflow for updating our sensor information in the database. 
    Please see Daily_Updates.py for specifics on the functions
    '''
    pass
    

In [41]:
# Load information from our database
sensors_df = Get_our_sensor_info(pg_connection_dict) # Get our sensor info

In [35]:
# Load information from PurpleAir
nwlng, selat, selng, nwlat = Get_extent(pg_connection_dict) # Get bounds of our project
purpleAir_df = Get_PurpleAir(nwlng, selat, selng, nwlat, purpleAir_api) # Get PurpleAir data

In [42]:
# Merge the datasets
merged = pd.merge(sensors_df,
                 purpleAir_df, 
                 on = 'sensor_index', 
                 how = 'outer',
                 suffixes = ('_SpikeAlerts',
                             '_PurpleAir') 
                             )

In [43]:
# Clean up datatypes
merged['sensor_index'] = merged.sensor_index.astype(int)
merged['channel_state'] = merged.channel_state.astype("Int64")

### Different Names

In [44]:
# Do the names match up?
names_match = (merged.name_SpikeAlerts == merged.name_PurpleAir)

# Different Names
diffName_df = merged[~names_match]

In [46]:
if len(diffName_df):

    ## New Name - in PurpleAir not ours - Add to our database (another PurpleAir api call)
    ### Conditions
    is_new_name = diffName_df.name_SpikeAlerts.isna() # Boolean Series
    # Sensor Indices as a list
    new_indices = diffName_df[is_new_name].sensor_index.to_list()
    if len(new_indices) > 0:
        Add_new_PurpleAir_Stations(new_indices, pg_connection_dict, purpleAir_api)
    
    ## No PurpleAir Name - Potentially old sensors - flag channel_state if last seen greater than 4 days
    ### Conditions
    no_name_PurpleAir = (diffName_df.name_PurpleAir.isna()) # Boolean Series
    not_seen_recently = (diffName_df.last_seen_SpikeAlerts.dt.date < (dt.datetime.now(pytz.timezone('America/Chicago')) - dt.timedelta(days = 4)).date()) # Seen in past 4 days?
    good_channel_state = (diffName_df.channel_state != 0) # Were we aware?
    # Sensor Indices as a list
    bad_indices = diffName_df[no_name_PurpleAir & not_seen_recently & good_channel_state
                              ].sensor_index.to_list()
    if len(bad_indices) > 0:
        Flag_channel_state(bad_indices, pg_connection_dict)
    
    ## Both have names but they're different - update with purpleair info
    ### Conditions
    name_controversy = (~no_name_PurpleAir & ~is_new_name) # Not new and not no name from PurpleAir
    # The dataframe under these conditions
    name_controversy_df = diffName_df[name_controversy].copy() # Has a different name!
    if len(name_controversy_df.sensor_index) > 0:
        Update_name(name_controversy_df, pg_connection_dict)

### Same Names

In [47]:
# If we've got a 4 in our channel_flags, 
# issue is from the previous day.

# We should probably notify the City! <- done in notebook 3_Daily_Updates/1_PurpleAir_Stations.ipynb

is_new_issue = (merged.channel_flags_SpikeAlerts == 4)

new_issue_df = merged[is_new_issue]

In [48]:
if len(new_issue_df) > 0:
    
    # Conditions

    conditions = ['wifi_down?', 'a_down', 'b_down', 'both_down'] # corresponds to 0, 1, 2, 3 from PurpleAir channel_flags

    # Initialize storage

    email = '''Hello City of Minneapolis Health Department,

    Writing today to inform you of some anomalies in the PurpleAir sensors that we discovered:

    name, last seen, channel issue

    '''

    for i, condition in enumerate(conditions):

        if i == 0: # Only "serious" wifi issue if longer than 6 hours

            con_df = new_issue_df[(new_issue_df.channel_flags_PurpleAir == i
                                    ) & (new_issue_df.last_seen_PurpleAir < dt.datetime.now(pytz.timezone('America/Chicago')) - dt.timedelta(hours = 6))]
        
        else:  
            con_df = new_issue_df[new_issue_df.channel_flags_PurpleAir == i]

        for i, row in con_df.iterrows():

                
            email += f'\n{row.name_PurpleAir}, {row.last_seen_PurpleAir.strftime("%m/%d/%y - %H:%M")}, {condition}'

    email += '\n\nTake Care,\nSpikeAlerts'
    print(email)
    
# Then update all the channel flags and last seens

Hello City of Minneapolis Health Department,

    Writing today to inform you of some anomalies in the PurpleAir sensors that we discovered:

    name, last seen, channel issue

    
City of Minneapolis Community Air Monitoring Project 16, 11/16/23 - 15:29, a_down
City of Minneapolis Community Air Monitoring Project 58, 11/16/23 - 15:29, b_down
City of Minneapolis Community Air Monitoring Project 6, 11/16/23 - 15:28, b_down
City of Minneapolis Community Air Monitoring Project 67, 11/16/23 - 15:29, b_down
City of Minneapolis Community Air Monitoring Project 63, 11/16/23 - 15:28, b_down

Take Care,
SpikeAlerts


In [49]:
# Then update all the channel flags and last seens

Update_Flags_LastSeen(merged[names_match].copy(), pg_connection_dict)

# REDCap

In [52]:
max_record_id = Get_newest_user(pg_connection_dict)

In [None]:
Add_new_users_from_REDCap(max_record_id, redCap_token_signUp, pg_connection_dict)