# PurpleAir Stations - Daily Update

# Prep

## Import Packages

In [75]:
### Import Packages

# File manipulation

import os # For working with Operating System
import requests # Accessing the Web

# Time

import datetime as dt # Working with dates/times
import pytz

# Database 

import psycopg2
from psycopg2 import sql

# Analysis

import numpy as np
import geopandas as gpd
import pandas as pd

In [76]:
# Load our Functions

import PurpleAir_Functions as purp
# exec(open(os.path.join(script_path, 'Update_Alerts.py')).read())

## Definitions

In [77]:
# Getting .env information (PurpleAir API Read Key)
from dotenv import load_dotenv

load_dotenv()

# PurpleAir API read key, taken from the environment (None when the variable is unset)
purpleAir_api = os.environ.get('PURPLEAIR_API_TOKEN')

## Database credentials

# Values are read in the same order as the connection keywords listed below
creds = [os.environ.get(env_name)
         for env_name in ('DB_NAME', 'DB_USER', 'DB_PASS', 'DB_PORT', 'DB_HOST')]

# Keyword arguments for psycopg2.connect(**pg_connection_dict)
pg_connection_dict = {
    'dbname': creds[0],
    'user': creds[1],
    'password': creds[2],
    'port': creds[3],
    'host': creds[4],
}

## Load Previous Information

### Sensor Information

We want each known station's sensor_index, last_seen, name, channel_flags, and channel_state from our database.

In [78]:
# Load the stations we already track (sensor_index, last_seen, name, channel_flags, channel_state)

# Connect
conn = psycopg2.connect(**pg_connection_dict)

try:
    # The cursor is closed automatically when the `with` block exits
    with conn.cursor() as cur:

        cmd = sql.SQL('''SELECT sensor_index, last_seen, name, channel_flags, channel_state
FROM "PurpleAir Stations";
''')

        cur.execute(cmd) # Execute (read-only query, so there is nothing to commit)

        # Unpack response into a pandas dataframe

        sensors_df = pd.DataFrame(cur.fetchall(),
                                  columns = ['sensor_index', 'last_seen', 'name', 'channel_flags', 'channel_state'])
finally:
    # Always release the connection, even when the query or the unpacking fails
    conn.close()

# Datatype corrections

sensors_df['last_seen'] = pd.to_datetime(sensors_df['last_seen'])

# Get City ID

# sensors_df['city_id'] = sensors_df['name'].apply(lambda x: x.split(' ')[-1])

sensors_df['channel_flags'] = sensors_df.channel_flags.astype(int)

### Extent of study

Get the latitude/longitude bounding box of the study area to use as PurpleAir API parameters.

In [79]:
# Connect
# NOTE: the connection and cursor opened here stay open after this cell;
# the next cell fetches the result from `cur` and closes both.

conn = psycopg2.connect(**pg_connection_dict) 
# Create cursor
cur = conn.cursor()

# Per the SQL below: the boundary geometry is tagged as SRID 4326, transformed to
# SRID 26915, buffered by 100 (meters, per the inline SQL comment), transformed back
# to 4326, and its extent (bounding box) is returned as text.
cmd = sql.SQL('''
WITH buffer as
	(
	SELECT ST_BUFFER(ST_Transform(ST_SetSRID(geometry, 4326),
								  26915),
					 100) geom -- buff the geometry by 100 meters
	FROM "Minneapolis Boundary"
	), bbox as
	(
	SELECT ST_EXTENT(ST_Transform(geom, 4326)) b
	FROM buffer
	)
SELECT b::text
FROM bbox;
''')

cur.execute(cmd) # Execute
conn.commit() # Commit command

In [80]:
# Gives a string
# Takes the first column of the first row returned by the bounding-box query
# executed in the previous cell (that query selects a single text value, `b::text`).

response = cur.fetchall()[0][0]

# Close cursor
cur.close()
# Close connection
conn.close()

In [81]:
response

'BOX(-93.33037537752216 44.88968834134478,-93.19306250738248 45.05214646628739)'

In [82]:
# Unpack the response: drop the leading 'BOX(' and the trailing ')'
# so that only the coordinate text remains

num_string = response[len('BOX('):-1]

num_string

'-93.33037537752216 44.88968834134478,-93.19306250738248 45.05214646628739'

In [83]:
# The text reads "xmin ymin,xmax ymax": splitting on spaces leaves the middle
# token holding "ymin,xmax", which is then split on the comma.
# All four values stay strings.

space_tokens = num_string.split(' ')
comma_tokens = space_tokens[1].split(',')

xmin, ymin = space_tokens[0], comma_tokens[0]
xmax, ymax = comma_tokens[1], space_tokens[2]

# Convert into PurpleAir API notation (north-west and south-east corners)

nwlng, nwlat = xmin, ymax
selng, selat = xmax, ymin

## Importing New PurpleAir Station Data

In [84]:
# Set bounding strings for API parameters: one "key=value" pair per corner coordinate
bounds_strings = [f'{corner}={value}'
                  for corner, value in (('nwlng', nwlng),
                                        ('nwlat', nwlat),
                                        ('selng', selng),
                                        ('selat', selat))]

# Chain the pairs into a single URL query fragment
bounds_string = '&'.join(bounds_strings)

print(bounds_string)

nwlng=-93.33037537752216&nwlat=45.05214646628739&selng=-93.19306250738248&selat=44.88968834134478


In [85]:
# Setting parameters for API (for comparing to our data)

# Fields to request
fields = ['sensor_index', 'channel_flags', 'last_seen', 'name']

# The field names are joined with '%2C' (a URL-encoded comma)
fields_string = f"fields={'%2C'.join(fields)}"

# Finalizing query for API function: requested fields followed by the bounding box
query_string = f'{fields_string}&{bounds_string}'

In [86]:
# Calling the API
# This rebinds `response`, which earlier held the bounding-box string.
# NOTE(review): the next cell calls `response.json()`, so this is presumably a
# requests.Response - confirm, and consider checking the HTTP status before unpacking.
response = purp.getSensorsData(query_string, purpleAir_api)

In [87]:
# Unpack response

response_dict = response.json() # Read response as a json (dictionary)

col_names = response_dict['fields']
data = response_dict['data']

# Build the dataframe straight from the list of rows. Routing the rows through
# np.array would cast every value to a string (the rows mix numbers and text),
# and an empty list of rows would fail to match the column names.
df = pd.DataFrame(data, columns = col_names) # Convert to dataframe

# Correct Last Seen (UNIX seconds, read as UTC, shown in America/Chicago)

df['last_seen'] = pd.to_datetime(pd.to_numeric(df['last_seen']),
                                         utc = True,
                                         unit='s').dt.tz_convert('America/Chicago')

# Make sure sensor_index is an integer

df['sensor_index'] = pd.to_numeric(df['sensor_index'])


# Filter for City of Minneapolis (case-insensitive; a missing name counts as "not city")

is_city = (df['name']
           .str.upper()
           .str.contains('CITY OF MINNEAPOLIS', regex = False, na = False)
           .astype(bool))

purpleAir_df =  df[is_city].copy()

# # Get City ID

# purpleAir_df['city_id'] = purpleAir_df['name'].apply(lambda x: x.split(' ')[-1])

# Compare

In [88]:
# See PurpleAir results
# Prints the number of rows kept by the City of Minneapolis filter, then previews the first rows.

print(len(purpleAir_df))
purpleAir_df.head()

46


Unnamed: 0,sensor_index,last_seen,name,channel_flags
13,142718,2023-11-20 16:21:36-06:00,City of Minneapolis Community Air Monitoring Project 50,0
14,142720,2023-11-20 16:21:32-06:00,City of Minneapolis Community Air Monitoring Project 29,0
15,142726,2023-11-20 16:03:52-06:00,City of Minneapolis Community Air Monitoring Project 11,0
16,142724,2023-11-20 16:20:24-06:00,City of Minneapolis Community Air Monitoring Project 21,0
17,142734,2023-11-20 16:21:19-06:00,City of Minneapolis Community Air Monitoring Project 19,0


In [89]:
# Check our database
# Prints the number of stations loaded from the database, then previews the first rows.

print(len(sensors_df)) # From our database

sensors_df.head()

69


Unnamed: 0,sensor_index,last_seen,name,channel_flags,channel_state
0,142756,2023-10-19 11:03:33,City of Minneapolis Community Air Monitoring Project 51,4,3
1,142730,2023-09-25 09:18:30,City of Minneapolis Community Air Monitoring Project 13,4,3
2,142752,2023-06-22 10:19:40,City of Minneapolis Community Air Monitoring Project 40,4,3
3,142772,2023-03-28 14:34:07,City of Minneapolis community Air Monitoring Project 31,4,3
4,142926,2022-11-10 11:26:39,City of Minneapolis Community Air Monitoring Project 22,4,3


In [90]:
# Outer-join our stations with the PurpleAir stations on sensor_index.
# Columns present on both sides get the suffix _SpikeAlerts (ours) or _PurpleAir (theirs).

merged = sensors_df.merge(purpleAir_df,
                          on = 'sensor_index',
                          how = 'outer',
                          suffixes = ('_SpikeAlerts', '_PurpleAir'))

# Clean up datatypes
# The nullable "Int64" dtype keeps the missing values that the outer join introduces

merged = merged.astype({'sensor_index': int,
                        'channel_state': 'Int64',
                        'channel_flags_PurpleAir': 'Int64',
                        'channel_flags_SpikeAlerts': 'Int64'})

In [91]:
# Let's explore

# Show full cell contents (no column-width truncation) so the long station names are readable.
# This display option stays in effect for the rest of the session.
pd.set_option('display.max_colwidth', None)

merged

Unnamed: 0,sensor_index,last_seen_SpikeAlerts,name_SpikeAlerts,channel_flags_SpikeAlerts,channel_state,last_seen_PurpleAir,name_PurpleAir,channel_flags_PurpleAir
0,142756,2023-10-19 11:03:33,City of Minneapolis Community Air Monitoring Project 51,4,3,NaT,,
1,142730,2023-09-25 09:18:30,City of Minneapolis Community Air Monitoring Project 13,4,3,NaT,,
2,142752,2023-06-22 10:19:40,City of Minneapolis Community Air Monitoring Project 40,4,3,NaT,,
3,142772,2023-03-28 14:34:07,City of Minneapolis community Air Monitoring Project 31,4,3,NaT,,
4,142926,2022-11-10 11:26:39,City of Minneapolis Community Air Monitoring Project 22,4,3,NaT,,
...,...,...,...,...,...,...,...,...
64,157747,2023-10-14 13:20:05,City of Minneapolis Community Air Monitoring Project 66,4,3,NaT,,
65,157757,2023-08-23 19:26:40,City of Minneapolis Community Air Monitoring Project 3,4,3,NaT,,
66,157785,2023-08-11 17:22:27,City of Minneapolis Community Air Monitoring Project 59,4,3,NaT,,
67,157837,2023-08-15 14:38:40,City of Minneapolis community Air Monitoring Project 22,4,3,NaT,,


In [92]:
# Do the names match up?
# Exact (case-sensitive) comparison; a row missing a name on either side is False

names_match = merged['name_SpikeAlerts'].eq(merged['name_PurpleAir'])

## Different Names

In [93]:
# What if they don't?
# Keep every row where the two names are not identical (this includes rows missing a name)

diffName_df = merged.loc[~names_match]

diffName_df.sort_values('last_seen_SpikeAlerts')

Unnamed: 0,sensor_index,last_seen_SpikeAlerts,name_SpikeAlerts,channel_flags_SpikeAlerts,channel_state,last_seen_PurpleAir,name_PurpleAir,channel_flags_PurpleAir
4,142926,2022-11-10 11:26:39,City of Minneapolis Community Air Monitoring Project 22,4,3,NaT,,
23,145504,2023-01-09 17:09:39,City of Minneapolis Community Air Monitoring Project 4,4,3,NaT,,
3,142772,2023-03-28 14:34:07,City of Minneapolis community Air Monitoring Project 31,4,3,NaT,,
60,177765,2023-03-28 14:37:14,City of Minneapolis Community Air Monitoring Project 28,4,3,NaT,,
6,143238,2023-04-26 15:33:55,City of Minneapolis Community Air Monitoring Project 35,4,3,NaT,,
2,142752,2023-06-22 10:19:40,City of Minneapolis Community Air Monitoring Project 40,4,3,NaT,,
5,143222,2023-06-24 23:07:54,City of Minneapolis,4,3,NaT,,
62,143944,2023-06-27 07:01:58,City of Minneapolis Community Air Monitoring Project 37,4,3,NaT,,
63,145202,2023-06-29 14:20:54,City of Minneapolis Community Air Monitoring Project 60,4,3,NaT,,
24,145610,2023-07-03 06:22:30,City of Minneapolis Community Air Monitoring Project 24,4,3,NaT,,


### New Names

In [94]:
# What does PurpleAir have that we don't? <- Add them to our database! See Add_new_PurpleAir_Stations in Daily_Updates.py
# A row with no name on our side is a sensor we have not stored yet

is_new_name = diffName_df['name_SpikeAlerts'].isnull()

newName_df = diffName_df.loc[is_new_name]

newName_df

Unnamed: 0,sensor_index,last_seen_SpikeAlerts,name_SpikeAlerts,channel_flags_SpikeAlerts,channel_state,last_seen_PurpleAir,name_PurpleAir,channel_flags_PurpleAir


In [95]:
# Get the new sensor_indices as a plain Python list

sensor_indices = newName_df['sensor_index'].tolist()

In [24]:
# Get the full information from PurpleAir on these new sensors

# Start from an empty frame so that `gdf` always exists: the insert cell below
# checks `gdf.sensor_index`, which would otherwise be undefined when there are
# no new sensors.
gdf = gpd.GeoDataFrame(columns = ['sensor_index'])

if len(sensor_indices) > 0:

    fields = ['firmware_version','date_created','last_modified','last_seen', 'name', 'uptime','position_rating','channel_state','channel_flags','altitude',
                  'latitude', 'longitude']

    # '%2C' is a URL-encoded comma
    fields_string = 'fields=' + '%2C'.join(fields)

    sensor_string = 'show_only=' + '%2C'.join(str(sensor_index) for sensor_index in sensor_indices)

    query_string = '&'.join([fields_string, sensor_string])

    # getSensorsData is imported through the `purp` module, so it must be called
    # through that namespace (a bare getSensorsData is a NameError on a fresh kernel)
    response = purp.getSensorsData(query_string, purpleAir_api)

    # Unpack response

    response_dict = response.json() # Read response as a json (dictionary)

    col_names = response_dict['fields']
    data = np.array(response_dict['data'])

    df = pd.DataFrame(data, columns = col_names)

    # Correct Last Seen/modified/date created into datetimes
    # (UNIX seconds read as UTC, then shown in America/Chicago)

    for time_col in ['last_modified', 'date_created', 'last_seen']:

        df[time_col] = pd.to_datetime(df[time_col].astype(int),
                                      utc = True,
                                      unit='s').dt.tz_convert('America/Chicago')

    # Make sure sensor_index is an integer
    # NOTE(review): 'sensor_index' is not in `fields` above; this relies on the
    # API returning it anyway - confirm.

    df['sensor_index'] = pd.to_numeric(df['sensor_index'])

    # Spatializing: one point per sensor from its longitude/latitude

    gdf = gpd.GeoDataFrame(df,
                                geometry = gpd.points_from_xy(
                                    df.longitude,
                                    df.latitude,
                                    crs = 'EPSG:4326')
                               )

In [None]:
# Insert into database

# Guard on the list of new sensors first: `gdf` is only created by the previous
# cell when there is at least one new sensor, so it must not be touched otherwise
if len(sensor_indices) > 0 and len(gdf.sensor_index) > 0:

    cols_for_db = ['sensor_index', 'firmware_version', 'date_created', 'last_modified', 'last_seen',
     'name', 'uptime', 'position_rating', 'channel_state', 'channel_flags', 'altitude', 'geometry']

    # Get values ready for database

    # Keep only the database columns, in cols_for_db order (geometry is handled on the next line)
    sorted_df = gdf[cols_for_db[:-1]].copy()

    # Get Well Known Text of the geometry (last column, matching the final placeholder)

    sorted_df['wkt'] = gdf.geometry.apply(lambda x: x.wkt)

    # Format the times

    for time_col in ['date_created', 'last_modified', 'last_seen']:

        sorted_df[time_col] = gdf[time_col].dt.strftime('%Y-%m-%d %H:%M:%S')

    # The statement is the same for every row, so build it once:
    # one placeholder per plain column, plus the geometry built from the WKT text
    q1 = sql.SQL('INSERT INTO "PurpleAir Stations" ({}) VALUES ({},{});').format(
     sql.SQL(', ').join(map(sql.Identifier, cols_for_db)),
     sql.SQL(', ').join(sql.Placeholder() * (len(cols_for_db)-1)),
     sql.SQL('ST_SetSRID(ST_GeomFromText(%s), 4326)::geometry'))

    # Connect to PostGIS Database

    conn = psycopg2.connect(**pg_connection_dict)

    try:
        insert_cmd = q1.as_string(conn)

        # The cursor is closed automatically when the `with` block exits
        with conn.cursor() as cur:

            # iterate over the dataframe and insert each row into the database

            for index, row in sorted_df.iterrows():

                cur.execute(insert_cmd, list(row.values))

        # Commit commands (only reached when every insert succeeded)

        conn.commit()

    finally:
        # Always release the connection; closing without a commit discards the pending inserts
        conn.close()

### Old Names (Not in PurpleAir)

In [26]:
# What do we have PurpleAir Doesn't?
# ^ Mostly old sensors. These should be changed to channel_state = 0 in our database...

# A row with no PurpleAir name was not returned by the PurpleAir query
no_name_PurpleAir = diffName_df['name_PurpleAir'].isnull()

no_PurpleAir_df = diffName_df.loc[no_name_PurpleAir]

no_PurpleAir_df.sort_values('last_seen_SpikeAlerts')

Unnamed: 0,sensor_index,last_seen_SpikeAlerts,name_SpikeAlerts,channel_flags_SpikeAlerts,channel_state,last_seen_PurpleAir,name_PurpleAir,channel_flags_PurpleAir
4,142926,2022-11-10 05:26:39-06:00,City of Minneapolis Community Air Monitoring Project 22,0,0,NaT,,
26,145504,2023-01-09 11:09:39-06:00,City of Minneapolis Community Air Monitoring Project 4,0,0,NaT,,
3,142772,2023-03-28 09:34:07-05:00,City of Minneapolis community Air Monitoring Project 31,0,0,NaT,,
57,177765,2023-03-28 09:37:14-05:00,City of Minneapolis Community Air Monitoring Project 28,0,0,NaT,,
6,143238,2023-04-26 10:33:55-05:00,City of Minneapolis Community Air Monitoring Project 35,0,0,NaT,,
2,142752,2023-06-22 05:19:40-05:00,City of Minneapolis Community Air Monitoring Project 40,0,0,NaT,,
5,143222,2023-06-24 18:07:54-05:00,City of Minneapolis,0,0,NaT,,
59,143944,2023-06-27 02:01:58-05:00,City of Minneapolis Community Air Monitoring Project 37,0,0,NaT,,
60,145202,2023-06-29 09:20:54-05:00,City of Minneapolis Community Air Monitoring Project 60,0,0,NaT,,
27,145610,2023-07-03 01:22:30-05:00,City of Minneapolis Community Air Monitoring Project 24,0,0,NaT,,


In [27]:
# Get the sensors that haven't been seen in 4 days & have a good channel state (not zero)

# Compare instants in UTC. last_seen_SpikeAlerts can come back from the database
# tz-naive or tz-aware, and comparing a tz-naive column with a tz-aware "now"
# raises a TypeError. pd.to_datetime(..., utc = True) converts tz-aware values to
# UTC and labels tz-naive values as UTC.
# NOTE(review): assumes tz-naive values are stored in UTC - confirm against the column type.
last_seen_utc = pd.to_datetime(no_PurpleAir_df.last_seen_SpikeAlerts, utc = True)
cutoff_utc = pd.Timestamp.now(tz = 'UTC') - pd.Timedelta(days = 4)

not_seen_recently = (last_seen_utc < cutoff_utc)
good_channel_state = (no_PurpleAir_df.channel_state != 0)

bad_channel_state_df = no_PurpleAir_df[not_seen_recently & good_channel_state]

bad_indices = bad_channel_state_df.sensor_index.to_list()

if len(bad_indices) > 0:

    # NOTE(review): Flag_station_channel_state is not defined or imported anywhere in
    # this notebook (see Daily_Updates.py); it must be brought into scope before this runs.
    Flag_station_channel_state(bad_indices, pg_connection_dict) # See Daily_Updates.py

    # Probably should email city about these... NOT DONE

### Conflicting Names (Both Have a Name, but They Differ)

In [49]:
# In the off chance that we both have names, but they're different

# ^ We should update our database with the new name/last_seen/channel_flags, no question

name_controversy = ~no_name_PurpleAir & ~is_new_name # Not new and not no name from PurpleAir

name_controversy_df = diffName_df[name_controversy] # Has a different name, tho!

if len(name_controversy_df.sensor_index) > 0:

    # The statement is the same for every row; the values are passed as query
    # parameters so psycopg2 does the quoting
    cmd = sql.SQL('''UPDATE "PurpleAir Stations"
        SET name = %s, last_seen = %s, channel_flags = %s
        WHERE sensor_index = %s;
        ''')

    # Connect to database
    conn = psycopg2.connect(**pg_connection_dict)

    try:
        # The cursor is closed automatically when the `with` block exits
        with conn.cursor() as cur:

            for i, row in name_controversy_df.iterrows():

                cur.execute(cmd,
                            (row.name_PurpleAir,
                             row.last_seen_PurpleAir.strftime('%Y-%m-%d %H:%M:%S'),
                             int(row.channel_flags_PurpleAir),
                             int(row.sensor_index)))

        # Only reached when every update succeeded
        conn.commit()

    finally:
        # Always release the connection; closing without a commit discards the pending updates
        conn.close()

## Same Name

In [None]:
# Rows where both sources agree on the name, with both flag columns cast to
# plain integers (astype returns a new frame, so `merged` is left untouched)
sameName_df = merged.loc[names_match].astype({'channel_flags_SpikeAlerts': int,
                                              'channel_flags_PurpleAir': int})

### New Flags

In [50]:
# Major Sensor Flags

# Key
# Channel Flags - 0 = Normal, 1 = A Downgraded, 2 - B Downgraded, 3 - Both Downgraded, 4 = Change from yesterday

# Anything other than 0 in our stored flag counts as flagged
flagged = sameName_df['channel_flags_SpikeAlerts'].ne(0)

flagged_df = sameName_df.loc[flagged]

flagged_df

Unnamed: 0,sensor_index,last_seen_SpikeAlerts,name_SpikeAlerts,channel_flags_SpikeAlerts,channel_state,last_seen_PurpleAir,name_PurpleAir,channel_flags_PurpleAir
0,142732,2023-11-02 14:08:48-05:00,City of Minneapolis Community Air Monitoring Project 16,4,3,2023-11-15 12:33:32-06:00,City of Minneapolis Community Air Monitoring Project 16,1
18,157871,2023-11-02 14:07:43-05:00,City of Minneapolis Community Air Monitoring Project 58,4,3,2023-11-15 12:33:30-06:00,City of Minneapolis Community Air Monitoring Project 58,2
19,145242,2023-11-02 14:07:56-05:00,City of Minneapolis Community Air Monitoring Project 6,4,3,2023-11-15 12:33:53-06:00,City of Minneapolis Community Air Monitoring Project 6,2
20,166459,2023-11-02 14:07:42-05:00,City of Minneapolis Community Air Monitoring Project 67,4,3,2023-11-15 12:33:29-06:00,City of Minneapolis Community Air Monitoring Project 67,2
21,157845,2023-11-02 14:08:06-05:00,City of Minneapolis Community Air Monitoring Project 63,4,3,2023-11-15 12:34:11-06:00,City of Minneapolis Community Air Monitoring Project 63,2
22,142736,2023-11-02 00:57:30-05:00,City of Minneapolis Community Air Monitoring Project 17,4,3,2023-11-14 16:14:33-06:00,City of Minneapolis Community Air Monitoring Project 17,0


In [45]:
# If we've got a 4, that means that the issue is from the previous day.

# We should probably notify the City!

is_new_issue = (flagged_df.channel_flags_SpikeAlerts == 4)

new_issue_df = flagged_df[is_new_issue]

# Conditions

conditions = ['wifi_down?', 'a_down', 'b_down', 'both_down'] # corresponds to 0, 1, 2, 3 from PurpleAir channel_flags

# A flag of 0 is only reported when the sensor has been silent for longer than 6 hours
wifi_cutoff = dt.datetime.now(pytz.timezone('America/Chicago')) - dt.timedelta(hours = 6)

# Initialize storage

email = '''Hello City of Minneapolis Health Department,

Writing today to inform you of some anomalies in the PurpleAir sensors that we discovered:

name, last seen, channel issue

'''

email_lines = []

for flag, condition in enumerate(conditions):

    # Build the mask from new_issue_df itself so that it lines up with the frame it filters
    con_df = new_issue_df[new_issue_df.channel_flags_PurpleAir == flag]

    if flag == 0: # Only "serious" wifi issue if longer than 6 hours

        con_df = con_df[con_df.last_seen_PurpleAir < wifi_cutoff]

    # One line per sensor: name, last seen, condition
    for _, row in con_df.iterrows():

        email_lines.append(f'\n{row.name_PurpleAir}, {row.last_seen_PurpleAir.strftime("%m/%d/%y - %H:%M")}, {condition}')

email += ''.join(email_lines)
email += '\n\nTake Care,\nSpikeAlerts'
print(email)

Hello City of Minneapolis Health Department,

Writing today to inform you of some anomalies in the PurpleAir sensors that we discovered:

name, last seen, channel issue


City of Minneapolis Community Air Monitoring Project 17, 11/14/23 - 16:14, wifi_down?
City of Minneapolis Community Air Monitoring Project 16, 11/15/23 - 12:33, a_down
City of Minneapolis Community Air Monitoring Project 58, 11/15/23 - 12:33, b_down
City of Minneapolis Community Air Monitoring Project 6, 11/15/23 - 12:33, b_down
City of Minneapolis Community Air Monitoring Project 67, 11/15/23 - 12:33, b_down
City of Minneapolis Community Air Monitoring Project 63, 11/15/23 - 12:34, b_down

Take Care,
SpikeAlerts


### Update flags/last_seen

Set these columns in our database to the values we just got from PurpleAir.

In [47]:
# ^ We should update our database with the new last_seen/channel_flags, no question
# (the names already match for these rows, so the name is left alone)

if len(sameName_df.sensor_index) > 0:

    # The statement is the same for every row; the values are passed as query
    # parameters so psycopg2 does the quoting
    cmd = sql.SQL('''UPDATE "PurpleAir Stations"
        SET last_seen = %s, channel_flags = %s
        WHERE sensor_index = %s;
        ''')

    # Connect to database
    conn = psycopg2.connect(**pg_connection_dict)

    try:
        # The cursor is closed automatically when the `with` block exits
        with conn.cursor() as cur:

            for i, row in sameName_df.iterrows():

                cur.execute(cmd,
                            (row.last_seen_PurpleAir.strftime('%Y-%m-%d %H:%M:%S'),
                             int(row.channel_flags_PurpleAir),
                             int(row.sensor_index)))

        # Only reached when every update succeeded
        conn.commit()

    finally:
        # Always release the connection; closing without a commit discards the pending updates
        conn.close()

# Condensed Sorting

In [96]:
# Second name for the same dataframe object (an alias, not a copy)
merged_df = merged

In [106]:
# Current time in the study area's timezone (shared by the two recency checks below)
now_local = dt.datetime.now(pytz.timezone('America/Chicago'))

# Do the names match up?
names_match = (merged_df.name_SpikeAlerts == merged_df.name_PurpleAir)
# Do we not have the name?
no_name_SpikeAlerts = (merged_df.name_SpikeAlerts.isna())
# Does PurpleAir not have the name?
no_name_PurpleAir = (merged_df.name_PurpleAir.isna())
# We haven't seen recently? - last seen on a calendar day before the day 30 days ago.
# Days are compared as midnight timestamps rather than datetime.date objects, so rows
# with no last_seen of ours (new sensors, NaT) evaluate to False instead of raising.
last_seen_day = merged_df.last_seen_SpikeAlerts.dt.normalize()
cutoff_day = pd.Timestamp((now_local - dt.timedelta(days = 30)).date(),
                          tz = last_seen_day.dt.tz) # tz-naive or tz-aware, to match the column
not_seen_recently = (last_seen_day < cutoff_day)
# PurpleAir hasn't seen recently - within 6 hours
not_seen_recently_PurpleAir = (merged_df.last_seen_PurpleAir < now_local - dt.timedelta(hours = 6))
# Good channel State
good_channel_state = (merged_df.channel_state != 0)
# New Flags
is_new_issue = (merged_df.channel_flags_SpikeAlerts == 4)

# Use the conditions to sort

# Same name on both sides
same_name_indices = merged_df[names_match].sensor_index.to_list()
# PurpleAir has it, we don't
new_indices = merged_df[(~names_match)
                        & (no_name_SpikeAlerts)].sensor_index.to_list()
# We have it, PurpleAir doesn't, it's stale and still marked with a good channel state
expired_indices = merged_df[(~names_match)
                            & (no_name_PurpleAir)
                            & (not_seen_recently)
                            & (good_channel_state)].sensor_index.to_list()
# Both have a name, but they differ
# (the variable name keeps its original spelling because a later cell displays it)
confilcting_name_indices = merged_df[(~names_match)
                                    & (~no_name_PurpleAir)
                                    & (~no_name_SpikeAlerts)].sensor_index.to_list()
# NOTE(review): this selects every row whose stored flag is 4, including sensors that
# PurpleAir did not return - confirm whether it should be limited to matching names.
new_flag_indices = merged_df[(is_new_issue)].sensor_index.to_list()

In [104]:
# same_name_indices

In [105]:
expired_indices

[142756,
 142730,
 142752,
 142772,
 142926,
 143222,
 143238,
 142728,
 145504,
 145610,
 177765,
 143944,
 145202,
 157747,
 157757,
 157785,
 157837,
 194635]

In [100]:
confilcting_name_indices

[]

In [101]:
new_indices

[]

In [107]:
new_flag_indices

[142756,
 142730,
 142752,
 142772,
 142926,
 143222,
 143238,
 145604,
 156605,
 142750,
 142742,
 142774,
 143214,
 143216,
 143226,
 145262,
 145250,
 157935,
 145454,
 145498,
 142728,
 142738,
 145616,
 145504,
 145610,
 145614,
 145502,
 142732,
 143634,
 143224,
 145506,
 142718,
 157871,
 145242,
 166459,
 157845,
 142720,
 142736,
 143668,
 143942,
 145204,
 157877,
 143246,
 143636,
 143648,
 142726,
 145470,
 143666,
 142724,
 143916,
 143656,
 142744,
 142748,
 142734,
 143240,
 157861,
 143242,
 168327,
 143248,
 142928,
 177765,
 143660,
 143944,
 145202,
 157747,
 157757,
 157785,
 157837,
 194635]

In [None]:
# We're ready to test this out!

# Can do this with:

    # UPDATE "PurpleAir Stations"
    # SET channel_state = 3, channel_flags = 4;

    # DELETE FROM "PurpleAir Stations"
    # WHERE sensor_index = 143634;

    # UPDATE "PurpleAir Stations"
    # SET name = 'wrong_name'
    # WHERE sensor_index = 143916;