In [2]:
import pandas as pd
from pathlib import Path
import configparser
from psycopg2 import connect
import datetime
CONFIG = configparser.ConfigParser()
CONFIG.read(str(Path.home().joinpath('db.cfg'))) #Creates a path to your db.cfg file
dbset = CONFIG['DBSETTINGS']
con = connect(**dbset)

Identified network wide data outages that have seriously impacted data availability in 2022-2023.  
Below you can see examples of outages spanning weeks. 

In [6]:
sql = '''
SELECT time_range, duration_days
FROM gwolofs.network_outages
ORDER BY time_start DESC;'''

with con:
    my_data = pd.read_sql(sql, con)
    print(my_data.head(10))

                                   time_range  duration_days
0  [2023-05-12 21:30:00, 2023-05-12 21:45:00]       0.010417
1  [2023-05-12 08:00:00, 2023-05-12 08:45:00]       0.031250
2  [2023-04-19 04:30:00, 2023-05-08 13:00:00]      19.354167
3  [2023-04-17 00:00:00, 2023-04-17 23:45:00]       0.989583
4  [2023-04-15 08:15:00, 2023-04-15 10:00:00]       0.072917
5  [2023-04-02 06:45:00, 2023-04-12 23:45:00]      10.708333
6  [2023-03-22 00:00:00, 2023-03-22 23:45:00]       0.989583
7  [2023-03-05 06:45:00, 2023-03-21 16:45:00]      16.416667
8  [2023-01-25 01:45:00, 2023-02-22 15:15:00]      28.562500
9  [2023-01-22 07:45:00, 2023-01-22 08:30:00]       0.031250


Note data availability in 2022-2023 from network wide outages ALONE.  
**2023 percent_active is about 30%**, not 15% (didn't account for ~half year up till now below). 

In [23]:
sql = '''
--full list of dates
WITH full_dates AS(
    SELECT generate_series('2017-01-01 00:00'::timestamp, '2023-05-30 23:45', INTERVAL '15 MINUTES') AS time_bin
)

SELECT 
    EXTRACT(YEAR FROM time_bin) AS year, 
    COUNT(1) / (365 * 24 * 4)::decimal AS percent_active,
    (365) - (COUNT(1) / (24 * 4)::decimal) AS days_inactive
FROM full_dates AS fd
LEFT JOIN gwolofs.network_outages AS ro ON ro.time_range @> fd.time_bin
WHERE ro.time_range IS NULL
GROUP BY 1;'''

with con:
    my_data = pd.read_sql(sql, con)
    print(my_data.head(20))

     year  percent_active  days_inactive
0  2017.0        0.869121      47.770833
1  2018.0        0.896461      37.791667
2  2019.0        0.732277      97.718750
3  2020.0        0.824686      63.989583
4  2021.0        0.844949      56.593750
5  2022.0        0.574486     155.312500
6  2023.0        0.152140     309.468750


Individual detector outages also contribute significantly to lack of data availability. 

In [20]:
sql = '''
SELECT 
    detector_id, 
    sum(duration_days) - 
        (SELECT SUM(duration_days) FROM gwolofs.network_outages) --total network outages
            AS duration_days
FROM gwolofs.rescu_individual_outages
GROUP BY 1
ORDER BY 2 DESC;'''

with con:
    my_data = pd.read_sql(sql, con)
    print(my_data.head(10))

  detector_id  duration_days
0   DW0100DWL    1628.781250
1   DW0020DWG    1574.375000
2   DW0050DWL    1572.541667
3   DW0070DEL    1554.947917
4   DW0020DEG    1541.718750
5   DW0040DER    1526.416667
6   DW0030DWL    1525.291667
7   DW0030DWR    1517.947917
8   DS0040DSR    1339.395833
9   DW0070DEG    1333.510417


In [26]:
sql = '''
SELECT detector_id, primary_road, cross_road, bins_active_percent, bins_active, last_active
FROM gwolofs.i0617_rescu_sensor_eval
WHERE classify = 'good'
'''

with con:
    my_data = pd.read_sql(sql, con)
    print('Good detectors')
    print(my_data.head(10))
    
sql = '''
SELECT detector_id, primary_road, cross_road, bins_active_percent, bins_active, last_active
FROM gwolofs.i0617_rescu_sensor_eval
WHERE classify = 'bad'
'''

with con:
    my_data = pd.read_sql(sql, con)    
    print('Bad (mostly inactive) detectors')
    print(my_data.head(10))
    
sql = '''
SELECT detector_id, primary_road, cross_road, bins_active_percent, bins_active, last_active
FROM gwolofs.i0617_rescu_sensor_eval
WHERE classify = 'inactive'
'''

with con:
    my_data = pd.read_sql(sql, con)
    print('Inactive detectors')
    print(my_data.head(10))

Good detectors
  detector_id          primary_road      cross_road  bins_active_percent  \
0   DE0020DEG     F G Gardiner Xy E   JARVIS STREET                0.829   
1   DE0020DWG     F G Gardiner Xy W   JARVIS STREET                0.829   
2   DN0090DND  Don Valley Parkway N    St Dennis Dr                0.996   
3   DN0090DSD  Don Valley Parkway S    St Dennis Dr                0.996   
4   DN0095DSD  Don Valley Parkway S   Spanbridge Rd                0.829   
5   DN0100DND  Don Valley Parkway N    St Dennis Dr                0.835   
6   DN0100DSD  Don Valley Parkway S    St Dennis Dr                0.835   
7   DN0110DND  Don Valley Parkway N  Eglinton Ave E                0.999   
8   DN0120DSD  Don Valley Parkway S      Wynford Dr                0.828   
9   DN0125DND  Don Valley Parkway N     LAWRENCE AV                0.821   

   bins_active         last_active  
0         3195 2023-05-15 14:30:00  
1         3195 2023-05-15 14:30:00  
2         3839 2023-05-15 14:30:00  


Plotted detectors in QGIS to identify geographic distribution which is lacking. 