## This script downloads hourly air quality data from DataBC's FTP server and aggregates it by month and year

In [None]:
import pandas as pd
import numpy as np
import urllib.request 
import os

# Root FTP directory; the download loop appends '<year>/<species>.csv' to it.
Data_Path = "ftp://ftp.env.gov.bc.ca/pub/outgoing/AIR/AnnualSummary/"

# 2009 is the first year for which the data is aggregated yearly.
# Years are kept as strings because they are concatenated into the download path.
Years = list(map(str, range(2009, 2020)))

# The gas/particulate species to download.
# Other species available: "CO", "H2S", "NO", "NO2", "PM10", "SO2", "TRS"
Species = [
    "O3",
    "PM25",
]

# Column types forced when reading Temp.csv.  Identifiers are read as strings so
# that values such as EMS_ID keep any leading zeros.
_TEMP_CSV_DTYPES = {"DATE_PST": str, "STATION_NAME": str, "EMS_ID": str, "PARAMETER": str,
                    "INSTRUMENT": str, "RAW_VALUE": float, "UNIT": str, "ROUNDED_VALUE": float}


def _load_temp_csv():
    """Read 'Temp.csv' into a DataFrame indexed by timestamp, with Month/Year columns."""
    frame = pd.read_csv('Temp.csv', dtype=_TEMP_CSV_DTYPES)
    # The first column holds the observation timestamp: move it into the index,
    # parse it, and call the index 'datetime'.
    frame = frame.set_index(frame.columns[0])
    frame.index = pd.to_datetime(frame.index).rename('datetime')
    frame['Month'] = frame.index.month
    frame['Year'] = frame.index.year
    return frame


def ReadFile(Data = None):
    """Load 'Temp.csv' from the working directory and optionally append it to ``Data``.

    Parameters
    ----------
    Data : pandas.DataFrame or None
        Previously loaded observations.  When None, only the freshly read file
        is returned; otherwise the new rows are concatenated after ``Data``.

    Returns
    -------
    pandas.DataFrame
        Observations indexed by a DatetimeIndex named 'datetime', with integer
        'Month' and 'Year' columns derived from that index.
    """
    NewData = _load_temp_csv()
    if Data is None:
        return NewData
    # DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent.
    return pd.concat([Data, NewData])

# For every species: download one CSV per year, stack them, filter, then write
# monthly, yearly and year-month mean tables (one row per station).
for species in Species:
    print('Downloading '+species)
    Data = None
    for year in Years:
        # Each year/species pair is a separate file; stage it locally as
        # Temp.csv, which is the file ReadFile reads.
        path = Data_Path+year+'/'+species+'.csv'
        urllib.request.urlretrieve(path, 'Temp.csv')
        Data = ReadFile(Data)
    # The data file for the last requested year contains a few observations for
    # January of the following year; we need to remove those.  The cutoff is tied
    # to Years so it stays correct when the requested range changes.
    Data = Data.loc[Data['Year']<=int(Years[-1])].copy()

    # Some stations have lots of missing data.  Keep only stations whose row
    # count exceeds 50% of the row count of the best-covered station.
    Keep = Data.groupby('EMS_ID').count()['STATION_NAME']
    Keep = Keep[Keep>Keep.max()*.5].index
    Data = Data.loc[Data['EMS_ID'].isin(Keep)].copy()

    # Aggregate data by month (pooled across all years) and write to a file
    AggData = Data.groupby(['STATION_NAME','Month']).agg({'RAW_VALUE':'mean'})
    AggData.unstack()['RAW_VALUE'].to_csv(species+'_Monthly_Averages.csv')
    # Aggregate data by year and write to a file
    AggData = Data.groupby(['STATION_NAME','Year']).agg({'RAW_VALUE':'mean'})
    AggData.unstack()['RAW_VALUE'].to_csv(species+'_Yearly_Averages.csv')

    # Year_Month encodes year and month as a single integer YYYYMM (e.g. 200901),
    # giving one time-series column per calendar month.
    Data['Year_Month'] = Data['Year']*100+Data['Month']
    AggData = Data.groupby(['STATION_NAME','Year_Month']).agg({'RAW_VALUE':'mean'})
    Timeseries = AggData.unstack()
    Timeseries['RAW_VALUE'].to_csv(species+'_Timeseries.csv')
    # After unstacking, each row is one station.
    print('Completed '+species,': there are ',Timeseries.shape[0],' stations available to analyze.')
    print()
# Clean up the staging file; it only exists if at least one download happened.
if os.path.exists('Temp.csv'):
    os.remove('Temp.csv')
print('Done!!')

In [7]:
# Download the monitoring-station metadata table, keep a local copy as
# MonitoringStations.csv, and display it as the cell output.
stations_url = 'ftp://ftp.env.gov.bc.ca/pub/outgoing/AIR/Air_Monitoring_Stations/bc_air_monitoring_stations.csv'
Sites = pd.read_csv(stations_url)
Sites.to_csv('MonitoringStations.csv')
Sites

Unnamed: 0,STATION_NAME_FULL,STATION_NAME,EMS_ID,NAPS_ID,SERIAL,CITY,LAT,LONG,ELEVATION,STATUS_DESCRIPTION,OWNER,REGION,AIRZONE,STATUS,OPENED,CLOSED
0,100 Mile House,100 Mile House,M116006,,374,100 Mile House,51.65420,-121.375000,1000.0,NON OPERATIONAL,ENV,05 - Cariboo,,OFF,1992-11-11,
1,100 Mile House BCAC,100 Mile House BCAC,E218444,,228,100 MIle House,51.64610,-121.937000,0.0,NON OPERATIONAL,ENV,05 - Cariboo,,OFF,2010-02-16,
2,Abbotsford A Columbia Street,Abbotsford A Columbia Street,E289309,,428,Abbotsford,49.02150,-122.326600,65.0,METRO VANCOUVER,MVRD,02 - Lower Mainland,Lower Fraser Valley,ON,2012-07-25,
3,Abbotsford A Columbia Street Met,Abbotsford A Columbia Street,E289309,,429,Abbotsford,49.02150,-122.326600,65.0,METRO VANCOUVER,MVRD,02 - Lower Mainland,Lower Fraser Valley,ON,2012-07-25,
4,Abbotsford Airport,Abbotsford Airport,0310081,,306,Abbotsford,49.03060,-122.376100,40.0,NON OPERATIONAL,MVRD,02 - Lower Mainland,,OFF,1978-01-07,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
451,Williams Lake Columneetza School,Williams Lake Columneetza School,0550502,102701.0,16,Williams Lake,52.14428,-122.150391,631.0,OPERATIONAL,ENV,05 - Cariboo,Central Interior,ON,1992-04-15,
452,Williams Lake Columneetza School_60,Williams Lake Columneetza School,0550502,102701.0,549,Williams Lake,52.14428,-122.150391,631.0,OPERATIONAL,ENV,05 - Cariboo,Central Interior,ON,2018-08-01,
453,Williams Lake CRD Library,Williams Lake CRD Library,E248797,102706.0,200,Williams Lake,52.13083,-122.142220,609.0,NON OPERATIONAL,ENV,05 - Cariboo,,OFF,2002-07-26,2014-06-17
454,Williams Lake Skyline School,Williams Lake Skyline School,0605020,,378,Williams Lake,52.11610,-122.132500,650.0,NON OPERATIONAL,ENV,05 - Cariboo,,OFF,2001-08-08,


Unnamed: 0_level_0,DATE,TIME,STATION_NAME,STATION_NAME_FULL,EMS_ID,NAPS_ID,RAW_VALUE,ROUNDED_VALUE,UNIT,INSTRUMENT,PARAMETER,OWNER,REGION,Month,Year
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2009-01-01 01:00:00,2009-01-01,01:00,Victoria Topaz,VICTORIA TOPAZ,E231866,100304,7.70000,7.7,ppb,NOX_APIT200,NO2,ENV,01 - Vancouver Island,1,2009
2009-01-01 02:00:00,2009-01-01,02:00,Victoria Topaz,VICTORIA TOPAZ,E231866,100304,8.10000,8.1,ppb,NOX_APIT200,NO2,ENV,01 - Vancouver Island,1,2009
2009-01-01 03:00:00,2009-01-01,03:00,Victoria Topaz,VICTORIA TOPAZ,E231866,100304,1.00000,1.0,ppb,NOX_APIT200,NO2,ENV,01 - Vancouver Island,1,2009
2009-01-01 04:00:00,2009-01-01,04:00,Victoria Topaz,VICTORIA TOPAZ,E231866,100304,1.40000,1.4,ppb,NOX_APIT200,NO2,ENV,01 - Vancouver Island,1,2009
2009-01-01 05:00:00,2009-01-01,05:00,Victoria Topaz,VICTORIA TOPAZ,E231866,100304,,,ppb,NOX_APIT200,NO2,ENV,01 - Vancouver Island,1,2009
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2015-12-31 20:00:00,2015-12-31,20:00,Pitt Meadows Meadowlands School,PITT MEADOWS MEADOWLANDS SCHOOL,E232244,10,17.81763,17.8,ppb,UNSPECIFIED,NO2,MVRD,02 - Lower Mainland,12,2015
2015-12-31 21:00:00,2015-12-31,21:00,Pitt Meadows Meadowlands School,PITT MEADOWS MEADOWLANDS SCHOOL,E232244,10,13.39902,13.4,ppb,UNSPECIFIED,NO2,MVRD,02 - Lower Mainland,12,2015
2015-12-31 22:00:00,2015-12-31,22:00,Pitt Meadows Meadowlands School,PITT MEADOWS MEADOWLANDS SCHOOL,E232244,10,11.81053,11.8,ppb,UNSPECIFIED,NO2,MVRD,02 - Lower Mainland,12,2015
2015-12-31 23:00:00,2015-12-31,23:00,Pitt Meadows Meadowlands School,PITT MEADOWS MEADOWLANDS SCHOOL,E232244,10,11.94426,11.9,ppb,UNSPECIFIED,NO2,MVRD,02 - Lower Mainland,12,2015
