## This script downloads hourly air quality data from DataBC's FTP server and aggregates it by month and year

In [7]:
import pandas as pd
import numpy as np
import urllib.request 
import os


# Root of the FTP server; every download path used below is appended to this prefix.
Host = "ftp://ftp.env.gov.bc.ca/"


# 2009 is the first year the data is aggregated yearly; the 2021 annual file was
# not available when this was written, so 2020 is the last year requested.
Last_Year = 2020
# Years are kept as strings because they are concatenated into the download path.
# Note that the range starts at 2010.
Years = list(map(str, range(2010, Last_Year + 1)))
# The gas/particulate parameters to download.
# Other species available: "H2S", "NO", "NO2", "PM10", "SO2", "TRS"
Species = ["TEMP_MEAN", "PM25", "O3", "CO"]

def ReadFile(Data = None):
    """Load 'Temp.csv' from the working directory and stack it under ``Data``.

    The first column of the file is parsed as timestamps and becomes the index
    (named ``datetime``); ``Month`` and ``Year`` columns are derived from it.

    Parameters
    ----------
    Data : pandas.DataFrame or None
        Rows loaded by earlier calls. When None, only the newly read rows are
        returned. The frame passed in is not modified.

    Returns
    -------
    pandas.DataFrame
        ``Data`` followed by the rows read from 'Temp.csv'.
    """
    # The file is read exactly once, whichever branch follows.
    # index_col/parse_dates on column 0 replaces the dict form of parse_dates,
    # which is deprecated in recent pandas releases.
    NewData = pd.read_csv(
        'Temp.csv',
        index_col=0,
        parse_dates=[0],
        dtype={"DATE_PST":str,"STATION_NAME":str,"EMS_ID":str,"PARAMETER":str,
               "INSTRUMENT":str,"RAW_VALUE":float,"UNIT":str,"ROUNDED_VALUE":float},
        encoding='ISO-8859-1',
    )
    # Keep the index name the rest of the notebook (and its CSV output) expects.
    NewData.index.name = 'datetime'
    NewData['Month'] = NewData.index.month
    NewData['Year'] = NewData.index.year

    if Data is None:
        return NewData
    # DataFrame.append was removed in pandas 2.0; concat is the supported way
    # to stack the frames and, like append, returns a new object.
    return pd.concat([Data, NewData])

# The aggregated CSVs are written into Data/; create it up front so to_csv cannot
# fail on a missing folder.
os.makedirs('Data', exist_ok=True)

# First calendar year that is not covered by the annual summaries requested in `Years`.
# Derived from Last_Year instead of being hard-coded, and kept under its own name so
# the integer Last_Year is never overwritten (re-running the cell stays safe).
First_Live_Year = Last_Year + 1

for species in Species:
    print('Downloading '+species)
    Data = None

    # One annual summary file per year, each downloaded to Temp.csv and stacked by ReadFile.
    for year in Years:
        print(year)
        Data_Path = "pub/outgoing/AIR/AnnualSummary/"
        path = Host+Data_Path+year+'/'+species+'.csv'
        urllib.request.urlretrieve(path, 'Temp.csv')
        Data = ReadFile(Data)

    # Followed by the year-to-date file.
    Data_Path = 'pub/outgoing/AIR/Hourly_Raw_Air_Data/Year_to_Date/'
    path = Host+Data_Path+species+'.csv'
    urllib.request.urlretrieve(path, 'Temp.csv')
    Data = ReadFile(Data)
    # NOTE(review): the original author noted that the file for the last annual year
    # contains a few January observations of the following year. They are not filtered
    # out here and may overlap with the year-to-date file -- confirm whether rows
    # should be de-duplicated.

    # Some stations have lots of missing data. Keep only the stations whose row count
    # exceeds half the row count of the best-covered station.
    Keep = Data.groupby('EMS_ID').count()['STATION_NAME']
    Keep = Keep[Keep>Keep.max()*.5].index
    Data = Data.loc[Data['EMS_ID'].isin(Keep)].copy()

    # Output files use the short name TEMP for the TEMP_MEAN parameter.
    label = 'TEMP' if species == 'TEMP_MEAN' else species

    # Aggregate by station and year: one row per station, one column per year.
    YearlyAgg = Data.groupby(['EMS_ID','Year']).agg({'RAW_VALUE':'mean'})
    YearlyAgg.unstack()['RAW_VALUE'].to_csv('Data/'+label+'_Yearly_Averages.csv')

    # Aggregate by station and month; Year_Month encodes e.g. March 2015 as 201503.
    Data['Year_Month'] = Data['Year']*100+Data['Month']
    MonthlyAgg = Data.groupby(['EMS_ID','Year_Month']).agg({'RAW_VALUE':'mean'})
    MonthlyAgg.unstack()['RAW_VALUE'].to_csv('Data/'+label+'_MonthlyAverages.csv')

    # Summer means (June through September inclusive) per station and year.
    Summer = Data.loc[((Data['Month']>=6)&(Data['Month']<=9))].copy()
    SummerAgg = Summer.groupby(['EMS_ID','Year']).agg({'RAW_VALUE':'mean'})
    SummerAgg.unstack()['RAW_VALUE'].to_csv('Data/'+label+'_SummerAverages.csv')

    # Everything from the first year after Last_Year onward, one column per station.
    # RAW_VALUE is selected before averaging: calling mean() on the whole frame would
    # try to average the text columns, which raises on pandas 2.x.
    # NOTE(review): rows are grouped by their exact timestamp, so despite the file name
    # no daily averaging takes place -- confirm whether grouping by calendar day was intended.
    Recent = Data.loc[Data.index.year>=First_Live_Year]
    Recent = Recent.groupby([Recent.index,Recent['EMS_ID']])['RAW_VALUE'].mean().unstack()
    Recent.to_csv('Data/'+label+'_Daily_Averages_'+str(First_Live_Year)+'_Onward.csv')

    # The station count is taken from the summer aggregation, the last one computed.
    print('Completed '+label,': there are ',SummerAgg.unstack().shape[0],' stations available to analyze.')
    print()
os.remove('Temp.csv')
print('Done!!')

Downloading TEMP_MEAN
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
Completed TEMP : there are  47  stations available to analyze.

Downloading PM25
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
Completed PM25 : there are  48  stations available to analyze.

Downloading O3
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
Completed O3 : there are  41  stations available to analyze.

Downloading CO
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
Completed CO : there are  21  stations available to analyze.

Done!!


In [6]:
# Read the monitoring-station table directly from the FTP server, save a local
# copy as MonitoringStations.csv, and display it as the cell output.
stations_url = 'ftp://ftp.env.gov.bc.ca/pub/outgoing/AIR/Air_Monitoring_Stations/bc_air_monitoring_stations.csv'
Sites = pd.read_csv(stations_url)
Sites.to_csv('MonitoringStations.csv')
Sites

Unnamed: 0,STATION_NAME_FULL,STATION_NAME,EMS_ID,NAPS_ID,SERIAL,CITY,LAT,LONG,ELEVATION,STATUS_DESCRIPTION,OWNER,REGION,AIRZONE,STATUS,OPENED,CLOSED
0,100 Mile House,100 Mile House,M116006,,374,100 Mile House,51.65420,-121.375000,1000.0,NON OPERATIONAL,ENV,05 - Cariboo,,OFF,1992-11-11,
1,100 Mile House BCAC,100 Mile House BCAC,E218444,,228,100 MIle House,51.64610,-121.937000,0.0,NON OPERATIONAL,ENV,05 - Cariboo,,OFF,2010-02-16,
2,Abbotsford A Columbia Street,Abbotsford A Columbia Street,E289309,,428,Abbotsford,49.02150,-122.326600,65.0,METRO VANCOUVER,MVRD,02 - Lower Mainland,Lower Fraser Valley,ON,2012-07-25,
3,Abbotsford A Columbia Street Met,Abbotsford A Columbia Street,E289309,,429,Abbotsford,49.02150,-122.326600,65.0,METRO VANCOUVER,MVRD,02 - Lower Mainland,Lower Fraser Valley,ON,2012-07-25,
4,Abbotsford Airport,Abbotsford Airport,0310081,,306,Abbotsford,49.03060,-122.376100,40.0,NON OPERATIONAL,MVRD,02 - Lower Mainland,,OFF,1978-01-07,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
458,Williams Lake Columneetza School_60,Williams Lake Columneetza School,0550502,102701.0,549,Williams Lake,52.14428,-122.150391,631.0,OPERATIONAL,ENV,05 - Cariboo,Central Interior,ON,2018-08-01,
459,Williams Lake CRD Library,Williams Lake CRD Library,E248797,102706.0,200,Williams Lake,52.13083,-122.142220,609.0,NON OPERATIONAL,ENV,05 - Cariboo,,OFF,2002-07-26,2014-06-17
460,Williams Lake Skyline School,Williams Lake Skyline School,0605020,,378,Williams Lake,52.11610,-122.132500,650.0,NON OPERATIONAL,ENV,05 - Cariboo,,OFF,2001-08-08,
461,Williams Lake Water Tower,Williams Lake Water Tower,E222242,,230,Williams Lake,51.64610,-121.293700,0.0,NON OPERATIONAL,ENV,05 - Cariboo,,OFF,2010-02-16,


In [3]:
import chardet
# Guess the text encoding of the downloaded file from its first 100000 bytes;
# the file is opened in binary mode so no decoding happens before detection.
with open('Temp.csv', 'rb') as handle:
    sample = handle.read(100000)
result = chardet.detect(sample)
result

{'encoding': 'ISO-8859-1', 'confidence': 0.73, 'language': ''}