## This script downloads hourly air-quality data from DataBC's FTP server and aggregates it by month and year

In [32]:
import pandas as pd
import numpy as np
import urllib.request 
import os

## Base FTP location; files are fetched from <Data_Path><year>/<species>.csv
Data_Path = "ftp://ftp.env.gov.bc.ca/pub/outgoing/AIR/AnnualSummary/"

## Years to download, stored as strings so they can be spliced into the URL.
## 2009 is the first year the data is aggregated yearly; 2017 is the last one requested.
Years = list(map(str, range(2009, 2018)))
## The gas/particulate species for which a CSV is requested in each year
Species = "CO H2S NO NO2 O3 PM10 PM25 SO2 TRS".split()

def ReadFile(Data = None, FileName = 'Temp.csv'):
    """Read one downloaded CSV and optionally stack it under earlier data.

    The first column of the file is parsed as timestamps and becomes the
    index (named 'datetime'); it is removed from the regular columns.
    Integer 'Month' and 'Year' columns derived from that index are added.

    Parameters
    ----------
    Data : pandas.DataFrame or None
        Previously read data. When given, the new rows are concatenated
        after it; when None, only the new rows are returned.
    FileName : str
        CSV file to read. Defaults to 'Temp.csv', the file this function
        always read before the parameter existed.

    Returns
    -------
    pandas.DataFrame
        The new rows, or Data followed by the new rows.
    """
    NewData = pd.read_csv(FileName,
                          dtype={"DATE_PST":str,"STATION_NAME":str,"EMS_ID":str,"PARAMETER":str,
                                 "INSTRUMENT":str,"RAW_VALUE":float,"UNIT":str,"ROUNDED_VALUE":float})
    # Convert the first column explicitly: the dict form of `parse_dates`
    # is deprecated in recent pandas releases.
    Stamps = pd.to_datetime(NewData.pop(NewData.columns[0]))
    NewData.index = pd.DatetimeIndex(Stamps, name='datetime')
    NewData['Month'] = NewData.index.month
    NewData['Year'] = NewData.index.year
    if Data is None:
        return NewData
    # DataFrame.append was removed in pandas 2.0; concat is the replacement.
    return pd.concat([Data, NewData])

# For every species: download each year's hourly CSV, combine the years,
# and write per-station monthly and yearly mean RAW_VALUE tables.
try:
    for species in Species:
        print('Downloading '+species)
        # Read each year on its own and stack once at the end; this avoids
        # re-copying the accumulated data for every additional year.
        Frames = []
        for year in Years:
            path = Data_Path+year+'/'+species+'.csv'
            # Each download overwrites the same scratch file
            urllib.request.urlretrieve(path, 'Temp.csv')
            Frames.append(ReadFile())
        Data = pd.concat(Frames)
        # NOTE: a former `Data = Data.loc['Year']` step was dropped here. It
        # looked up a row labelled 'Year' in the datetime index, which raises
        # KeyError; 'Year' is a column and is used for grouping below.
        # Aggregate data by month (pooled over all years) and write to a file
        AggData = Data.groupby(['STATION_NAME','Month']).agg({'RAW_VALUE':'mean'})
        AggData.unstack()['RAW_VALUE'].to_csv(species+'_Monthly_Averages.csv')
        # Aggregate data by year and write to a file
        AggData = Data.groupby(['STATION_NAME','Year']).agg({'RAW_VALUE':'mean'})
        Yearly = AggData.unstack()
        Yearly['RAW_VALUE'].to_csv(species+'_Yearly_Averages.csv')
        # One row per station after unstacking the year level
        print('Completed '+species,': there are ',Yearly.shape[0],' stations available to analyze.')
        print()
finally:
    # Remove the scratch download even when a download or read fails
    if os.path.exists('Temp.csv'):
        os.remove('Temp.csv')

Downloading CO
Completed CO : there are  4  stations available to analyze.

Downloading H2S
Completed H2S : there are  3  stations available to analyze.

Downloading NO
Completed NO : there are  53  stations available to analyze.

Downloading NO2
Completed NO2 : there are  53  stations available to analyze.

Downloading O3
Completed O3 : there are  46  stations available to analyze.

Downloading PM10
Completed PM10 : there are  21  stations available to analyze.

Downloading PM25
Completed PM25 : there are  66  stations available to analyze.

Downloading SO2
Completed SO2 : there are  56  stations available to analyze.

Downloading TRS
Completed TRS : there are  25  stations available to analyze.

