## This script downloads air quality data from DataBC's FTP server and aggregates it by month and year

In [25]:
import pandas as pd
import numpy as np
import urllib.request 
import os

## Base URL of the FTP folder; a year and a species file name are appended to it when downloading
Data_Path = "ftp://ftp.env.gov.bc.ca/pub/outgoing/AIR/AnnualSummary/"

## Years to fetch, kept as strings so they can be joined into the URL.
## 2009 is the first year the data is aggregated yearly.
Years = list(map(str, range(2009, 2018)))

## The gas/particulate data sets available (one CSV per species per year)
Species = [
    "CO",
    "H2S",
    "NO",
    "NO2",
    "O3",
    "PM10",
    "PM25",
    "SO2",
    "TRS",
]

def ReadFile(Data = None, FileName = 'Temp.csv'):
    """Read one downloaded CSV and, optionally, stack it under earlier data.

    The first column of the file is parsed as timestamps and becomes the
    index (named 'datetime'); 'Month' and 'Year' columns derived from that
    index are added so the caller can group on them.

    Parameters
    ----------
    Data : pandas.DataFrame or None
        Frame returned by a previous call. When given, the newly read rows
        are appended below it; when None, only the new rows are returned.
    FileName : str
        Path of the CSV file to read. Defaults to 'Temp.csv', the file the
        download loop writes.

    Returns
    -------
    pandas.DataFrame
        The new rows alone, or `Data` followed by the new rows.
    """
    NewData = pd.read_csv(FileName,
                          dtype={"DATE_PST":str,"STATION_NAME":str,"EMS_ID":str,
                                 "PARAMETER":str,"INSTRUMENT":str,"RAW_VALUE":float,
                                 "UNIT":str,"ROUNDED_VALUE":float})
    # Convert the first column to a DatetimeIndex explicitly instead of using
    # the dict form of `parse_dates`, which newer pandas versions deprecate.
    Stamps = pd.to_datetime(NewData.pop(NewData.columns[0]))
    NewData.index = pd.DatetimeIndex(Stamps, name='datetime')
    NewData['Month'] = NewData.index.month
    NewData['Year'] = NewData.index.year
    if Data is None:
        return NewData
    # DataFrame.append was removed in pandas 2.0; concat is the replacement.
    return pd.concat([Data, NewData])

## For every species: download each year's CSV, stack the years together,
## then write per-station monthly and yearly means of RAW_VALUE to CSV files.
for species in Species:
    print('Downloading '+species)
    Data = None
    for year in Years:
        # Use the loop's own year here. Indexing Years[-1] fetched the last
        # year's file on every pass, so every "year" held the same data.
        path = Data_Path+year+'/'+species+'.csv'
        # Each download overwrites the same scratch file, which ReadFile reads back
        urllib.request.urlretrieve(path, 'Temp.csv')
        Data = ReadFile(Data)
    # Aggregate data by month and write to a file (one row per station, one column per month)
    AggData = Data.groupby(['STATION_NAME','Month']).agg({'RAW_VALUE':'mean'})
    AggData.unstack().to_csv(species+'_Monthly_Averages.csv')
    # Aggregate data by year and write to a file (one row per station, one column per year)
    AggData = Data.groupby(['STATION_NAME','Year']).agg({'RAW_VALUE':'mean'})
    YearlyTable = AggData.unstack()
    YearlyTable.to_csv(species+'_Yearly_Averages.csv')
    print('Completed '+species,': there are ',YearlyTable.shape[0],' stations available to analyze.')
# Remove the scratch download once every species has been processed
os.remove('Temp.csv')

Downloading CO
Retreived 2009
Retreived 2010
Retreived 2011
Retreived 2012
Retreived 2013
Retreived 2014
Retreived 2015
Retreived 2016
Retreived 2017
Completed CO
There are  4  stations available to analyze.
Downloading H2S
Retreived 2009
Retreived 2010
Retreived 2011
Retreived 2012
Retreived 2013
Retreived 2014
Retreived 2015
Retreived 2016
Retreived 2017
Completed H2S
There are  3  stations available to analyze.
Downloading NO
Retreived 2009
Retreived 2010
Retreived 2011
Retreived 2012
Retreived 2013
Retreived 2014
Retreived 2015
Retreived 2016
Retreived 2017
Completed NO
There are  53  stations available to analyze.
Downloading NO2
Retreived 2009
Retreived 2010
Retreived 2011
Retreived 2012
Retreived 2013
Retreived 2014
Retreived 2015
Retreived 2016
Retreived 2017
Completed NO2
There are  53  stations available to analyze.
Downloading O3
Retreived 2009
Retreived 2010
Retreived 2011
Retreived 2012
Retreived 2013
Retreived 2014
Retreived 2015
Retreived 2016
Retreived 2017
Completed O3

In [65]:
# import pandas as pd

# for DataSet in ['PM25','NOx']:
# Data = pd.read_csv('PM25.csv',parse_dates={'datetime':[0]},index_col=['datetime'])

# NOTE(review): `Yearly` is not defined in any cell visible here -- presumably it
# comes from an earlier interactive session; confirm before re-running this cell.
# Prints the (rows, columns) shape of the unstacked table.
print(Yearly.unstack().shape)


(118, 18)


In [36]:
# level_values = Data.index.get_level_values
# print(level_values)
# Monthly = Data.groupby(['STATION_NAME']+[pd.Grouper(freq='M', level=0)]).mean()
# Station = Data.groupby('STATION_NAME').first()
# print(Monthly.shape)#=Monthly.index.levels[1].month.values
# for mo in range(1,12):
#     print(Monthly.unstack()['datetime'].month)
# Monthly['RAW_VALUE'].unstack().plot()
# Monthly.reset_index()
# Data.to_csv('PM2_5.csv')


(9251, 2)
