# Download Environment Canada Weather Data for BC

This notebook can be used to browse and download hourly weather data from Environment Canada weather stations in British Columbia.

In [183]:
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import datetime
import folium
import requests
import os, shutil

%matplotlib notebook

# Data Coverage

* Define the time period to search and the acceptable proportion of missing data

In [184]:
# Search window: from 1 January 2010 up to the moment this cell is run.
Start = datetime.datetime(2010, 1, 1)
End = datetime.datetime.now()
Period_Length = End - Start

# Calendar years that bound the search window.
Start_Year, End_Year = Start.year, End.year

# Acceptable proportion of missing data
# for a given month/year.
Missing_Data_Threshold = 0.1

# Met Stations

Filter Data:
* Start year (optional)
    * Calculate record lengths
* Provinces

In [185]:

# Provinces to keep; matched against the ENG_PROV_NAME column.
Provinces = ['BRITISH COLUMBIA']

# Current calendar year, evaluated once so the filter below uses the same value.
End_Year = datetime.datetime.now().year

Stations = pd.read_csv('station_list/Stations.csv')

# Columns that hold dates; parse them so the .dt accessor and date arithmetic work.
Dates = ['FIRST_DATE','LAST_DATE','HLY_FIRST_DATE','HLY_LAST_DATE','DLY_FIRST_DATE','DLY_LAST_DATE',
         'MLY_FIRST_DATE','MLY_LAST_DATE']

for date in Dates:
    Stations[date] = pd.to_datetime(Stations[date])

# Length of each station's hourly record (last hourly date minus first hourly date).
Stations['HLY_REC_LENGTH'] = Stations['HLY_LAST_DATE']-Stations['HLY_FIRST_DATE']

# Keep stations in the selected provinces whose hourly record reaches the current year.
Stations = Stations.loc[((Stations['HLY_LAST_DATE'].dt.year>=End_Year)&
                        (Stations['ENG_PROV_NAME'].isin(Provinces)))].copy()

# Index by station name; drop=False keeps STATION_NAME available as a column,
# which the final count below reads.
Stations = Stations.set_index('STATION_NAME', drop=False)

# Number of stations that passed the filter (displayed as the cell output).
Stations['STATION_NAME'].count()

110

# Webmap of Stations

In [247]:

# Mapbox access token for the optional satellite basemap. It is read from the
# MAPBOX_TOKEN environment variable so no credential is stored in the notebook;
# any token that was previously committed here should be rotated.
token = os.environ.get('MAPBOX_TOKEN', '')
tileurl = 'https://api.mapbox.com/v4/mapbox.satellite/{z}/{x}/{y}@2x.png?access_token=' + str(token)

# Map with folium's default basemap. To use the Mapbox satellite tiles instead,
# also pass tiles=tileurl, attr='Mapbox'.
Map = folium.Map(
    location=[53,-121], zoom_start=5)

def pop_Up_Table(row):
    """Build the HTML popup table describing one station.

    Parameters
    ----------
    row : mapping
        Must provide 'STATION_NAME' plus 'HLY_FIRST_DATE' and 'HLY_LAST_DATE';
        the two dates must be objects with a ``strftime`` method.

    Returns
    -------
    str
        An HTML ``<table>`` with three rows: station name, start date and
        end date, the dates formatted as dd/mm/YYYY.
    """
    # Local import keeps the function usable without touching the notebook's import cell.
    from html import escape

    # The station name is escaped so characters such as '&' or '<' cannot break the markup.
    pop_up='''<table>
    <tr><td><b>Station Name: </b></td><td>'''+escape(str(row['STATION_NAME']))+'''</td></tr>
    <tr><td><b>Start Date: </b></td><td>'''+row['HLY_FIRST_DATE'].strftime("%d/%m/%Y")+'''</td></tr>
    <tr><td><b>End Date: </b></td><td>'''+row['HLY_LAST_DATE'].strftime("%d/%m/%Y")+'''</td></tr>
    </table>
    '''
    return pop_up

# Add one circle marker per station. `i` is the index label of the row and is
# used as the popup text; 'y'/'x' are presumably latitude/longitude - confirm
# against the station list.
for i,row in Stations.iterrows():
    folium.CircleMarker(
            location=[row['y'],row['x']],
            radius=5,
            popup=i,
            fill_opacity = 1,
            fill=True,
            color='black',
            # `weight` is the stroke-width option for vector layers;
            # the previous `line_weight` keyword is not a recognised option.
            weight=.35,
            fill_color='blue'
    ).add_to(Map)

# Map.save writes a standalone HTML page, so the file gets an .html extension
# (it was previously written to a .png path). Create the folder if it is missing.
os.makedirs('Summaries', exist_ok=True)
Map.save('Summaries/StationMap.html')

import io
from PIL import Image

# Optional PNG export (left disabled):
# img_data = Map._to_png(5)
# img = Image.open(io.BytesIO(img_data))
# img.save('image.png')

# Display the interactive map as the cell output.
Map

# Download Data

In [243]:
def Clear(folder):
    """Delete everything inside ``folder`` while leaving the folder itself in place.

    Files and symbolic links are unlinked; sub-directories are removed
    recursively. A failure on one entry is printed and the remaining entries
    are still processed. If ``folder`` does not exist there is nothing to
    clear and the function returns without raising.

    Parameters
    ----------
    folder : str
        Path of the directory to empty.
    """
    if not os.path.isdir(folder):
        # e.g. first run on a fresh checkout: nothing to delete.
        return
    for filename in os.listdir(folder):
        file_path = os.path.join(folder, filename)
        try:
            if os.path.isfile(file_path) or os.path.islink(file_path):
                os.unlink(file_path)
            elif os.path.isdir(file_path):
                shutil.rmtree(file_path)
        except Exception as e:
            # Best effort: report the problem and carry on with the other entries.
            print('Failed to delete %s. Reason: %s' % (file_path, e))
Path = 'Data/'
# Make sure the download folder exists before it is emptied and written to.
os.makedirs(Path, exist_ok=True)
print('Cleaning ' +Path+ ' Folder')
Clear(Path)

# Parts of the request URL that are identical for every station.
Root = 'https://api.weather.gc.ca/collections/climate-hourly/items?'
Range = 'datetime='+str(Start_Year)+'-01-01%2000:00:00/'+str(End_Year)+'-12-31%2000:00:00'
# NOTE(review): a single request is made per station with limit=100000 and
# startindex=0. Twelve or more years of hourly data exceed 100000 rows, so
# confirm whether long records are truncated and page with startindex if so.
Last = '&sortby=LOCAL_DATE&f=csv&limit=100000&startindex=0'

for i,row in Stations.iterrows():
    Station = '&CLIMATE_IDENTIFIER='+str(row['CLIMATE_IDENTIFIER'])
    Download = (Root+Range+Station+Last)
    try:
        # A timeout stops one stalled connection from hanging the whole notebook.
        response = requests.get(Download, timeout=120)
        # Do not save an HTTP error body as if it were station data.
        response.raise_for_status()
    except requests.RequestException as e:
        print('Failed to download %s. Reason: %s' % (i, e))
        continue
    # `with` closes the file handle even if the write fails.
    with open(Path+str(i)+".csv", "wb") as f:
        f.write(response.content)
    print('Downloaded: ',i)

Cleaning Data/ Folder
Downloaded:  SANDSPIT
Downloaded:  DELTA BURNS BOG
Downloaded:  HOPE A
Downloaded:  SMITHERS A
Downloaded:  WILLIAMS LAKE A
Downloaded:  FORT ST. JOHN A
Downloaded:  VICTORIA INTL A
Downloaded:  CASTLEGAR A
Downloaded:  FORT NELSON
Downloaded:  PITT MEADOWS CS
Downloaded:  DEASE LAKE (AUT)
Downloaded:  PORT ALBERNI (AUT)
Downloaded:  GREY ISLET (AUT)
Downloaded:  VICTORIA GONZALES CS
Downloaded:  PRINCE GEORGE
Downloaded:  SMITHERS
Downloaded:  MACKENZIE
Downloaded:  PENTICTON A
Downloaded:  KELOWNA UBCO
Downloaded:  VANCOUVER INTL A
Downloaded:  CLINTON A
Downloaded:  SMITHERS AIRPORT AUTO
Downloaded:  POWELL RIVER A
Downloaded:  SQUAMISH AIRPORT
Downloaded:  PRINCETON A
Downloaded:  TOFINO A
Downloaded:  CRANBROOK A
Downloaded:  SPARWOOD CS
Downloaded:  SOLANDER ISLAND (AUT)
Downloaded:  COMOX A
Downloaded:  LYTTON RCS
Downloaded:  KELOWNA
Downloaded:  PRINCE RUPERT
Downloaded:  SANDSPIT AIRPORT AUTO
Downloaded:  PORT HARDY A
Downloaded:  VANCOUVER SEA ISLAND CC

In [248]:

# Hourly timestamps covering the search window. The frequency is given as a
# Timedelta rather than the deprecated 'H' alias.
Range = pd.date_range(Start,End,freq=pd.Timedelta(hours=1))

# Base frames: a single 'Obs' column of ones on the full hourly index, which is
# later counted to get the number of expected observations. They are built here
# directly; the previous code copied them from `Data`, which is only created
# further down, so the cell failed with a NameError on a fresh kernel.
Precip = pd.DataFrame(data = {'Obs':np.ones(Range.shape)},index=Range)
Temp = Precip.copy()

# One accumulator frame per variable, keyed by the column name in the downloaded files.
AllData = {
    'PRECIP_AMOUNT':Precip,
    'TEMP':Temp
}
# Labels used in the output file names.
# NOTE(review): 'Precpitation' is misspelled; left unchanged so existing
# output file names do not change.
Names = {
    'PRECIP_AMOUNT':'Precpitation',
    'TEMP':'Temperature'
}

# Read every downloaded station file and append, per variable, one column named
# after the station's CLIMATE_IDENTIFIER holding only the years with enough data.
for subdir, dirs, files in os.walk(Path):
    for file in files:
        filepath = os.path.join(subdir, file)

        if not filepath.endswith(".csv"):
            continue

        try:
            # LOCAL_DATE is parsed directly and renamed below; the dict form of
            # parse_dates used previously is deprecated in recent pandas.
            SomeData = pd.read_csv(filepath,parse_dates=['LOCAL_DATE'],
                               low_memory=False,
                               dtype = {'CLIMATE_IDENTIFIER':str})
        except pd.errors.EmptyDataError:
            print('Skipping empty file: ', filepath)
            continue
        if SomeData.empty:
            # A header-only file has no station identifier to read.
            print('Skipping file with no rows: ', filepath)
            continue

        SomeData = SomeData.rename(columns={'LOCAL_DATE':'Datetime'}).set_index('Datetime')
        SomeData['PRECIP_AMOUNT'] = SomeData['PRECIP_AMOUNT'].astype(float)
        # Drop unrealistic values: hourly precipitation of 25 or more
        # (presumably mm - confirm units) is treated as missing.
        SomeData.loc[SomeData['PRECIP_AMOUNT']>=25,'PRECIP_AMOUNT']=np.nan

        # .iloc[0] is explicitly positional; plain [0] on a datetime-indexed
        # Series relies on a deprecated fallback.
        Clim_ID = str(SomeData['CLIMATE_IDENTIFIER'].iloc[0])

        # list(...) so the dict values can be reassigned while looping over the keys.
        for key in list(AllData):
            # Align the station's values to the full hourly index.
            Data = pd.DataFrame(data = {'Obs':np.ones(Range.shape)},index=Range)
            Data = Data.join(SomeData[key])
            Data = Data.rename(columns = {key:Clim_ID})
            # Per-year counts: 'Obs' = hours expected, Clim_ID = non-missing values.
            # Grouping on the index year avoids the deprecated 'Y' resample alias.
            Year = Data.groupby(Data.index.year).count()
            Year['Pct'] = Year[Clim_ID]/Year['Obs']
            # Years whose share of non-missing values meets the completeness requirement.
            Good_Years = Year.loc[Year['Pct']>=(1-Missing_Data_Threshold)].index
            AllData[key] = AllData[key].join(
                        Data.loc[Data.index.year.isin(Good_Years),Clim_ID])

# The summary files are written to this folder; create it if it is missing.
os.makedirs('Summaries', exist_ok=True)

for key,val in AllData.items():
    # Precipitation is summed; every other variable is averaged.
    # Grouping on the index year replaces the deprecated resample('Y') alias
    # and yields the year directly as the row label.
    if key == 'PRECIP_AMOUNT':
        agg = 'Totals'
        # A sum of 0 is replaced with NaN (a year with no retained values also sums to 0).
        Yearly = val.groupby(val.index.year).sum().replace([0],np.nan)
        Monthly = val.groupby(val.index.month).sum()
    else:
        agg = 'Averages'
        Yearly = val.groupby(val.index.year).mean()
        Monthly = val.groupby(val.index.month).mean()

    # Drop the helper column and transpose so each row is one station.
    Yearly = Yearly.drop(columns='Obs').T
    Yearly.index = Yearly.index.rename('CLIMATE_IDENTIFIER')

    # NOTE(review): Monthly groups by calendar month across all years combined,
    # so precipitation "totals" are sums over every retained year - confirm
    # this is the intended statistic.
    Monthly = Monthly.drop(columns='Obs').T
    Monthly.index = Monthly.index.rename('CLIMATE_IDENTIFIER')

    Monthly.to_csv('Summaries/Monthly_'+Names[key]+'_'+agg+'.csv')
    Yearly.to_csv('Summaries/Yearly_'+Names[key]+'_'+agg+'.csv')
Stations.to_csv('Summaries/BC_Precip_Stations.csv')
print('Done')

Done
