In [1]:
import pandas as pd
import matplotlib.pyplot as plt

import re

from datetime import datetime, timedelta
from dateutil import rrule

from bs4 import BeautifulSoup
import requests

Sample webpage listing the weather stations in Canada:

https://climate.weather.gc.ca/historical_data/search_historic_data_stations_e.html?searchType=stnProv&timeframe=1&lstProvince=&optLimit=yearRange&StartYear=1840&EndYear=2023&Year=2023&Month=10&Day=4&selRowPerPage=100&txtCentralLatMin=0&txtCentralLatSec=0&txtCentralLongMin=0&txtCentralLongSec=0&startRow=8801

In [2]:
# Read the station listing from the local Excel workbook and display it.
stations = pd.read_excel('Weather_Stations.xlsx')
stations

Unnamed: 0,StationID,Name,Intervals,Year Start,Year End,Province,Duration,Observed_YS,Observed_YE,Observed_Dr
0,10700,(AE) BOW SUMMIT,"['Daily', 'Monthly']",1998,2007,AB,9,NaT,NaT,0
1,155,COMOX A,"['Hourly', 'Daily', 'Monthly']",1953,2023,BC,70,1953-01-01,2023-10-15,70
2,2030,ACADIA VALLEY,"['Daily', 'Monthly']",1980,1991,AB,11,NaT,NaT,0
3,47748,ACADIA VALLEY,"['Hourly', 'Daily']",2009,2023,AB,14,NaT,NaT,0
4,2032,ACADIA VALLEY CDA EPF,"['Daily', 'Monthly']",1965,1965,AB,0,NaT,NaT,0
...,...,...,...,...,...,...,...,...,...,...
8814,48168,WHITEHORSE AUTO,"['Hourly', 'Daily']",2009,2023,YT,14,NaT,NaT,0
8815,1618,WHITEHORSE RIVERDALE,"['Daily', 'Monthly']",1959,2012,YT,53,NaT,NaT,0
8816,26988,WHITEHORSE WSO,"['Daily', 'Monthly']",1996,1998,YT,2,NaT,NaT,0
8817,1619,WOLF CREEK,"['Daily', 'Monthly']",1969,1974,YT,5,NaT,NaT,0


In [3]:
# Stations to download, keyed by station ID.
# Each value is [station name, first year, last year]; the download loops
# read the name from index 0 and the year range from indices 1 and 2.
stID = {
    155:  ['COMOX A', 1953, 2023],
    6781: ['HOPEDALE (AUT)', 1953, 2023],
    6354: ['GREENWOOD A', 1953, 2023],
    3987: ['ARMSTRONG (AUT)', 1953, 2023],
    5126: ['TRENTON A', 1953, 2023],
    2832: ['COLD LAKE A', 1954, 2023],
    1739: ['CAPE DYER', 1955, 2023],
    1633: ['CAPE PARRY A', 1956, 2023],
    3649: ['PILOT MOUND (AUT)', 1957, 2023],
    1556: ['HAINES JUNCTION', 1960, 2023],
}

In [5]:
def get_data(stationID, year, month, by = 2):
    """Download one bulk-data CSV from climate.weather.gc.ca as a DataFrame.

    Parameters
    ----------
    stationID : int or str
        Value sent in the ``stationID`` query field.
    year : int
        Value sent in the ``Year`` query field.
    month : int
        Value sent in the ``Month`` query field; only used when ``by == 1``.
    by : int, default 2
        ``1`` requests ``timeframe=1`` for a single month (used by this
        notebook for hourly data); ``2`` requests ``timeframe=2`` for a
        whole year (used for daily data).

    Returns
    -------
    pandas.DataFrame
        The CSV response parsed by ``pd.read_csv``.

    Raises
    ------
    ValueError
        If ``by`` is neither 1 nor 2.
    """
    base_url = "http://climate.weather.gc.ca/climate_data/bulk_data_e.html?"

    if by == 1:
        query_url = "format=csv&stationID={}&Year={}&Month={}&timeframe=1".format(stationID, year, month)
    elif by == 2:
        query_url = "format=csv&stationID={}&Year={}&timeframe=2".format(stationID, year)
    else:
        # Fail with a clear message before any request is made, instead of
        # hitting an unbound `query_url` further down.
        raise ValueError(f"by must be 1 (hourly) or 2 (daily), got {by!r}")

    return pd.read_csv(base_url + query_url)

In [9]:
# Daily data
# For every station in `stID`, download one CSV per year, keep only the rows
# that have a mean temperature, write a per-station file, and finally write
# one combined file.
# NOTE: DataFrame.to_csv does not create missing folders, so the `data/`
# directory must already exist before this cell is run.
weather_data = pd.DataFrame()
station_frames = []
by = 2
for stationID, j in stID.items():

    print(f"\n[{datetime.now()}] Retrieving weather data for station \t: {j[0]}")

    # j is [name, first year, last year]; built directly so the dates do not
    # depend on the locale's month abbreviations.
    start_date = datetime(j[1], 1, 1)
    end_date = datetime(j[2], 12, 1)

    # Collect the downloads in a list and concatenate once; calling pd.concat
    # inside the loop re-copies everything fetched so far on every iteration.
    pieces = []
    for dt in rrule.rrule(rrule.MONTHLY if by==1 else rrule.YEARLY, dtstart=start_date, until=end_date):
        df = get_data(stationID, dt.year, dt.month, by)
        pieces.append(df)

    if pieces:
        # Reversed so the most recent download comes first, matching the row
        # order produced by prepending each new frame.
        frames = pd.concat(pieces[::-1])
        frames = frames[frames['Mean Temp (°C)'].notna()].copy()
    else:
        frames = pd.DataFrame()

    if by==1: frames.to_csv(f'data/Hourly_{j[0]}_{stationID}.csv')
    if by==2: frames.to_csv(f'data/Daily_{j[0]}_{stationID}.csv')

    # Rebuild the combined frame after every station so the running shape can
    # be reported without concatenating onto an empty DataFrame.
    station_frames.append(frames)
    weather_data = pd.concat(station_frames)
    print("\t\t\t\tFinal weather Dataframe Shape\t\t:", weather_data.shape)

weather_data.to_csv('daily_weather_data_19Nov.csv')


[2023-11-19 17:01:56.863743] Retrieving weather data for station 	: COMOX A
				Final weather Dataframe Shape		: (25860, 31)

[2023-11-19 17:04:16.173132] Retrieving weather data for station 	: HOPEDALE (AUT)
				Final weather Dataframe Shape		: (46702, 31)

[2023-11-19 17:06:26.626206] Retrieving weather data for station 	: GREENWOOD A
				Final weather Dataframe Shape		: (72587, 31)

[2023-11-19 17:08:45.898187] Retrieving weather data for station 	: ARMSTRONG (AUT)
				Final weather Dataframe Shape		: (93035, 31)

[2023-11-19 17:10:50.996246] Retrieving weather data for station 	: TRENTON A
				Final weather Dataframe Shape		: (118312, 31)

[2023-11-19 17:13:22.365171] Retrieving weather data for station 	: COLD LAKE A
				Final weather Dataframe Shape		: (143747, 31)

[2023-11-19 17:15:40.493529] Retrieving weather data for station 	: CAPE DYER
				Final weather Dataframe Shape		: (149629, 31)

[2023-11-19 17:17:46.851021] Retrieving weather data for station 	: CAPE PARRY A
				Fi

In [6]:
# Show the station dictionary that drives the download loops.
stID

{155: ['COMOX A', 1953, 2023],
 6781: ['HOPEDALE (AUT)', 1953, 2023],
 6354: ['GREENWOOD A', 1953, 2023],
 3987: ['ARMSTRONG (AUT)', 1953, 2023],
 5126: ['TRENTON A', 1953, 2023],
 2832: ['COLD LAKE A', 1954, 2023],
 1739: ['CAPE DYER', 1955, 2023],
 1633: ['CAPE PARRY A', 1956, 2023],
 3649: ['PILOT MOUND (AUT)', 1957, 2023],
 1556: ['HAINES JUNCTION', 1960, 2023]}

In [9]:
# Hourly data
# Download one CSV per month for the station(s) listed below and write a
# per-station file. Unlike the daily cell, no NaN filter is applied here.
# NOTE: DataFrame.to_csv does not create missing folders, so the `data/`
# directory must already exist before this cell is run.
weather_data = pd.DataFrame()
station_frames = []
by = 1
for stationID, j in {1556: ['HAINES JUNCTION', 1960, 2023]}.items():

    print(f"\n[{datetime.now()}] Retrieving weather data for station \t: {j[0]}")

    # j is [name, first year, last year]; built directly so the dates do not
    # depend on the locale's month abbreviations.
    start_date = datetime(j[1], 1, 1)
    end_date = datetime(j[2], 12, 1)

    # Collect the downloads in a list and concatenate once; calling pd.concat
    # inside the loop re-copies everything fetched so far on every iteration.
    pieces = []
    for dt in rrule.rrule(rrule.MONTHLY if by==1 else rrule.YEARLY, dtstart=start_date, until=end_date):
        df = get_data(stationID, dt.year, dt.month, by)
        pieces.append(df)

    # Reversed so the most recent download comes first, matching the row
    # order produced by prepending each new frame.
    frames = pd.concat(pieces[::-1]) if pieces else pd.DataFrame()

    if by==1: frames.to_csv(f'data/Hourly_{j[0]}_{stationID}.csv')
    if by==2: frames.to_csv(f'data/Daily_{j[0]}_{stationID}.csv')

    # Rebuild the combined frame after every station so the running shape can
    # be reported without concatenating onto an empty DataFrame.
    station_frames.append(frames)
    weather_data = pd.concat(station_frames)
    print("\t\t\t\tFinal weather Dataframe Shape\t\t:", weather_data.shape)

# Combined export is currently disabled:
# weather_data.to_csv('hourly_weather_data_19Nov.csv')


[2023-11-20 15:01:28.569518] Retrieving weather data for station 	: HAINES JUNCTION
				Final weather Dataframe Shape		: (561024, 30)


In [13]:
# Share of missing values in each column of `frames` (0 = none, 1 = all).
frames.isna().mean()

Longitude (x)          0.000000
Latitude (y)           0.000000
Station Name           0.000000
Climate ID             0.000000
Date/Time (LST)        0.000000
Year                   0.000000
Month                  0.000000
Day                    0.000000
Time (LST)             0.000000
Temp (°C)              0.599652
Temp Flag              0.997587
Dew Point Temp (°C)    0.602685
Dew Point Temp Flag    0.995212
Rel Hum (%)            0.602687
Rel Hum Flag           0.995186
Precip. Amount (mm)    0.912900
Precip. Amount Flag    0.906088
Wind Dir (10s deg)     0.741278
Wind Dir Flag          0.997690
Wind Spd (km/h)        0.598636
Wind Spd Flag          0.998590
Visibility (km)        0.961449
Visibility Flag        0.856749
Stn Press (kPa)        0.794547
Stn Press Flag         0.841228
Hmdx                   0.997929
Hmdx Flag              1.000000
Wind Chill             0.913246
Wind Chill Flag        1.000000
Weather                0.961447
dtype: float64

In [17]:
# Display the combined frame assembled by the most recently run download loop.
weather_data

Unnamed: 0,Longitude (x),Latitude (y),Station Name,Climate ID,Date/Time (LST),Year,Month,Day,Time (LST),Temp (°C),...,Wind Spd Flag,Visibility (km),Visibility Flag,Stn Press (kPa),Stn Press Flag,Hmdx,Hmdx Flag,Wind Chill,Wind Chill Flag,Weather
0,-98.9,49.19,PILOT MOUND (AUT),5022125,2023-12-01 00:00,2023,12,1,00:00,,...,,,,,,,,,,
1,-98.9,49.19,PILOT MOUND (AUT),5022125,2023-12-01 01:00,2023,12,1,01:00,,...,,,,,,,,,,
2,-98.9,49.19,PILOT MOUND (AUT),5022125,2023-12-01 02:00,2023,12,1,02:00,,...,,,,,,,,,,
3,-98.9,49.19,PILOT MOUND (AUT),5022125,2023-12-01 03:00,2023,12,1,03:00,,...,,,,,,,,,,
4,-98.9,49.19,PILOT MOUND (AUT),5022125,2023-12-01 04:00,2023,12,1,04:00,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
739,-98.9,49.19,PILOT MOUND (AUT),5022125,1957-01-31 19:00,1957,1,31,19:00,,...,,,,,,,,,,
740,-98.9,49.19,PILOT MOUND (AUT),5022125,1957-01-31 20:00,1957,1,31,20:00,,...,,,,,,,,,,
741,-98.9,49.19,PILOT MOUND (AUT),5022125,1957-01-31 21:00,1957,1,31,21:00,,...,,,,,,,,,,
742,-98.9,49.19,PILOT MOUND (AUT),5022125,1957-01-31 22:00,1957,1,31,22:00,,...,,,,,,,,,,


#### References

1. Siang Lim (https://github.com/csianglim/weather-gc-ca-python/blob/master/Part%20I%20-%20Data%20Extraction%20and%20Cleaning.ipynb)