In [1]:
import pandas as pd
import io
import os
from google.colab import drive
import numpy as np
from datetime import datetime

In [2]:
# Mount Google Drive inside the Colab runtime so later cells can write files to it.
# force_remount=True re-mounts even if the drive is already mounted.
mount_point = '/content/drive'
drive.mount(mount_point, force_remount=True)

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


#### 1) First, we define a function that downloads weather data from the Government of Canada climate data website

In [3]:
def getHourlyData(stationID, year, month):
    """Download one month of weather observations for a station as a DataFrame.

    Builds a bulk-data CSV request against climate.weather.gc.ca with
    ``timeframe=1`` (the timeframe this notebook uses for hourly data, per the
    function's name) and lets pandas read the CSV directly from that URL.

    Parameters
    ----------
    stationID : value substituted into the ``stationID`` query parameter.
    year : value substituted into the ``Year`` query parameter.
    month : value substituted into the ``Month`` query parameter.

    Returns
    -------
    pandas.DataFrame
        Whatever ``pd.read_csv`` parses from the response.
    """
    # Fill in the query string first, then prepend the endpoint.
    query_url = "format=csv&stationID={}&Year={}&Month={}&timeframe=1".format(stationID, year, month)
    base_url = "http://climate.weather.gc.ca/climate_data/bulk_data_e.html?"
    monthly_frame = pd.read_csv(base_url + query_url, skiprows=0)
    return monthly_frame

#### 2) We download the weather data for a single weather station, McTavish (ID = 30165)

In [4]:
# Station and years to download.
stationID = 30165
year_list = [2015, 2016, 2017, 2018, 2019]

# One request per (year, month), years in the outer position and months 1..12
# in the inner one, so the monthly frames are collected in chronological order.
frames = [
    getHourlyData(stationID, year, month)
    for year in year_list
    for month in range(1, 13)
]

# Stack all monthly frames into a single table (row indexes are kept as-is).
weather_data = pd.concat(frames)

#### 3) Recode time into shifts

In [7]:
# Data cleaning
def _shift_for_time(hhmm):
    """Return the shift label ('Night', 'Day' or 'Evening') for a time string."""
    # Plain string comparison: assumes values look like zero-padded 'HH:MM' -- TODO confirm.
    if hhmm < '08:00':
        return 'Night'
    if hhmm < '16:00':
        return 'Day'
    return 'Evening'


# Work on a copy of the downloaded data with two derived columns appended:
# the shift label and the first 10 characters of 'Date/Time'.
climate_raw = weather_data.assign(
    Shift=weather_data['Time'].apply(_shift_for_time),
    Date=weather_data['Date/Time'].str[:10],
)

# Keep the useful measurement columns and give them short names
measurement_names = {
    'Temp (°C)': 'Temp_DC',
    'Rel Hum (%)': 'Humid_percent',
    'Wind Dir (10s deg)': 'Win_Dir',
    'Wind Spd (km/h)': 'Wind_Speed',
    'Stn Press (kPa)': 'Stn_Press',
}
climate = climate_raw[['Date', 'Shift'] + list(measurement_names)].rename(columns=measurement_names)

# Aggregate every measurement per (Date, Shift); the suffix records the statistic.
# NOTE(review): Win_Dir is averaged arithmetically like the other columns; for a
# direction expressed in tens of degrees this ignores wrap-around -- confirm that is intended.
per_shift = climate.groupby(['Date', 'Shift'])
climate_mean = per_shift.mean().add_suffix('_Mean')
climate_max = per_shift.max().add_suffix('_Max')
climate_min = per_shift.min().add_suffix('_Min')

# Join on the shared (Date, Shift) index: means first, then maxima, then minima
climate_final = climate_mean.join(climate_max).join(climate_min)

In [8]:
# Preview the first five rows of the aggregated table
climate_final.head(n=5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Temp_DC_Mean,Humid_percent_Mean,Win_Dir_Mean,Wind_Speed_Mean,Stn_Press_Mean,Temp_DC_Max,Humid_percent_Max,Win_Dir_Max,Wind_Speed_Max,Stn_Press_Max,Temp_DC_Min,Humid_percent_Min,Win_Dir_Min,Wind_Speed_Min,Stn_Press_Min
Date,Shift,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2015-01-01,Day,-4.1375,71.875,22.625,27.625,100.37,-3.3,84.0,23.0,33.0,100.58,-5.7,59.0,22.0,24.0,100.18
2015-01-01,Evening,-3.4375,82.125,23.25,24.125,100.26375,-3.2,90.0,25.0,27.0,100.39,-3.7,69.0,21.0,21.0,100.22
2015-01-01,Night,-6.625,67.625,22.25,21.25,100.7,-6.0,78.0,24.0,26.0,100.81,-7.2,46.0,21.0,18.0,100.58
2015-01-02,Day,-8.325,50.125,28.125,24.75,101.595,-7.8,56.0,30.0,32.0,101.86,-9.5,45.0,27.0,13.0,101.31
2015-01-02,Evening,-10.8875,48.625,28.875,12.5,102.41625,-8.3,52.0,30.0,25.0,102.83,-12.4,44.0,28.0,5.0,101.97


#### 4) Output data to Google Drive

In [9]:
# Write the per-date, per-shift summary table as CSV into the mounted Drive folder.
output_csv_path = '/content/drive/My Drive/Data/YCBS-299/Weather_data_2015_2019.csv'
climate_final.to_csv(output_csv_path)