In [57]:
from datetime import date
import os
import requests
import zipfile

## Purpose

Develop an approach for calling the StatsCan API (https://www.statcan.gc.ca/eng/developers/wds/user-guide) to download tables and to be notified when they have changed.

In [2]:
# Root URL of the StatsCan REST API; endpoint names are appended to it.

base_url = "https://www150.statcan.gc.ca/t1/wds/rest/"

In [11]:
# Get changed tables list
# The date is sent in ISO form (YYYY-MM-DD), the same text str(date) produces.

current_date = date.today().isoformat()
url = base_url + f'getChangedCubeList/{current_date}'

# Bound the wait and fail on an HTTP error status here, rather than later with
# a confusing KeyError / JSON decode error while parsing an error response.
response = requests.get(url, timeout=30)
response.raise_for_status()
results = response.json()['object']

In [49]:
# Download a zip file from a url
def download_url(url, save_path, chunk_size=128, timeout=30):
    """Stream the content served at ``url`` into the file ``save_path``.

    Args:
        url: Address to fetch with an HTTP GET.
        save_path: Destination file path. It is opened in binary write mode,
            so an existing file at that path is overwritten.
        chunk_size: Number of bytes requested per chunk while streaming.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    # The context manager closes the streamed response even if writing fails.
    with requests.get(url, stream=True, timeout=timeout) as r:
        # Without this check an error body would be saved as if it were the file.
        r.raise_for_status()
        with open(save_path, 'wb') as fd:
            for chunk in r.iter_content(chunk_size=chunk_size):
                fd.write(chunk)

# Download table from given product id
def download_table(productId: str):
    """Download and unpack the full CSV archive for one table.

    Asks the ``getFullTableDownloadCSV`` endpoint for the archive URL, saves the
    archive as ``data/{productId}.zip``, extracts it into ``data/{productId}``
    and then deletes the zip.

    Args:
        productId: Product id of the table; interpolated into both the request
            URL and the local paths.

    Raises:
        requests.HTTPError: If the endpoint answers with a 4xx/5xx status.
    """
    # Get changed table url
    url = base_url + f"getFullTableDownloadCSV/{productId}/en"
    response = requests.get(url, timeout=30)
    # Stop here on an HTTP error instead of failing while parsing the body.
    response.raise_for_status()
    table_url = response.json()['object']

    # open() does not create missing directories, so make sure data/ exists.
    os.makedirs('data', exist_ok=True)

    download_path = f'data/{productId}.zip'
    unzip_path = f'data/{productId}'
    try:
        # Download table csv
        download_url(url=table_url, save_path=download_path)

        # Unzip folder
        with zipfile.ZipFile(download_path, 'r') as zip_ref:
            zip_ref.extractall(unzip_path)
    finally:
        # Delete zip folder, also when the download or extraction failed, so a
        # partial or corrupt archive is not left behind.
        if os.path.exists(download_path):
            os.remove(download_path)

In [66]:
%%time
for result in results:
    download_table(result['productId'])

Wall time: 1min 32s
