In [1]:
# import pandas as pd
# from pathlib import Path
import requests

In [4]:
# Endpoint listing the ONS datasets whose population base is "UR" (usual residents).
census_dataset_url = r"https://api.beta.ons.gov.uk/v1/datasets?is_based_on=UR&limit=50"

# 29 datasets/topic summaries were listed for the usual residence population
# originally; 34 as of 19/01/2023.
# NOTE(review): only the first `limit=50` items are read here -- if the count
# ever exceeds 50, paging through the results would be needed.
response = requests.get(census_dataset_url, timeout=5)
response.raise_for_status()  # surface HTTP errors here rather than as a KeyError on "items"
census_datasets = response.json()

# Keep just the dataset identifiers; the later cells build their request URLs from them.
census_datasets_ids = [dataset["id"] for dataset in census_datasets["items"]]

In [6]:
root_dataset_url = r"https://api.beta.ons.gov.uk/v1/datasets/"
edition = "2021"  # all census data is 2021

# Ask each dataset's edition endpoint which version is the latest one.
versions = []
for dataset_id in census_datasets_ids:
    edition_request_url = root_dataset_url + dataset_id + "/editions/" + edition
    response = requests.get(edition_request_url, timeout=5)
    response.raise_for_status()  # stop on an HTTP error instead of failing on a missing "links" key
    edition_endpoint_metadata = response.json()
    latest_version = edition_endpoint_metadata["links"]["latest_version"]["id"]
    versions.append(latest_version)

# Flat mapping {dataset id: latest version}: zip() pairs each id with the
# version found for it, and dict() turns those pairs into a dictionary.
ids_and_versions = dict(zip(census_datasets_ids, versions))

# Nested mapping {dataset id: {"version": latest version}}; later cells add the
# download links to each inner dictionary.
dataset_dict = {
    dataset_id: {'version': version}
    for dataset_id, version in ids_and_versions.items()
}
print(dataset_dict)

{'TS076': {'version': '1'}, 'TS071': {'version': '2'}, 'TS067': {'version': '2'}, 'TS065': {'version': '2'}, 'TS064': {'version': '2'}, 'TS063': {'version': '2'}, 'TS062': {'version': '2'}, 'TS061': {'version': '2'}, 'TS060': {'version': '2'}, 'TS059': {'version': '2'}, 'TS056': {'version': '3'}, 'TS039': {'version': '2'}, 'TS038': {'version': '2'}, 'TS037': {'version': '2'}, 'TS036': {'version': '2'}, 'TS035': {'version': '2'}, 'TS034': {'version': '2'}, 'TS033': {'version': '2'}, 'TS032': {'version': '2'}, 'TS030': {'version': '2'}, 'TS029': {'version': '2'}, 'TS028': {'version': '3'}, 'TS027': {'version': '3'}, 'TS021': {'version': '2'}, 'TS019': {'version': '2'}, 'TS016': {'version': '2'}, 'TS015': {'version': '2'}, 'TS013': {'version': '2'}, 'TS012': {'version': '1'}, 'TS009': {'version': '1'}, 'TS008': {'version': '3'}, 'TS007': {'version': '2'}, 'TS005': {'version': '2'}, 'TS002': {'version': '2'}}


In [7]:
# Get the csv and csvw download links for the latest version of every dataset
# and store them next to the version in dataset_dict.
# The loop variable is `dataset_id` (not `id`) so the builtin id() is not shadowed.
for dataset_id, dataset_entry in dataset_dict.items():
    version = dataset_entry["version"]

    csv_link_request_url = root_dataset_url + dataset_id + "/editions/" + edition + "/versions/" + version
    response = requests.get(csv_link_request_url, timeout=5)
    response.raise_for_status()  # stop on an HTTP error instead of failing on a missing "downloads" key
    dataset_metadata = response.json()

    downloads = dataset_metadata["downloads"]
    csv_link = downloads["csv"]["href"]
    csvw_link = downloads["csvw"]["href"]
    # Only the inner dictionary is modified, so iterating over dataset_dict.items() stays safe.
    dataset_entry.update({"csv": csv_link, "csvw": csvw_link})

In [None]:
# Download the data: for every dataset, save the csv and its csvw metadata
# into the current working directory.
# The loop variable is `dataset_id` (not `id`) so the builtin id() is not shadowed.
for dataset_id, dataset_entry in dataset_dict.items():
    # csv -> "<dataset id>_<edition>.csv"
    response = requests.get(dataset_entry["csv"], timeout=5)
    response.raise_for_status()  # never save an HTTP error body as if it were the dataset
    csv_bytes = response.content  # .content is the response body as bytes
    # `with` closes the file even if the write fails part-way.
    with open(dataset_id + "_" + edition + ".csv", "wb") as csv_file:
        csv_file.write(csv_bytes)

    # csvw -> "<dataset id>_<edition>_csvw-metadata.json"
    response = requests.get(dataset_entry["csvw"], timeout=5)
    response.raise_for_status()
    csvw_bytes = response.content  # .content is the response body as bytes
    with open(dataset_id + "_" + edition + "_csvw-metadata.json", "wb") as csvw_file:
        csvw_file.write(csvw_bytes)