# In [1]:

import requests
from pprint import pprint
import pandas as pd

# Code list ids that are deliberately skipped when reference CSVs are written
# (the reason for each exclusion is not recorded here).
EXCEPTIONS = ["calendar-years"]

# Dataset endpoints whose dimensions' code lists should be exported.
dataset_url_list = ["https://api.beta.ons.gov.uk/v1/datasets/ashe-tables-7-and-8"]

def _get_json_or_raise(url, timeout):
    """GET *url* and return its decoded JSON body; raise ValueError on a non-200 reply."""
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        # status_code is an int, so it must be formatted rather than
        # concatenated onto the message (str + int raises TypeError).
        raise ValueError("Failed with status code: {}".format(response.status_code))
    return response.json()


def get_unique_codelist_urls_from_a_dataset(url, timeout=30):
    """Return the code-list "codes" URLs for the dimensions of a dataset.

    The dataset resource at *url* is fetched, its ``links.latest_version.href``
    is followed, and one code-list URL is built per entry in that version's
    ``dimensions`` list, using the dimension's ``id``.

    Args:
        url: URL of the dataset resource.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        A list of code-list URLs with duplicates removed, in the order the
        dimensions were first seen.

    Raises:
        ValueError: If either HTTP request returns a status other than 200.
        KeyError: If the expected keys are missing from either JSON body.
    """
    dataset_as_dict = _get_json_or_raise(url, timeout)
    latest_version_url = dataset_as_dict["links"]["latest_version"]["href"]

    latest_version_as_dict = _get_json_or_raise(latest_version_url, timeout)

    # NOTE(review): the edition is hard-coded to "one-off"; presumably every
    # code list of interest has that edition -- confirm against the API.
    code_list_urls = [
        "https://api.beta.ons.gov.uk/v1/code-lists/{}/editions/one-off/codes".format(dimension["id"])
        for dimension in latest_version_as_dict["dimensions"]
    ]

    # dict.fromkeys drops repeated URLs while keeping first-seen order.
    return list(dict.fromkeys(code_list_urls))


def create_codelist_reference_csv_from_codelist_url(url, timeout=30):
    """Write a reference CSV for the code list served at *url*.

    The JSON body at *url* is expected to carry an ``items`` list in which
    each entry has ``label``, ``id`` and ``links.code_list.href``. The code
    list id is the last path segment of the first item's ``code_list`` href.
    Unless that id is listed in ``EXCEPTIONS``, a CSV with the columns
    ``Label``, ``Notation``, ``Parent`` and ``Sort Priority`` is written to
    ``../reference/codelists/<code_list_id>.csv`` (``Parent`` and
    ``Sort Priority`` are left blank).

    Args:
        url: URL of a code list's "codes" resource.
        timeout: Seconds to wait on the HTTP request before giving up.

    Returns:
        None.

    Raises:
        ValueError: If the request returns a status other than 200, or the
            response contains no items to derive the code list id from.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        # status_code is an int, so format it instead of concatenating
        # (str + int raises TypeError and hides the real failure).
        raise ValueError("Failed with status code: {}".format(response.status_code))

    # NOTE(review): only the items in this single response are written; if the
    # endpoint paginates, later pages are not fetched -- confirm.
    items = response.json().get("items") or []
    if not items:
        # Without at least one item there is no href to take the id from.
        raise ValueError("No codes returned for code list url: {}".format(url))

    # The id is the final path segment of the code_list link; rstrip guards
    # against a trailing slash producing an empty id.
    code_list_id = items[0]["links"]["code_list"]["href"].rstrip("/").split("/")[-1]

    if code_list_id in EXCEPTIONS:
        return

    blanks = [""] * len(items)
    df = pd.DataFrame(
        {
            "Label": [item["label"] for item in items],
            "Notation": [item["id"] for item in items],
            "Parent": blanks,
            "Sort Priority": blanks,
        }
    )
    df.to_csv("../reference/codelists/{}.csv".format(code_list_id), index=False)
                        
                        
# Export a reference CSV for every code list used by each configured dataset.
# `dataset_url_list` is defined once near the top of the file; the identical
# second copy that used to sit here was removed so the two cannot drift apart.
for dataset in dataset_url_list:

    code_list_urls_from_dataset = get_unique_codelist_urls_from_a_dataset(dataset)

    for cl in code_list_urls_from_dataset:
        create_codelist_reference_csv_from_codelist_url(cl)
    
    
    