In [7]:
import pandas as pd
from tqdm.notebook import tqdm
from pyjstat import pyjstat
from typing import List

In [8]:
# Lookup table of NOMIS geographies (presumably local authorities, going by the
# file name). The download cells below read its GEOGRAPHY, GEOGRAPHY_NAME and
# GEOGRAPHY_CODE columns.
nomis_la_codes = pd.read_csv(filepath_or_buffer="./A_Assumptions/nomis_la_codes.csv")

In [9]:
def nomis_url(table_name: str, geography: str, periods: int = 7) -> str:
    """Build the NOMIS JSON-stat API query URL for one table and one geography.

    Supported tables:
        NM_161_1 - Deaths
        NM_31_1 - Population

    Args:
        table_name: NOMIS dataset id; must be one of the supported tables above.
        geography: Value placed verbatim after ``geography=`` in the query string.
        periods: How many trailing periods to request, counting back from
            ``latest`` (``latest``, ``latestMINUS1``, ...). Defaults to 7,
            i.e. ``latest`` through ``latestMINUS6``.

    Returns:
        The full request URL as a string.

    Raises:
        KeyError: If ``table_name`` is not a supported table.
        ValueError: If ``periods`` is less than 1.
    """
    # Fixed dimension filters per table; appended verbatim to the query string.
    url_params = {
        "NM_161_1": "&cause_of_death=0,98,99,1...18,20&gender=0...2&age=0...20&measure=1&measures=20100",
        "NM_31_1": "&sex=5...7&age=0...19&measures=20100",
    }

    # Fail early, with a readable message, before any string building.
    if table_name not in url_params:
        raise KeyError(
            f"Unsupported NOMIS table {table_name!r}; expected one of {sorted(url_params)}"
        )
    if periods < 1:
        raise ValueError(f"periods must be at least 1, got {periods}")

    # Relative date keywords: "latest", "latestMINUS1", ..., "latestMINUS<periods-1>".
    dates = ["latest"] + [f"latestMINUS{offset}" for offset in range(1, periods)]
    date_enc = ",".join(dates)

    url_base = f"https://www.nomisweb.co.uk/api/v01/dataset/{table_name}.jsonstat.json?"
    return f"{url_base}geography={geography}&date={date_enc}{url_params[table_name]}"


In [10]:
def write_list(output_list: List, output_filename: str) -> None:
    """Write each element of ``output_list`` on its own line of a text file.

    The file is created (or overwritten) at ``./X_Output/<output_filename>``.
    Each element is concatenated with a newline, so elements must be strings.
    """
    target = f"./X_Output/{output_filename}"
    with open(target, "w") as handle:
        handle.writelines(entry + "\n" for entry in output_list)

In [11]:
# Download the population table (NM_31_1) once per row of nomis_la_codes and
# stack the per-geography results into a single DataFrame.
population_urls = []    # every URL requested, saved later alongside the data
population_frames = []  # one DataFrame per geography
# iterrows() is a generator, so tqdm needs the total passed in to show progress.
for _, geography in tqdm(nomis_la_codes.iterrows(), total=len(nomis_la_codes)):
    url = nomis_url("NM_31_1", str(geography["GEOGRAPHY"]))
    population_urls.append(url)

    # Read the JSON-stat dataset at the URL and convert it to a DataFrame.
    df = pyjstat.Dataset.read(url).write('dataframe')

    # Tag every row with the geography it was requested for.
    df['GEOGRAPHY'] = geography["GEOGRAPHY"]
    df['GEOGRAPHY_NAME'] = geography["GEOGRAPHY_NAME"]
    df['GEOGRAPHY_CODE'] = geography["GEOGRAPHY_CODE"]
    population_frames.append(df)

# Concatenate once after the loop: concatenating inside the loop re-copies the
# accumulated frame on every iteration. Binding `population` even when there
# were no rows stops a later cell from picking up a stale value left in the
# kernel by an earlier run.
population = (
    pd.concat(population_frames, axis=0) if population_frames else pd.DataFrame()
)

0it [00:00, ?it/s]

In [12]:
# Save the combined population table, then the list of URLs it was built from.
population.to_csv(path_or_buf="./X_Output/la_population.csv")
write_list(output_list=population_urls, output_filename="population_urls.txt")

In [14]:
# Download the deaths table (NM_161_1) once per row of nomis_la_codes and
# stack the per-geography results into a single DataFrame.
geography_urls = []  # every deaths-table URL requested, saved later with the data
deaths_frames = []   # one DataFrame per geography
# iterrows() is a generator, so tqdm needs the total passed in to show progress.
for _, geography in tqdm(nomis_la_codes.iterrows(), total=len(nomis_la_codes)):
    url = nomis_url("NM_161_1", str(geography["GEOGRAPHY"]))
    geography_urls.append(url)

    # Read the JSON-stat dataset at the URL and convert it to a DataFrame.
    df = pyjstat.Dataset.read(url).write('dataframe')

    # Tag every row with the geography it was requested for.
    df['GEOGRAPHY'] = geography["GEOGRAPHY"]
    df['GEOGRAPHY_NAME'] = geography["GEOGRAPHY_NAME"]
    df['GEOGRAPHY_CODE'] = geography["GEOGRAPHY_CODE"]
    deaths_frames.append(df)

# Concatenate once after the loop: concatenating inside the loop re-copies the
# accumulated frame on every iteration. Binding `deaths` even when there were
# no rows stops a later cell from picking up a stale value left in the kernel
# by an earlier run.
deaths = pd.concat(deaths_frames, axis=0) if deaths_frames else pd.DataFrame()

0it [00:00, ?it/s]

In [15]:
# Save the combined deaths table, then the list of URLs it was built from.
deaths.to_csv(path_or_buf="./X_Output/la_deaths.csv")
write_list(output_list=geography_urls, output_filename="geography_urls.txt")