# Extract data from NOMIS (Top level geographies)

The NOMIS API doesn't always comply with the JSON-stat structure, so geographies (and other metadata) need to be pulled using the REST API and then processed to extract the required data.

In [21]:
import pandas as pd
import requests
from pyjstat import pyjstat
from typing import List, Dict


**Fetch JSON list of geographies from the NOMIS API 1.1**

Full details for the NOMIS API are available at: <https://www.nomisweb.co.uk/api/v01/help>

In [22]:
# Top level geographies
# url = "https://www.nomisweb.co.uk/api/v01/dataset/NM_1_1/geography.def.sdmx.json"

# What areas are in England (2092957699)
# url = "https://www.nomisweb.co.uk/api/v01/dataset/NM_1_1/geography/2092957699.def.sdmx.json"

# Fetch a list of Lower Super Output Areas (Type 298) for England 2092957699
url = "https://www.nomisweb.co.uk/api/v01/dataset/NM_1_1/geography/2092957699TYPE298.def.sdmx.json"

# requests applies no timeout by default; set one so an unresponsive server
# raises an exception instead of blocking the kernel indefinitely.
response = requests.get(url, timeout=60)

# Stop here on an HTTP error status (4xx/5xx) rather than trying to parse an
# error response as if it were the geography definition.
response.raise_for_status()

# Decode the JSON body into nested Python dicts/lists for the cells below.
data = response.json()

In [23]:
def list_keys(d: Dict) -> List:
    """Collect the top-level keys of a dictionary.

    Args:
        d (Dict): Dictionary whose top-level keys are wanted

    Returns:
        List: Keys of ``d`` in iteration order, or an empty list when
            ``d`` is not a dictionary
    """
    # Anything that is not a dict has no keys to report.
    if not isinstance(d, dict):
        return []
    # Iterating a dict yields its keys, so list() gathers them directly.
    return list(d)

**Determine the structure of the returned JSON file to identify keys for the metadata and the actual data.**

In [24]:
# Step into the 'structure' node of the response and show which keys it holds.
structure = data['structure']
structure_keys = list_keys(structure)
print(structure_keys)

['codelists', 'header', 'xmlns', 'common', 'structure', 'xsi', 'schemalocation']


In [25]:
# Step into the 'codelists' node and show which keys it holds.
codelists = structure['codelists']
codelists_keys = list_keys(codelists)
print(codelists_keys)

['codelist']


**The codes are listed in `data['structure']['codelists']['codelist'][0]['code']`**

In [26]:
# 'codelist' is indexed by position; the code entries are read from its first element.
codelist = codelists['codelist']
first_codelist = codelist[0]
codes = first_codelist['code']

**Iterate over list to extract annotations for each record**

*Annotations are in an `annotations` → `annotation` sublist, with `annotationtitle` as the key and `annotationtext` as the value*

*The record's value is stored under the `value` key*

The final result is stored in a pandas DataFrame.

In [27]:
# Build one flat record per code entry: every annotation contributes a
# title -> text pair, and the entry's own 'value' is written last so it
# takes precedence over an annotation that happens to use the same title.
records = [
    {
        **{
            note['annotationtitle']: note['annotationtext']
            for note in code['annotations']['annotation']
        },
        'value': code['value'],
    }
    for code in codes
]

# Assemble the DataFrame once from the complete list of records.
codes_df = pd.DataFrame(records)

**Export the list of LSOA codes to a CSV file**

In [30]:
# Write the raw code table to disk; index=False leaves the DataFrame index out of the file.
output_path = './A_Assumptions/nomis_lsoa_codes_raw.csv'
codes_df.to_csv(output_path, index=False)