In [1]:
from pathlib import Path
from typing import List, Any, Dict
import requests
import pandas as pd

In [2]:
r = requests.get('https://api.beta.ons.gov.uk/v1/datasets')
dataset_titles = []
dataset_descriptions: List[Any] = []
dataset_ids: List[Any] = []
for i in range(len(r.json()['items'])):
    dataset_titles.append(r.json()['items'][i]['title'])
    dataset_descriptions.append(r.json()['items'][i]['description'])
    dataset_ids.append(r.json()['items'][i]['id'])

table_of_codes = pd.DataFrame(data={'title': dataset_titles,
                   'description': dataset_descriptions,
                  'dataset_id': dataset_ids})

SSLError: HTTPSConnectionPool(host='api.beta.ons.gov.uk', port=443): Max retries exceeded with url: /v1/datasets (Caused by SSLError(SSLEOFError(8, 'EOF occurred in violation of protocol (_ssl.c:1056)')))

## find information about a dataset
we need to know the following items to make a correct api call:
* edition
* version
* time
* aggregate 
* geography
* dimension

> This allows querying of a single observation/value by providing one option per dimension, but will also allow one of
> these dimensions to be a ‘wildcard’ and return all values for this dimension.

`/datasets/{datasetId}/
editions/{edition}/
versions/{version}/
observations?
time={timeLabel}&
geography={geographyID}&
dimension3={dimension3ID}&
dimension4={dimension4ID}...`

In [5]:
def post(extension, is_ok=False):
    r = requests.get(f'https://api.beta.ons.gov.uk/v1/' + extension)
    if is_ok:
        print(r)
    return r.json()

`/datasets/cpih01/editions/time-series/versions/6/observations?time=Oct-11&geography=K02000001&aggregate=cpih1dim1A0`

### editions param

In [6]:
editions = post('/datasets/cpih01/editions')

In [None]:
editions['items'][0]['edition']

In [None]:
mother_df = pd.DataFrame.from_dict(post('/datasets/cpih01/editions/time-series/versions')['items'][3], orient='index').T

In [None]:
for i in range(len(post('/datasets/cpih01/editions/time-series/versions')['items'])):
    try:
        my_dict = post('/datasets/cpih01/editions/time-series/versions')['items'][i+1]
        df = pd.DataFrame.from_dict(my_dict, orient='index').T
        mother_df = mother_df.append(df)
    except:
        pass

In [None]:
mother_df.head()

Create a chart from Population Estimates for UK, Wales, etc.

* dataset
* edition
* version
* time
* aggregate
* geography
* dimension

code: mid-year-pop-est

In [7]:
dataset_id = 'mid-year-pop-est'

In [8]:
uk_population_dataset = post(f'/datasets/{dataset_id}', is_ok=True)

<Response [200]>


In [9]:
list(uk_population_dataset.keys())

['@context',
 'contacts',
 'description',
 'id',
 'keywords',
 'license',
 'links',
 'methodologies',
 'national_statistic',
 'next_release',
 'publications',
 'publisher',
 'qmi',
 'related_datasets',
 'release_frequency',
 'state',
 'theme',
 'title',
 'unit_of_measure']

In [10]:
uk_population_dataset

{'@context': 'https://cdn.ons.gov.uk/assets/json-ld/context.json',
 'contacts': [{'email': 'pop.info@ons.gov.uk ',
   'name': 'Neil Park ',
   'telephone': '+44 (0)1329 444661'}],
 'description': 'Estimates of the usual resident population for the UK as at 30 June of the reference year. Provided by administrative area, single year of age and sex.',
 'id': 'mid-year-pop-est',
 'keywords': ['Population'],
 'license': 'Open Government Licence v3.0',
 'links': {'access_rights': {},
  'editions': {'href': 'https://api.beta.ons.gov.uk/v1/datasets/mid-year-pop-est/editions'},
  'latest_version': {'href': 'https://api.beta.ons.gov.uk/v1/datasets/mid-year-pop-est/editions/time-series/versions/4',
   'id': '4'},
  'self': {'href': 'https://api.beta.ons.gov.uk/v1/datasets/mid-year-pop-est'}},
 'methodologies': [{'description': 'Background information and methods changes for this year',
   'href': 'https://www.ons.gov.uk/peoplepopulationandcommunity/populationandmigration/populationestimates/metho

In [11]:
uk_population_dataset.keys()

dict_keys(['@context', 'contacts', 'description', 'id', 'keywords', 'license', 'links', 'methodologies', 'national_statistic', 'next_release', 'publications', 'publisher', 'qmi', 'related_datasets', 'release_frequency', 'state', 'theme', 'title', 'unit_of_measure'])

## fetch the latest version of this dataset

In [12]:
uk_pop_latest_version = uk_population_dataset['links']['latest_version']['id']
uk_pop_latest_version


'4'

In [13]:
uk_population_editions = post(f'/datasets/{dataset_id}/editions/time-series/versions/{uk_pop_latest_version}', is_ok=True)
uk_population_editions


<Response [200]>


{'@context': 'https://cdn.ons.gov.uk/assets/json-ld/context.json',
 'alerts': [],
 'collection_id': 'midyearpopest-7e05d4510fcc86e5182e9838ee4419a92e1e28e2d71885e5d1b1b10a11a33fd7',
 'dimensions': [{'href': 'https://api.beta.ons.gov.uk/v1/code-lists/calendar-years',
   'id': 'calendar-years',
   'links': {'code_list': {}, 'options': {}, 'version': {}},
   'name': 'time'},
  {'href': 'https://api.beta.ons.gov.uk/v1/code-lists/mid-year-pop-geography',
   'id': 'mid-year-pop-geography',
   'label': 'Geography',
   'links': {'code_list': {}, 'options': {}, 'version': {}},
   'name': 'geography'},
  {'href': 'https://api.beta.ons.gov.uk/v1/code-lists/mid-year-pop-sex',
   'id': 'mid-year-pop-sex',
   'links': {'code_list': {}, 'options': {}, 'version': {}},
   'name': 'sex'},
  {'href': 'https://api.beta.ons.gov.uk/v1/code-lists/mid-year-pop-age',
   'id': 'mid-year-pop-age',
   'links': {'code_list': {}, 'options': {}, 'version': {}},
   'name': 'age'}],
 'downloads': {'csv': {'href': 'htt

##### fetch the dimensions that this dataset has to build the rest of the api.
##### We use the code list for this, a separate service

In [14]:
uk_population_code_list = post(f'/code-lists/{dataset_id}', is_ok=True)

<Response [404]>


JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [None]:
dimensions = post(f'/datasets/{dataset_id}/editions/time-series/versions/{uk_pop_latest_version}/dimensions', is_ok=True)

In [15]:
dimensions['items'][3]
# 'time'
# 'sex'
# 'geography'
# 'age'

NameError: name 'dimensions' is not defined

In [16]:
# check options for each dimension
dimension = 'sex'
options: Dict = post(f'/datasets/{dataset_id}/editions/time-series/versions/{uk_pop_latest_version}/dimensions/{dimension}/options', is_ok=True)
options['items']

<Response [200]>


[{'dimension': 'sex',
  'label': 'All',
  'links': {'code': {'href': 'https://api.beta.ons.gov.uk/v1/code-lists/mid-year-pop-sex/codes/0',
    'id': '0'},
   'code_list': {'href': 'https://api.beta.ons.gov.uk/v1/code-lists/mid-year-pop-sex',
    'id': 'mid-year-pop-sex'},
   'version': {'href': 'https://api.beta.ons.gov.uk/v1/datasets/mid-year-pop-est/editions/time-series/versions/4',
    'id': '4'}},
  'option': '0'},
 {'dimension': 'sex',
  'label': 'Male',
  'links': {'code': {'href': 'https://api.beta.ons.gov.uk/v1/code-lists/mid-year-pop-sex/codes/1',
    'id': '1'},
   'code_list': {'href': 'https://api.beta.ons.gov.uk/v1/code-lists/mid-year-pop-sex',
    'id': 'mid-year-pop-sex'},
   'version': {'href': 'https://api.beta.ons.gov.uk/v1/datasets/mid-year-pop-est/editions/time-series/versions/4',
    'id': '4'}},
  'option': '1'},
 {'dimension': 'sex',
  'label': 'Female',
  'links': {'code': {'href': 'https://api.beta.ons.gov.uk/v1/code-lists/mid-year-pop-sex/codes/2',
    'id': '

In [17]:
# age = type in int for age, up to '90+'. Use wildcard if possible. e.g '21'
# time = id is the year e.g '2010'
# geography = geography codes. UK = K02000001
# sex = 'All': 0, 'Male': 1, 'Female': 2
# %%

post(f'/datasets/{dataset_id}'
     f'/editions/time-series'
     f'/versions/4/'
     f'observations?'
     f'time=2017&'
     f'geography=K02000001&'
     f'sex=0&'
     f'age=*', is_ok=True)

<Response [200]>


{'@context': 'https://cdn.ons.gov.uk/assets/json-ld/context.json',
 'dimensions': {'geography': {'option': {'href': 'https://api.beta.ons.gov.uk/v1/code-lists/mid-year-pop-geography/codes/K02000001',
    'id': 'K02000001'}},
  'sex': {'option': {'href': 'https://api.beta.ons.gov.uk/v1/code-lists/mid-year-pop-sex/codes/0',
    'id': '0'}},
  'time': {'option': {'href': 'https://api.beta.ons.gov.uk/v1/code-lists/calendar-years/codes/2017',
    'id': '2017'}}},
 'limit': 10000,
 'links': {'dataset_metadata': {'href': 'https://api.beta.ons.gov.uk/v1/datasets/mid-year-pop-est/editions/time-series/versions/4/metadata'},
  'self': {'href': 'https://api.beta.ons.gov.uk/v1/datasets/mid-year-pop-est/editions/time-series/versions/4/observations?time=2017&geography=K02000001&sex=0&age=*'},
  'version': {'href': 'https://api.beta.ons.gov.uk/v1/datasets/mid-year-pop-est/editions/time-series/versions/4',
   'id': '4'}},
 'observations': [{'dimensions': {'age': {'href': 'https://api.beta.ons.gov.uk/v1

In [19]:
# extract the individual values

observations: Dict = post(f'/datasets/{dataset_id}'
     f'/editions/time-series'
     f'/versions/4/'
     f'observations?'
     f'time=2017&'
     f'geography=K02000001&'
     f'sex=0&'
     f'age=*', is_ok=True)



<Response [200]>


In [23]:
list(observations.keys())

['@context',
 'dimensions',
 'limit',
 'links',
 'observations',
 'offset',
 'total_observations',
 'unit_of_measure',
 'usage_notes']

In [24]:
observations['dimensions']

{'geography': {'option': {'href': 'https://api.beta.ons.gov.uk/v1/code-lists/mid-year-pop-geography/codes/K02000001',
   'id': 'K02000001'}},
 'sex': {'option': {'href': 'https://api.beta.ons.gov.uk/v1/code-lists/mid-year-pop-sex/codes/0',
   'id': '0'}},
 'time': {'option': {'href': 'https://api.beta.ons.gov.uk/v1/code-lists/calendar-years/codes/2017',
   'id': '2017'}}}

In [28]:
observations['observations'].count()

'Number of people'