In [2]:
import os
import pandas as pd

Load the latest CSV. This is imported using `dvc import-url`, and updated using `dvc update -R data/raw`.

In [3]:
# Restrict the load to the listed columns; DATE is parsed into datetimes on read.
wanted_columns = [
    'DATE',
    'GEOGRAPHY_CODE',
    'GEOGRAPHY_NAME',
    'GEOGRAPHY_TYPE',
    'VARIABLE_CODE',
    'VARIABLE_NAME',
    'MEASURES_NAME',
    'OBS_VALUE',
]
data = pd.read_csv(
    '../data/raw/LMS_latest_by_geo.csv',
    usecols=wanted_columns,
    parse_dates=['DATE'],
)

Convert the column names to lower case.

In [4]:
# Compute the lower-cased headers first, then swap them in on the existing frame.
lower_case_names = data.columns.str.lower()
data.columns = lower_case_names

Rename the `obs_value` column to `value`.

In [5]:
# Old header -> new header; columns not listed here keep their names.
column_renames = {'obs_value': 'value'}
data = data.rename(columns=column_renames)

Keep only the rows whose measure is **Variable**. The dataset also provides other measures, such as **Numerator**, **Denominator** and **Confidence**, which are excluded here.

In [6]:
# Boolean mask that is True for rows whose measure is exactly 'Variable'.
is_variable = data['measures_name'] == 'Variable'
# After filtering, every remaining row has the same measure, so the column is dropped.
data = data.loc[is_variable].drop(columns=['measures_name'])

Create the target directory.

In [7]:
# Directory (relative to the notebook) that receives the processed CSV output.
OUTPUT_DIR = '../data/processed/labour-market'
# exist_ok=True makes re-running this cell harmless when the directory already exists.
os.makedirs(name=OUTPUT_DIR, exist_ok=True)

In [8]:
# Select the rows for 2010 parliamentary constituencies.
pcon_2010_rows = data.loc[data.geography_type == 'parliamentary constituencies 2010']
# geography_type is the same for every selected row, so it is left out of the export.
pcon_2010 = pcon_2010_rows.drop(columns=['geography_type'])
# index=False keeps the DataFrame index out of the written file.
pcon_2010.to_csv(f'{OUTPUT_DIR}/lms_by_pcon_2010.csv', index=False)

In [9]:
# Show the filtered frame as a visual check; the saved output elides the middle rows.
data

Unnamed: 0,date,geography_name,geography_code,geography_type,variable_name,variable_code,value
0,2023-03-01,Berwick-upon-Tweed,E14000554,parliamentary constituencies 2010,Economic activity rate - aged 16-24,1201,65.3
4,2023-03-01,Berwick-upon-Tweed,E14000554,parliamentary constituencies 2010,% who are economically inactive - aged 16-24,1219,34.7
8,2023-03-01,Berwick-upon-Tweed,E14000554,parliamentary constituencies 2010,Employment rate - aged 16-24,1207,52.3
12,2023-03-01,Berwick-upon-Tweed,E14000554,parliamentary constituencies 2010,Unemployment rate - aged 16-24,1213,
16,2023-03-01,Berwick-upon-Tweed,E14000554,parliamentary constituencies 2010,% aged 18-24 in employment,542,71.0
...,...,...,...,...,...,...,...
24980,2023-03-01,Bournemouth West,E14000585,parliamentary constituencies 2010,"% aged 18-24 unemployed (percentage of all, no...",543,
24984,2023-03-01,Bournemouth West,E14000585,parliamentary constituencies 2010,% aged 18-24 in full time education,544,14.0
24988,2023-03-01,Bournemouth West,E14000585,parliamentary constituencies 2010,% aged 18-64 economically active with NVQ leve...,545,
24992,2023-03-01,Bournemouth West,E14000585,parliamentary constituencies 2010,% aged 18-64 economically active with NVQ leve...,546,
