Local area migration indicators UK: Migration Flows

In [1]:
from databaker.framework import *
import pandas as pd

In [2]:
from pathlib import Path

sourceFolder = Path('in')
sourceFolder.mkdir(exist_ok=True)

%run lib/scrape_ons.ipynb

metadata = scrape('https://www.ons.gov.uk/peoplepopulationandcommunity/populationandmigration/' \
                  'migrationwithintheuk/datasets/localareamigrationindicatorsunitedkingdom')

In [3]:
# Download the spreadsheet referenced by the scraped metadata into the source folder.
# NOTE(review): `session` is not defined in the visible cells; presumably it is a
# requests-style session provided by lib/scrape_ons.ipynb - confirm.
inputFile = sourceFolder / 'data.xls'
response = session.get(metadata['fileURL'])
# Stop here on an HTTP error instead of saving an error page as data.xls and
# failing later, less clearly, inside the spreadsheet loader.
response.raise_for_status()
with open(inputFile, 'wb') as f:
    f.write(response.content)
# Only the 'Migration Flows' sheet is requested; take the single tab returned.
tab = loadxlstabs(inputFile, sheetids='Migration Flows')[0]

Loading in/data.xls which has size 1161705 bytes
Table names: ['Migration Flows']


In [None]:
# Select the cells of the 'Migration Flows' tab that make up the observations
# and the header ranges that describe them.
# Observations: non-blank cells from C5 extending down and to the right.
observations = tab.excel_ref('C5').expand(DOWN).expand(RIGHT).is_not_blank()
# Area labels: non-blank cells in column A from row 5 downwards.
MigrationArea = tab.excel_ref('A5').expand(DOWN).is_not_blank()
# Header rows 1-3, each taken from the starting cell rightwards.
Period = tab.excel_ref('C1').expand(RIGHT).is_not_blank()
Migration = tab.excel_ref('C2').expand(RIGHT).is_not_blank()
# NOTE(review): this range starts at column D while the other headers start at
# column C - presumably C3 holds no flow label; confirm against the sheet.
Flow = tab.excel_ref('D3').expand(RIGHT).is_not_blank()
# The migration header is built separately so that a cell value containing a
# line break can be replaced with a single-line label.
migration = HDim(Migration,'Migration',CLOSEST,LEFT)
migration.AddCellValueOverride('Internal Migration\n (within UK)', 'Internal Migration (within UK)')

# Dimensions attached to every observation: three looked up from the header
# ranges above, the prepared migration dimension, and two constants.
Dimensions = [
            HDim(Period,'Mid Year',CLOSEST,LEFT),
            HDim(MigrationArea,'Geography', DIRECTLY, LEFT),
            migration,
            HDim(Flow,'Flow',DIRECTLY,ABOVE),
            HDimConst('Measure Type', 'Count'),
            HDimConst('Unit','People')
            ]
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# Left disabled; presumably renders an HTML preview of the selection for
# debugging - confirm before re-enabling.
#savepreviewhtml(c1)

In [None]:
# Convert the conversion segment into a pandas DataFrame and display it.
new_table = c1.topandas()
new_table

We can drop the mid-year population estimates, as these form a separate dataset.

In [None]:
# Rows whose migration label is a 'Mid-YYYY Population Estimate' are excluded;
# the remaining rows are copied so later column assignments act on an independent frame.
is_population_estimate = new_table['Migration'].str.match('^Mid-[0-9]{4} Population Estimate$')
new_table = new_table[~is_population_estimate].copy()

In [None]:
# Non-null count per column after dropping the population estimate rows.
new_table.count()

In [None]:
# Preview the first rows of the filtered table.
new_table.head()

In [None]:
# Keep only rows that carry an observation: drop both missing values and empty
# strings. This avoids `pd.np` (removed in pandas 2.0) and the in-place
# `replace` on a column selection, which does not reliably update the parent frame.
has_value = new_table['OBS'].notna() & (new_table['OBS'] != '')
new_table = new_table[has_value].copy()
# Observations are counts of people, so store them as integers.
new_table['Value'] = new_table['OBS'].astype(int)
new_table.head()

In [None]:
# Rewrite 'Mid-YYYY to Mid-ZZZZ' labels as a one-year interval starting on
# 30 June of the first year; labels that do not match are left unchanged.
# regex=True is stated explicitly: pandas 2.0 treats the pattern as a literal by
# default and rejects a callable replacement unless the pattern is a regex.
new_table['Mid Year'] = new_table['Mid Year'].str.replace(
    r'^Mid-([0-9]{4}) to Mid-([0-9]{4})\s*$',
    lambda m: f"{m.group(1)}-06-30T00:00:00/P1Y",
    regex=True,
)

In [None]:
# Keep only the output columns, in the order they are written to the CSV.
new_table = new_table[[
    'Mid Year',
    'Geography',
    'Migration',
    'Flow',
    'Measure Type',
    'Value',
    'Unit',
]]

In [None]:
# Preview the first rows of the table in its output column layout.
new_table.head()

In [None]:
# Write the tidy table to ./out, creating the folder if needed; the DataFrame
# index is not written to the file.
destinationFolder = Path('out')
destinationFolder.mkdir(parents=True, exist_ok=True)
new_table.to_csv(destinationFolder / 'migrationflows.csv', index=False)

In [None]:
# NOTE(review): `writeMetadata` is not defined in the visible cells; presumably
# it comes from lib/scrape_ons.ipynb and records the scraped metadata alongside
# the output - confirm what it writes and where.
writeMetadata(metadata, 'ONS Local Area Migration Indicators', 'Migration')

In [None]:
# Final sanity check: non-null count per column of the table that was written.
new_table.count()