Local area migration indicators UK: Migration Flows

In [1]:
from pathlib import Path

from databaker.framework import *
import pandas as pd

In [2]:
from pathlib import Path

sourceFolder = Path('in')
sourceFolder.mkdir(exist_ok=True)

%run lib/scrape_ons.ipynb

metadata = scrape('https://www.ons.gov.uk/peoplepopulationandcommunity/populationandmigration/' \
                  'migrationwithintheuk/datasets/localareamigrationindicatorsunitedkingdom')

In [3]:
# Download the spreadsheet referenced by the scraped metadata into the source folder.
inputFile = sourceFolder / 'data.xls'
response = session.get(metadata['fileURL'])
# Fail fast on an HTTP error instead of saving an error page as data.xls.
# NOTE(review): assumes `session` is a requests-style session whose responses
# provide raise_for_status() - confirm against lib/scrape_ons.ipynb.
response.raise_for_status()
with open(inputFile, 'wb') as f:
    f.write(response.content)
# Only the 'Migration Flows' sheet is requested; take the first tab returned.
tab = loadxlstabs(inputFile, sheetids='Migration Flows')[0]

Loading in/data.xls which has size 1161705 bytes
Table names: ['Migration Flows']


In [4]:
def _non_blank(start, *directions):
    """Return the non-blank cells reached by expanding from `start` in each direction in turn."""
    selection = tab.excel_ref(start)
    for direction in directions:
        selection = selection.expand(direction)
    return selection.is_not_blank()


# Observation block and the header cells that describe it.
observations = _non_blank('C5', DOWN, RIGHT)
MigrationArea = _non_blank('A5', DOWN)
Period = _non_blank('C1', RIGHT)
Migration = _non_blank('C2', RIGHT)
Flow = _non_blank('D3', RIGHT)

# The source label for internal migration contains a line break; replace it
# with a single-line value.
migration = HDim(Migration, 'Migration', CLOSEST, LEFT)
migration.AddCellValueOverride('Internal Migration\n (within UK)', 'Internal Migration (within UK)')

Dimensions = [
    HDim(Period, 'Mid Year', CLOSEST, LEFT),
    HDim(MigrationArea, 'Geography', DIRECTLY, LEFT),
    migration,
    HDim(Flow, 'Flow', DIRECTLY, ABOVE),
    HDimConst('Measure Type', 'Count'),
    HDimConst('Unit', 'People'),
]
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# To inspect the selections visually, run: savepreviewhtml(c1)

In [5]:
# Convert the conversion segment into a pandas DataFrame (one row per
# observation, one column per dimension) and display it.
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,DATAMARKER,Mid Year,Geography,Migration,Flow,Measure Type,Unit
0,5.18159e+07,,Mid-2007 to Mid-2008,E92000001,Mid-2008 Population Estimate,,Count,People
1,553735,,Mid-2007 to Mid-2008,E92000001,Long-Term International Migration,Inflow,Count,People
2,316545,,Mid-2007 to Mid-2008,E92000001,Long-Term International Migration,Outflow,Count,People
3,98843,,Mid-2007 to Mid-2008,E92000001,Internal Migration (within UK),Inflow,Count,People
4,116591,,Mid-2007 to Mid-2008,E92000001,Internal Migration (within UK),Outflow,Count,People
5,5.21964e+07,,Mid-2008 to Mid-2009,E92000001,Mid-2009 Population Estimate,,Count,People
6,525074,,Mid-2008 to Mid-2009,E92000001,Long-Term International Migration,Inflow,Count,People
7,350353,,Mid-2008 to Mid-2009,E92000001,Long-Term International Migration,Outflow,Count,People
8,99548,,Mid-2008 to Mid-2009,E92000001,Internal Migration (within UK),Inflow,Count,People
9,104650,,Mid-2008 to Mid-2009,E92000001,Internal Migration (within UK),Outflow,Count,People


We can drop the mid-year population estimates, as these form a separate dataset.

In [6]:
# Rows whose Migration label looks like 'Mid-2008 Population Estimate' are
# population estimates rather than migration flows; keep everything else.
is_population_estimate = new_table['Migration'].str.match('^Mid-[0-9]{4} Population Estimate$')
new_table = new_table[~is_population_estimate].copy()

In [7]:
# Non-null count per column after removing the population-estimate rows.
new_table.count()

OBS             17480
DATAMARKER         66
Mid Year        17480
Geography       17480
Migration       17480
Flow            17480
Measure Type    17480
Unit            17480
dtype: int64

In [8]:
# Preview the first rows of the filtered table.
new_table.head()

Unnamed: 0,OBS,DATAMARKER,Mid Year,Geography,Migration,Flow,Measure Type,Unit
1,553735,,Mid-2007 to Mid-2008,E92000001,Long-Term International Migration,Inflow,Count,People
2,316545,,Mid-2007 to Mid-2008,E92000001,Long-Term International Migration,Outflow,Count,People
3,98843,,Mid-2007 to Mid-2008,E92000001,Internal Migration (within UK),Inflow,Count,People
4,116591,,Mid-2007 to Mid-2008,E92000001,Internal Migration (within UK),Outflow,Count,People
6,525074,,Mid-2008 to Mid-2009,E92000001,Long-Term International Migration,Inflow,Count,People


In [9]:
# Blank observations arrive as empty strings: turn them into NaN, drop those
# rows, then store the remaining observations as integers in a new 'Value' column.
#
# float('nan') replaces the deprecated `pd.np.nan` alias (not available in
# current pandas), and the steps are chained instead of calling
# Series.replace(..., inplace=True) on a column selection, which is chained
# assignment and does not update the frame under pandas Copy-on-Write.
new_table = (
    new_table
    .assign(OBS=new_table['OBS'].replace('', float('nan')))
    .dropna(subset=['OBS'])
    .assign(Value=lambda df: df['OBS'].astype(int))
)
new_table.head()

Unnamed: 0,OBS,DATAMARKER,Mid Year,Geography,Migration,Flow,Measure Type,Unit,Value
1,553735.0,,Mid-2007 to Mid-2008,E92000001,Long-Term International Migration,Inflow,Count,People,553735
2,316545.0,,Mid-2007 to Mid-2008,E92000001,Long-Term International Migration,Outflow,Count,People,316545
3,98843.0,,Mid-2007 to Mid-2008,E92000001,Internal Migration (within UK),Inflow,Count,People,98843
4,116591.0,,Mid-2007 to Mid-2008,E92000001,Internal Migration (within UK),Outflow,Count,People,116591
6,525074.0,,Mid-2008 to Mid-2009,E92000001,Long-Term International Migration,Inflow,Count,People,525074


In [10]:
# Convert labels such as 'Mid-2007 to Mid-2008' into an interval that starts on
# 30 June of the first year and lasts one year, e.g. '2007-06-30T00:00:00/P1Y'.
# regex=True is passed explicitly: pandas 2.0 made literal matching the default
# for Series.str.replace, and a callable replacement is rejected unless the
# pattern is treated as a regular expression.
new_table['Mid Year'] = new_table['Mid Year'].str.replace(
    r'^Mid-([0-9]{4}) to Mid-([0-9]{4})\s*$',
    lambda m: f"{m.group(1)}-06-30T00:00:00/P1Y",
    regex=True,
)

In [11]:
# Keep only the columns that go into the output file, in output order
# (OBS and DATAMARKER are left out).
output_columns = ['Mid Year', 'Geography', 'Migration', 'Flow', 'Measure Type', 'Value', 'Unit']
new_table = new_table[output_columns]

In [12]:
# Preview the table in its output layout.
new_table.head()

Unnamed: 0,Mid Year,Geography,Migration,Flow,Measure Type,Value,Unit
1,2007-06-30T00:00:00/P1Y,E92000001,Long-Term International Migration,Inflow,Count,553735,People
2,2007-06-30T00:00:00/P1Y,E92000001,Long-Term International Migration,Outflow,Count,316545,People
3,2007-06-30T00:00:00/P1Y,E92000001,Internal Migration (within UK),Inflow,Count,98843,People
4,2007-06-30T00:00:00/P1Y,E92000001,Internal Migration (within UK),Outflow,Count,116591,People
6,2008-06-30T00:00:00/P1Y,E92000001,Long-Term International Migration,Inflow,Count,525074,People


In [13]:
# Write the tidied observations to out/migrationflows.csv, omitting the index.
destinationFolder = Path('out')
destinationFolder.mkdir(parents=True, exist_ok=True)

output_file = destinationFolder / 'migrationflows.csv'
new_table.to_csv(output_file, index=False)

In [14]:
# NOTE(review): writeMetadata is not defined in this notebook; presumably it comes
# from lib/scrape_ons.ipynb and records the scraped dataset metadata - confirm.
writeMetadata(metadata, 'ONS Local Area Migration Indicators', 'Migration')

In [15]:
# Final sanity check: non-null count per output column.
new_table.count()

Mid Year        17414
Geography       17414
Migration       17414
Flow            17414
Measure Type    17414
Value           17414
Unit            17414
dtype: int64