# Long-term international migration 2.04, main reason for migration

In [1]:
from gssutils import *

# Landing page of the ONS dataset; the scraper discovers the downloadable
# distributions from it. Adjacent string literals inside the parentheses are
# concatenated by Python, so no line-continuation characters are needed.
dataset_url = (
    'https://www.ons.gov.uk/peoplepopulationandcommunity/populationandmigration/'
    'internationalmigration/datasets/'
    'longterminternationalmigrationmainreasonformigrationtable204'
)
scraper = Scraper(dataset_url)
scraper

## Long-term international migration 2.04, main reason for migration, UK and England and Wales

The primary purpose of migrants entering or leaving UK. Estimates of Long-Term International Migration, annual table.

### Distributions

1. Long-term international migration 2.04, main reason for migration, UK and England and Wales ([MS Excel Spreadsheet](https://www.ons.gov.uk/file?uri=/peoplepopulationandcommunity/populationandmigration/internationalmigration/datasets/longterminternationalmigrationmainreasonformigrationtable204/current/2.04ltimmainreasonformigration1991to2017.xls))


In [2]:
# Load every worksheet of the scraped distribution and keep the first one
# whose name is exactly 'Table 2.04'.
sheets = scraper.distribution().as_databaker()
tab = next(filter(lambda sheet: sheet.name == 'Table 2.04', sheets))

In [3]:
# Anchor on the single 'Year' header cell; every other selection is made
# relative to it.
top_left = tab.filter('Year')
top_left.assert_one()

# Reason headings: non-blank cells to the right of the anchor, on its own row
# and on the row directly beneath it.
header_row = top_left.fill(RIGHT).is_not_blank()
sub_header_row = top_left.shift(DOWN).fill(RIGHT).is_not_blank()
reason = header_row | sub_header_row

# The column below the anchor is searched for year labels (four digits,
# optionally followed by '.0'), flow headings and geography headings.
first_column = top_left.fill(DOWN)
year = first_column.regex(r'[0-9]{4}(\.0)?')
flow = first_column.one_of(['Inflow', 'Outflow'])
geography = first_column.one_of(['United Kingdom', 'England and Wales'])

# An observation is any cell lying both to the right of a year and below a
# reason heading; the cell immediately to its right supplies the 'CI' value.
observations = year.fill(RIGHT) & reason.fill(DOWN)
observations_ci = observations.shift(RIGHT)

# Dimension order is kept as-is because it determines the output column order.
dimensions = [
    HDim(year, 'Year', DIRECTLY, LEFT),
    HDim(geography, 'Geography', CLOSEST, ABOVE),
    HDim(reason, 'Reason for migration', DIRECTLY, ABOVE),
    HDim(flow, 'Migration Flow', CLOSEST, ABOVE),
    HDimConst('Measure Type', 'Count'),
    HDimConst('Unit', 'People (thousands)'),
    HDim(observations_ci, 'CI', DIRECTLY, RIGHT),
]
cs = ConversionSegment(observations, dimensions)
savepreviewhtml(cs)

0,1,2,3,4,5
OBS,Year,Geography,Reason for migration,Migration Flow,CI

0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18
,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,,
,,,,Highlight significant changes over the last year?,,,,,,,,,,1,,,,
,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,,
Table 2.04,,,,,,,,,,,,,,,,,,Series MN
,,,,,,,,,,,,,,,,,,
Long-Term International Migration,,,,,,,,,,,,,,,,,,"United Kingdom,"
"time series, 1991 to 2017",,,,,,,,,,,,,,,,,,England and Wales


In [4]:
# Flatten the conversion segment into a DataFrame and expose the observation
# column under the name 'Value'.
tidy = cs.topandas().rename(columns={'OBS': 'Value'})
tidy




Unnamed: 0,Value,DATAMARKER,Year,Geography,Reason for migration,Migration Flow,Measure Type,Unit,CI
0,329,,1991.0,United Kingdom,All reasons,Inflow,Count,People (thousands),23.0
1,71,,1991.0,United Kingdom,All,Inflow,Count,People (thousands),10.0
2,50,,1991.0,United Kingdom,Definite job,Inflow,Count,People (thousands),9.0
3,21,,1991.0,United Kingdom,Looking for work1,Inflow,Count,People (thousands),4.0
4,90,,1991.0,United Kingdom,Accompany / join,Inflow,Count,People (thousands),14.0
5,56,,1991.0,United Kingdom,Formal study,Inflow,Count,People (thousands),10.0
6,67,,1991.0,United Kingdom,Other,Inflow,Count,People (thousands),7.0
7,45,,1991.0,United Kingdom,No reason stated2,Inflow,Count,People (thousands),9.0
8,268,,1992.0,United Kingdom,All reasons,Inflow,Count,People (thousands),20.0
9,76,,1992.0,United Kingdom,All,Inflow,Count,People (thousands),10.0


In [5]:
def _marker_code(raw):
    """Return 'not-available' for the ':' placeholder; pass anything else through unchanged."""
    if raw == ':':
        return 'not-available'
    return raw


# Derive the 'Marker' column from the raw data markers.
tidy['Marker'] = tidy['DATAMARKER'].map(_marker_code)

In [6]:
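# Superseded approach, kept for reference only: it discarded every row that carried a
# data marker. Markers are instead mapped into the 'Marker' column (see the DATAMARKER
# mapping in the previous cell).
# tidy = cs.topandas()
# tidy = tidy[pd.isna(tidy['DATAMARKER'])].copy() # TODO: data markers dropped; figure out how to model them
# tidy.drop(columns=['DATAMARKER'], inplace=True)
# tidy.rename(columns={'OBS': 'Value'}, inplace=True)
# tidy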

In [7]:
# Import from the public `IPython.display` module rather than the internal
# `IPython.core.display` path, and import `display` explicitly so the cell does
# not rely on the name being injected by the notebook front end.
from IPython.display import HTML, display

# Convert every column except the numeric 'Value' and 'CI' columns to a
# categorical, and show each column's distinct categories under its own heading
# so unexpected labels are easy to spot before they are recoded.
for col in tidy:
    if col not in ['Value', 'CI']:
        tidy[col] = tidy[col].astype('category')
        display(HTML(f"<h2>{col}</h2>"))
        display(tidy[col].cat.categories)

Index([':'], dtype='object')

Index(['1991.0', '1992.0', '1993.0', '1994.0', '1995.0', '1996.0', '1997.0',
       '1998.0', '1999.0', '2000.0', '2001.0', '2002.0', '2003.0', '2004.0',
       '2005.0', '2006.0', '2007.0', '2008.0', '2009.0', '2010.0', '2011.0',
       '2012.0', '2013.0', '2014.0', '2015', '2016', '2017'],
      dtype='object')

Index(['England and Wales', 'United Kingdom'], dtype='object')

Index(['Accompany / join', 'All', 'All reasons', 'Definite job',
       'Formal study', 'Looking for work1', 'No reason stated2', 'Other'],
      dtype='object')

Index(['Inflow', 'Outflow'], dtype='object')

Index(['Count'], dtype='object')

Index(['People (thousands)'], dtype='object')

Index(['not-available'], dtype='object')

In [8]:
# Recode the dimension values to the codes / slugs used in the output.
#
# Categories are renamed with `rename_categories` instead of assigning to
# `.cat.categories`: direct assignment is deprecated and, from pandas 2.0,
# removed (it raises AttributeError).

# Year labels arrive as a mix of '1991.0' and '2015' style strings; normalise
# all of them to a plain four-digit string.
tidy['Year'] = tidy['Year'].cat.rename_categories(lambda x: str(int(float(x))))

# Replace geography names with their codes.
tidy['Geography'] = tidy['Geography'].cat.rename_categories({
    'United Kingdom': 'K02000001',
    'England and Wales': 'K04000001'
})

# Map the spreadsheet headings (including their trailing footnote digits) to
# slugs. The bare 'All' heading is recoded as the work-related total.
tidy['Reason for migration'] = tidy['Reason for migration'].cat.rename_categories({
    'Accompany / join': 'accompany-or-join',
    'All': 'work-related-all',
    'All reasons': 'all-reasons',
    'Definite job': 'work-related-definite-job',
    'Formal study': 'formal-study',
    'Looking for work1': 'work-related-looking-for-work',
    'No reason stated2': 'no-reason-stated',
    'Other': 'other'
})

# Slugify the flow labels with `pathify`.
tidy['Migration Flow'] = tidy['Migration Flow'].cat.rename_categories(lambda x: pathify(x))

# NOTE(review): the integer cast below was already disabled; presumably rows
# that carry a data marker have no numeric value -- confirm before re-enabling.
# tidy['Value'] = tidy['Value'].astype(int)

# Keep only the output columns, in their final order.
tidy = tidy[['Geography', 'Year', 'Reason for migration', 'Migration Flow',
             'Measure Type','Value','CI','Unit','Marker']]
tidy

Unnamed: 0,Geography,Year,Reason for migration,Migration Flow,Measure Type,Value,CI,Unit,Marker
0,K02000001,1991,all-reasons,inflow,Count,329,23.0,People (thousands),
1,K02000001,1991,work-related-all,inflow,Count,71,10.0,People (thousands),
2,K02000001,1991,work-related-definite-job,inflow,Count,50,9.0,People (thousands),
3,K02000001,1991,work-related-looking-for-work,inflow,Count,21,4.0,People (thousands),
4,K02000001,1991,accompany-or-join,inflow,Count,90,14.0,People (thousands),
5,K02000001,1991,formal-study,inflow,Count,56,10.0,People (thousands),
6,K02000001,1991,other,inflow,Count,67,7.0,People (thousands),
7,K02000001,1991,no-reason-stated,inflow,Count,45,9.0,People (thousands),
8,K02000001,1992,all-reasons,inflow,Count,268,20.0,People (thousands),
9,K02000001,1992,work-related-all,inflow,Count,76,10.0,People (thousands),


In [9]:
# Make sure the output directory exists, then write the de-duplicated
# observations to it without the DataFrame index.
out = Path('out')
out.mkdir(parents=True, exist_ok=True)

observations_csv = out / 'observations.csv'
tidy.drop_duplicates().to_csv(observations_csv, index=False)

In [10]:
from gssutils.metadata import THEME

# Set the dataset's family and theme, then write the generated TriG metadata
# alongside the observations. The file is opened in binary mode because the
# result of `generate_trig()` is written out as-is.
scraper.dataset.family = 'migration'
scraper.dataset.theme = THEME['population']

trig_path = out / 'dataset.trig'
with trig_path.open('wb') as trig_file:
    trig_file.write(scraper.generate_trig())