# International Passenger Survey 4.01, citizenship group by sex by age by country of last or next residence

Convert all tabs from the latest Excel spreadsheet available from https://www.ons.gov.uk/peoplepopulationandcommunity/populationandmigration/internationalmigration/datasets/ipscitizenshipgroupbysexbyagebycountryoflastornextresidence

In [1]:
%run lib/scrape_ons.ipynb

metadata = scrape('https://www.ons.gov.uk/peoplepopulationandcommunity/populationandmigration/' \
                  'internationalmigration/datasets/ipscitizenshipgroupbysexbyagebycountryoflastornextresidence')

Download the spreadsheet and load directly into Pandas.

In [2]:
from databaker.framework import *
import pandas as pd

# Fetch the spreadsheet referenced by the scraped metadata and keep it in
# memory; wrapping the bytes in BytesIO lets pandas open the workbook without
# writing a temporary file.
spreadsheet_bytes = session.get(metadata['fileURL']).content
xls = pd.ExcelFile(BytesIO(spreadsheet_bytes))

Now run separate notebooks for each tab in the spreadsheet and collate the results in a single DataFrame.

In [3]:
# Start from an empty frame; the results of each tab notebook are concatenated
# onto it in the following cell.
next_table = pd.DataFrame()

In [4]:
%%capture
%run "Long-term international migration 4.01A Passenger survey.ipynb"
next_table = pd.concat([next_table, Final_table])

%run "Long-term international migration 4.01B Passenger survey.ipynb"
next_table = pd.concat([next_table, Final_table])

%run"Long-term international migration 4.01C Passenger survey.ipynb"
next_table = pd.concat([next_table, Final_table])

%run "Long-term international migration 4.01D Passenger survey.ipynb"
next_table = pd.concat([next_table, Final_table])

In [5]:
# Non-null count per column: a quick check that every column of the collated
# table is fully populated.
next_table.count()

Geography               53184
Year                    53184
Country of Residence    53184
Migration Flow          53184
Citizenship             53184
Sex                     53184
Age                     53184
Measure Type            53184
Value                   53184
CI                      53184
Unit                    53184
dtype: int64

In [6]:
# Remove the leading "cit " marker from each citizenship label.
# `str.lstrip('cit ')` treats its argument as a *set of characters*, not a
# prefix, so it would also eat the start of the label itself whenever that
# begins with c, i, t or a space (e.g. "cit italian" -> "alian"). An anchored
# regex removes only the marker and the whitespace that follows it.
next_table['Citizenship'] = next_table['Citizenship'].str.replace(r'^\s*cit\s+', '', regex=True)

In [7]:
# Turn every space in the age labels into "/" (regex substitution applied
# inside each string value).
next_table['Age'] = next_table['Age'].replace(regex=r" ", value="/")

In [8]:
# Collapse the "age/all" label to plain "all".
next_table['Age'] = [
    age_label.replace('age/all', 'all')
    for age_label in next_table['Age']
]

In [9]:
# The earlier space-to-slash step turns "65 plus" into "65/plus"; rewrite it
# with a hyphen so "/" appears only where the earlier step put it elsewhere.
next_table['Age'] = [
    age_label.replace('65/plus', '65-plus')
    for age_label in next_table['Age']
]

In [10]:
# Simplify the South Korea label by dropping the comma and slash.
# NOTE(review): the replacement uses spaces, whereas the Cyprus replacements
# further down keep hyphens — confirm this is what downstream expects.
next_table['Country of Residence'] = [
    country.replace('korea,-south-/-republic', 'korea south republic')
    for country in next_table['Country of Residence']
]

In [11]:
# Simplify the Myanmar / Burma label by dropping the slash.
next_table['Country of Residence'] = [
    country.replace('myanmar-/-burma', 'myanmar burma')
    for country in next_table['Country of Residence']
]

In [12]:
# Simplify the Cambodia / Kampuchea label by dropping the slash.
next_table['Country of Residence'] = [
    country.replace('cambodia-/-kampuchea', 'cambodia kampuchea')
    for country in next_table['Country of Residence']
]

In [13]:
# Drop the comma from the Northern Cyprus label.
next_table['Country of Residence'] = [
    country.replace('cyprus,-northern', 'cyprus-northern')
    for country in next_table['Country of Residence']
]

In [14]:
# Drop the comma from the Southern Cyprus label.
next_table['Country of Residence'] = [
    country.replace('cyprus,-southern', 'cyprus-southern')
    for country in next_table['Country of Residence']
]

In [15]:
# Publish the 'Geography' column under the name 'Area of Destination or Origin';
# every other column keeps its name.
next_table = next_table.rename(columns={'Geography': 'Area of Destination or Origin'})

In [16]:
# Hyphenate multi-word citizenship labels
# (e.g. "british or british overseas" -> "british-or-british-overseas").
next_table['Citizenship'] = [
    label.replace(' ', '-')
    for label in next_table['Citizenship']
]

In [17]:
# List the distinct citizenship labels to eyeball the result of the clean-up.
next_table['Citizenship'].unique()

array(['all', 'british', 'non-british', 'british-or-british-overseas',
       'not-british-or-british-overseas'], dtype=object)

In [18]:
# Keep exactly these columns, in this order, for the published table.
next_table = next_table.loc[:, [
    'Area of Destination or Origin',
    'Year',
    'Country of Residence',
    'Migration Flow',
    'Citizenship',
    'Sex',
    'Age',
    'Measure Type',
    'Value',
    'CI',
    'Unit',
]]

In [19]:
# Preview the first rows of the collated, tidied table.
next_table.head()

Unnamed: 0,Area of Destination or Origin,Year,Country of Residence,Migration Flow,Citizenship,Sex,Age,Measure Type,Value,CI,Unit
1,K02000001,2016,all,inflow,all,T,all,Count,526.6,34.4,People (thousands)
3,K02000001,2016,all,inflow,all,T,agq/0-4,Count,8.1,3.2,People (thousands)
5,K02000001,2016,all,inflow,all,T,agq/5-9,Count,10.2,7.0,People (thousands)
7,K02000001,2016,all,inflow,all,T,agq/10-14,Count,7.9,3.9,People (thousands)
9,K02000001,2016,all,inflow,all,T,agq/15-19,Count,55.5,10.0,People (thousands)


In [20]:
# Preview the last rows of the collated, tidied table.
next_table.tail()

Unnamed: 0,Area of Destination or Origin,Year,Country of Residence,Migration Flow,Citizenship,Sex,Age,Measure Type,Value,CI,Unit
49479,K02000001,2016,zimbabwe,balance,not-british-or-british-overseas,F,agq/55-59,Count,0.1,0.1,People (thousands)
49481,K02000001,2016,zimbabwe,balance,not-british-or-british-overseas,F,ag1/19-21,Count,0.2,0.3,People (thousands)
49483,K02000001,2016,zimbabwe,balance,not-british-or-british-overseas,F,ag1/22-59,Count,0.1,0.1,People (thousands)
49485,K02000001,2016,zimbabwe,balance,not-british-or-british-overseas,F,ag2/15-24,Count,0.2,0.3,People (thousands)
49487,K02000001,2016,zimbabwe,balance,not-british-or-british-overseas,F,ag2/45-59,Count,0.1,0.1,People (thousands)


In [21]:
# Ensure the output folder exists (no error if it is already there), then
# write the table as CSV without the DataFrame index column.
# `out` stays a notebook-level name in case the shared helpers read it.
out = Path('out')
out.mkdir(exist_ok=True)
csv_path = out / 'migration_4.01.csv'
next_table.to_csv(csv_path, index=False)

In [22]:
# `writeMetadata` is not defined in this notebook (presumably provided by
# lib/scrape_ons.ipynb — confirm). It is passed the scraped metadata plus two
# labels; what those labels mean is decided by the helper — TODO confirm.
writeMetadata(metadata, 'ONS LTIM Passenger Survey 4.01', 'Migration')