Long-term international migration 2.01a, citizenship, UK and England and Wales

In [1]:
from databaker.framework import *
import pandas as pd 

In [2]:
from pathlib import Path

# Folder that receives the downloaded source spreadsheet; created if it does not exist yet.
sourceFolder = Path('in')
sourceFolder.mkdir(exist_ok=True)

%run lib/scrape_ons.ipynb

# Scrape the ONS dataset landing page; the returned dict (shown below) carries the
# spreadsheet's fileURL, title and release date.
metadata = scrape(
    'https://www.ons.gov.uk/peoplepopulationandcommunity/populationandmigration/'
    'internationalmigration/datasets/longterminternationalmigrationcitizenshiptable201a'
)
metadata

{'about': 'Nationality of migrants. Estimates of Long-Term International Migration, annual table.',
 'fileURL': 'https://www.ons.gov.uk/file?uri=/peoplepopulationandcommunity/populationandmigration/internationalmigration/datasets/longterminternationalmigrationcitizenshiptable201a/current/2.01altimcitizenship2004to2016.xls',
 'mailto': 'mailto:migstatsunit@ons.gsi.gov.uk',
 'releaseDate': datetime.date(2017, 11, 30),
 'title': 'Long-term international migration 2.01a, citizenship, UK and England and Wales'}

In [3]:
# Download the spreadsheet named in the scraped metadata and load the single sheet we need.
inputFile = sourceFolder / 'data.xls'
response = session.get(metadata['fileURL'])
# Stop here on an HTTP error rather than saving an error page as data.xls and
# letting the spreadsheet loader fail on it with a less helpful message.
# NOTE(review): assumes `session` (not defined in the visible cells, presumably from
# lib/scrape_ons.ipynb) is a requests-style session whose responses provide
# raise_for_status() - confirm.
response.raise_for_status()
with open(inputFile, 'wb') as f:
    f.write(response.content)
# loadxlstabs returns a list of tabs; only 'Table 2.01a' is requested, so take the first.
tab = loadxlstabs(inputFile, sheetids='Table 2.01a')[0]

Loading in\data.xls which has size 268288 bytes
Table names: ['Table 2.01a']


In [4]:
# tab = pd.read_excel(inputFile, header = None, sheet_name = 1)
# tab

In [5]:
# Row 15 holds alternating 'Estimate' / '+/-CI' column headers; keep the 'Estimate' ones.
estimate_headers = tab.excel_ref("B15").expand(RIGHT).filter(contains_string("Estimate"))
# Observations are the non-blank, non-whitespace cells of that selection extended downwards.
observations = estimate_headers.expand(DOWN).is_not_blank().is_not_whitespace()
observations

{<AU106 0.0>, <AM58 2.0>, <O78 11.0>, <I88 -50.0>, <T33 265.0>, <AS60 2.0>, <G79 380.0>, <AD28 129.0>, <M99 -41.0>, <AO64 14.0>, <AS48 -9.0>, <K80 94.0>, <AB115 36.0>, <AK78 24.0>, <O46 -3.0>, <B84 'No'>, <AS107 -2.0>, <AQ87 -6.0>, <AD29 71.0>, <AI47 -27.0>, <AS54 9.0>, <AU45 0.0>, <T42 -119.0>, <B38 -361.0>, <I93 -89.0>, <O37 'z'>, <AI108 27.0>, <X61 103.0>, <E39 -207.0>, <M27 86.0>, <AU28 1.0>, <AQ78 5.0>, <Q62 1.0>, <AM28 4.0>, <Z92 -5.0>, <B92 -328.0>, <AF28 20.0>, <AS42 -26.0>, <X55 155.0>, <AU108 0.0>, <AO54 4.0>, <Q21 0.0>, <AD88 -14.0>, <AM41 -2.0>, <AM53 6.0>, <V87 -8.0>, <AD62 27.0>, <AK27 29.0>, <O25 15.0>, <AS76 16.0>, <AM22 3.0>, <AB39 -18.0>, <E24 74.0>, <AK64 22.0>, <K60 34.0>, <AQ55 4.0>, <X37 -41.0>, <AF30 20.0>, <AD26 110.0>, <AM112 12.0>, <O115 52.0>, <T65 175.0>, <O94 -4.0>, <I46 -78.0>, <M37 -3.0>, <T79 241.0>, <AI37 -55.0>, <B45 -321.0>, <AD112 25.0>, <B112 203.0>, <AM77 7.0>, <B64 332.0>, <AI28 76.0>, <AQ40 -5.0>, <Z64 24.0>, <AI106 53.0>, <M93 -32.0>, <O84 'No'>

In [6]:
# The confidence-interval cells are the observation selection shifted RIGHT
# (the '+/-CI' columns on row 15 alternate with the 'Estimate' columns).
CI = observations.shift(RIGHT)
CI

{<Y30 14.0>, <L92 11.0>, <AV115 0.0>, <F53 27.0>, <F44 13.0>, <AE15 '+/-CI'>, <F80 11.0>, <AA24 4.0>, <R71 1.0>, <AN46 1.0>, <AA104 6.0>, <U88 15.0>, <P111 4.0>, <AE73 14.0>, <AT43 3.0>, <W91 5.0>, <AR107 6.0>, <AV53 1.0>, <AT76 4.0>, <AA54 7.0>, <C89 33.0>, <W61 5.0>, <AC92 3.0>, <AC50 ''>, <AJ41 8.0>, <AP96 3.0>, <C39 34.0>, <U32 19.0>, <AT75 5.0>, <N81 15.0>, <P43 2.0>, <F22 18.0>, <H66 ''>, <AC28 9.0>, <AV73 0.0>, <J116 ''>, <F108 17.0>, <L100 ''>, <AN45 1.0>, <R45 1.0>, <AR59 4.0>, <U99 9.0>, <F24 14.0>, <AL82 5.0>, <U63 25.0>, <AA26 4.0>, <AG107 8.0>, <AP106 5.0>, <H53 41.0>, <N91 20.0>, <R58 3.0>, <AP33 5.0>, <AR31 7.0>, <AJ76 9.0>, <C56 ':'>, <Y98 6.0>, <AL99 1.0>, <L93 11.0>, <AJ42 7.0>, <H94 16.0>, <AN43 1.0>, <AL65 8.0>, <H26 26.0>, <F46 12.0>, <H83 30.0>, <AL61 6.0>, <AC88 9.0>, <Y56 20.0>, <N29 13.0>, <AL64 6.0>, <Y89 9.0>, <AE104 11.0>, <AT109 5.0>, <U78 17.0>, <U103 31.0>, <C115 39.0>, <AC62 11.0>, <N25 19.0>, <L45 8.0>, <Y62 15.0>, <F74 13.0>, <AN106 5.0>, <AC47 8.0>, <

In [7]:
# Top-level citizenship headers: non-blank cells on row 12, from column B rightwards.
Citizenship1 = tab.excel_ref('B12').expand(RIGHT).is_not_blank()
Citizenship1

{<E12 'British\n(Including Overseas Territories)'>, <B12 'All citizenships'>, <T12 'Non-European Union3'>, <I12 'European Union2'>, <G12 'Non-British'>, <AX12 'All citizenships'>}

In [8]:
# Second-level headers (used as the 'Region' dimension): non-blank cells on row 13, from column I rightwards.
Citizenship2 = tab.excel_ref('I13').expand(RIGHT).is_not_blank()
Citizenship2

{<X13 'Asia'>, <T13 'All3'>, <K13 'European Union EU15'>, <AI13 'Rest of the World'>, <M13 'European Union EU8'>, <Q13 'European Union Other'>, <I13 'European Union2'>, <AU13 'Stateless'>, <O13 'European Union EU2'>, <V13 'Other Europe3'>}

In [9]:
# Third-level headers (used as the 'Territory' dimension): non-blank cells on row 14, from column X rightwards.
# NOTE(review): per the output below this also picks up AX14 'Original Estimates1',
# which is the same cell selected for the Revision dimension - confirm that is intended.
Citizenship3 = tab.excel_ref('X14').expand(RIGHT).is_not_blank()
Citizenship3

{<AF14 'South East Asia'>, <AB14 'East Asia'>, <AI14 'All'>, <AK14 'Sub-Saharan Africa'>, <AD14 'South Asia'>, <AS14 'Oceania'>, <AO14 'North America'>, <AM14 'North Africa'>, <X14 'All'>, <Z14 'Middle East and Central Asia'>, <AQ14 'Central and South America'>, <AX14 'Original Estimates1'>}

In [10]:
# Geography labels live in column A at fixed positions; the indices select the cells
# shown in the output as A17 ('United Kingdom') and A67 ('England and Wales').
# These are hard-coded to this spreadsheet layout.
Geography = tab.excel_ref('A').expand(DOWN).by_index([17,67])
Geography

{<A17 'United Kingdom'>, <A67 'England and Wales'>}

In [11]:
# Flow section labels (Inflow / Outflow / Balance) in column A, again selected by
# hard-coded position: three per geography block, as listed in the output.
Flow = tab.excel_ref('A').expand(DOWN).by_index([19,35,51,69,85,101])
Flow

{<A85 'Outflow'>, <A19 'Inflow'>, <A69 'Inflow'>, <A51 'Balance'>, <A101 'Balance'>, <A35 'Outflow'>}

In [12]:
# Year labels: column A from row 12 down, minus the geography and flow labels and
# minus everything from A116 downwards.
# The selection still contains blank cells and 'Significant Change?' labels (see output).
Year = tab.excel_ref('A12').expand(DOWN) - Geography - Flow - tab.excel_ref('A116').expand(DOWN)  
Year

{<A29 2012.0>, <A63 2014.0>, <A92 2009.0>, <A65 '2016'>, <A58 2009.0>, <A37 2004.0>, <A99 '2016'>, <A71 2004.0>, <A79 2012.0>, <A78 2011.0>, <A77 2010.0>, <A96 2013.0>, <A39 2006.0>, <A28 2011.0>, <A59 2010.0>, <A73 2006.0>, <A82 '2015'>, <A38 2005.0>, <A48 '2015'>, <A14 ''>, <A13 ''>, <A49 '2016'>, <A40 2007.0>, <A62 2013.0>, <A22 2005.0>, <A36 ''>, <A24 2007.0>, <A81 2014.0>, <A103 2004.0>, <A76 2009.0>, <A20 ''>, <A42 2009.0>, <A21 2004.0>, <A115 '2016'>, <A31 2014.0>, <A104 2005.0>, <A80 2013.0>, <A23 2006.0>, <A110 2011.0>, <A107 2008.0>, <A102 ''>, <A88 2005.0>, <A43 2010.0>, <A50 'Significant Change?'>, <A60 2011.0>, <A57 2008.0>, <A64 '2015'>, <A98 '2015'>, <A91 2008.0>, <A86 ''>, <A100 'Significant Change?'>, <A27 2010.0>, <A105 2006.0>, <A46 2013.0>, <A106 2007.0>, <A61 2012.0>, <A87 2004.0>, <A97 2014.0>, <A90 2007.0>, <A26 2009.0>, <A47 2014.0>, <A66 'Significant Change?'>, <A111 2012.0>, <A108 2009.0>, <A16 ''>, <A109 2010.0>, <A53 2004.0>, <A83 '2016'>, <A15 ''>, <A95 201

In [13]:
# All non-blank column headers on row 15 ('Estimate' and '+/-CI').
# Not referenced by the Dimensions list defined in this notebook.
Tag = tab.excel_ref('B15').expand(RIGHT).is_not_blank()
Tag

{<AJ15 '+/-CI'>, <J15 '+/-CI'>, <U15 '+/-CI'>, <AN15 '+/-CI'>, <C15 '+/-CI'>, <N15 '+/-CI'>, <M15 'Estimate'>, <X15 'Estimate'>, <AE15 '+/-CI'>, <AK15 'Estimate'>, <Z15 'Estimate'>, <G15 'Estimate'>, <AX15 'Estimate'>, <T15 'Estimate'>, <AP15 '+/-CI'>, <AT15 '+/-CI'>, <L15 '+/-CI'>, <AS15 'Estimate'>, <B15 'Estimate'>, <I15 'Estimate'>, <AM15 'Estimate'>, <F15 '+/-CI'>, <AF15 'Estimate'>, <Q15 'Estimate'>, <AR15 '+/-CI'>, <W15 '+/-CI'>, <R15 '+/-CI'>, <P15 '+/-CI'>, <AO15 'Estimate'>, <O15 'Estimate'>, <K15 'Estimate'>, <AY15 '+/-CI'>, <Y15 '+/-CI'>, <AB15 'Estimate'>, <AI15 'Estimate'>, <AL15 '+/-CI'>, <AG15 '+/-CI'>, <H15 '+/-CI'>, <V15 'Estimate'>, <AC15 '+/-CI'>, <AV15 '+/-CI'>, <AU15 'Estimate'>, <E15 'Estimate'>, <AQ15 'Estimate'>, <AA15 '+/-CI'>, <AD15 'Estimate'>}

In [14]:
# Revision header: non-blank cells on row 14 from column AX rightwards
# (only AX14 'Original Estimates1' according to the output).
Revision = tab.excel_ref('AX14').expand(RIGHT).is_not_blank()
Revision

{<AX14 'Original Estimates1'>}

In [15]:
# Header look-ups attached to every observation cell. The order is kept as-is:
# the resulting table lists its columns in this same order.
# NOTE(review): column AX has a Revision header on row 14 but no row-13 header, so the
# CLOSEST/LEFT look-up presumably gives it the nearest Region to its left - confirm.
Dimensions = [
    # Labels read from column A.
    HDim(Year, 'Year', DIRECTLY, LEFT),
    HDim(Geography, 'Geography', CLOSEST, ABOVE),
    # Header rows 12, 13 and 14 respectively.
    HDim(Citizenship1, 'Citizenship1', CLOSEST, LEFT),
    HDim(Citizenship2, 'Region', CLOSEST, LEFT),
    HDim(Citizenship3, 'Territory', DIRECTLY, ABOVE),
    HDim(Flow, 'Flow', CLOSEST, ABOVE),
    # Constants applied to every row.
    HDimConst('Measure Type', 'Count'),
    HDimConst('Unit', 'People (thousands)'),
    HDim(Revision, 'Revision', DIRECTLY, ABOVE),
    # Confidence interval: the cell directly to the right of each observation.
    HDim(CI, 'CI', DIRECTLY, RIGHT),
]

In [16]:
# Combine the observation cells with their dimension look-ups and render an HTML
# preview of the tab for visual checking.
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
savepreviewhtml(c1)

0,1,2,3,4,5,6,7,8
OBS,Year,Geography,Citizenship1,Region,Territory,Flow,Revision,CI

0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,,,,Highlight significant changes over the last year?,,,,,,,,,,1,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Table 2.01a,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Series MN
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Long-Term International Migration,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,"United Kingdom,"
"time series, 2004 to 2016",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,England and Wales


In [17]:
# Flatten the conversion segment into a DataFrame: one row per observation cell,
# one column per dimension, plus OBS and DATAMARKER.
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,DATAMARKER,Year,Geography,Citizenship1,Region,Territory,Flow,Measure Type,Unit,Revision,CI
0,,Estimate,,,All citizenships,,,,Count,People (thousands),,+/-CI
1,,Estimate,,,British\n(Including Overseas Territories),,,,Count,People (thousands),,+/-CI
2,,Estimate,,,Non-British,,,,Count,People (thousands),,+/-CI
3,,Estimate,,,European Union2,European Union2,,,Count,People (thousands),,+/-CI
4,,Estimate,,,European Union2,European Union EU15,,,Count,People (thousands),,+/-CI
5,,Estimate,,,European Union2,European Union EU8,,,Count,People (thousands),,+/-CI
6,,Estimate,,,European Union2,European Union EU2,,,Count,People (thousands),,+/-CI
7,,Estimate,,,European Union2,European Union Other,,,Count,People (thousands),,+/-CI
8,,Estimate,,,Non-European Union3,All3,,,Count,People (thousands),,+/-CI
9,,Estimate,,,Non-European Union3,Other Europe3,,,Count,People (thousands),,+/-CI


In [18]:
new_table.count()

OBS             1879
DATAMARKER       173
Year            1857
Geography       1856
Citizenship1    1879
Region          1624
Territory        944
Flow            1856
Measure Type    1879
Unit            1879
Revision           9
CI              1879
dtype: int64

In [19]:
# new_table[new_table['DATAMARKER'] != 'None']

In [20]:
# new_table.OBS.fillna('0', inplace = True)

In [21]:
# new_table.fillna('None', inplace = True)

In [22]:
# def user_perc(x,y):
    
#     if str(x) == 'None':
#         return y
#     else:
#         return 0
    
# new_table['OBS'] = new_table.apply(lambda row: user_perc(row['DATAMARKER'], row['OBS']), axis = 1)
# #

In [23]:
new_table.count()

OBS             1879
DATAMARKER       173
Year            1857
Geography       1856
Citizenship1    1879
Region          1624
Territory        944
Flow            1856
Measure Type    1879
Unit            1879
Revision           9
CI              1879
dtype: int64

In [24]:
# new_table[new_table['OBS'] == 'z']

In [25]:
# Keep only the rows that were matched to a Year label.
new_table = new_table[new_table['Year'].notnull()]

In [26]:
# Drop rows whose OBS is the empty string (per the counts shown in this notebook these
# are the rows that carry a DATAMARKER instead of a value).
# .copy() makes the result an independent frame, so the column assignments in the
# following cells do not write to a slice of the unfiltered table
# (which triggers pandas' SettingWithCopyWarning).
new_table = new_table[new_table['OBS'] != ''].copy()

In [27]:
new_table.count()

OBS             1706
DATAMARKER         0
Year            1706
Geography       1706
Citizenship1    1706
Region          1472
Territory        866
Flow            1706
Measure Type    1706
Unit            1706
Revision           8
CI              1706
dtype: int64

In [28]:
# Remove the trailing footnote marker '3' (e.g. 'Non-European Union3').
new_table['Citizenship1'] = new_table['Citizenship1'].str.rstrip('3')

In [29]:
# Remove the trailing footnote marker '2' (e.g. 'European Union2') ...
new_table['Citizenship1'] = new_table['Citizenship1'].str.rstrip('2')
# ... and flatten the line break in 'British\n(Including Overseas Territories)'.
new_table['Citizenship1'] = new_table['Citizenship1'].str.replace('\n', ' ')

In [30]:
# Remove the trailing footnote marker '3' ('All3', 'Other Europe3').
# Only '3' is stripped here: stripping '2' as well would corrupt 'European Union EU2'.
new_table['Region'] = new_table['Region'].str.rstrip('3')

In [31]:
# new_table['Region'] = new_table['Region'].map(lambda cell:cell.replace('Europe3', 'Europe'))

In [32]:
# Force plain strings so the column can be concatenated into the Citizenship label later.
new_table['Citizenship1'] = new_table['Citizenship1'].astype(str)

In [33]:
# Force plain strings; missing regions become a literal string as well
# (presumably 'None', which the later ':None' clean-up removes - confirm on the pandas version in use).
new_table['Region'] = new_table['Region'].astype(str)

In [34]:
# Force plain strings; missing territories become a literal string as well
# (presumably 'None', which the later ':None' clean-up removes - confirm on the pandas version in use).
new_table['Territory'] = new_table['Territory'].astype(str)

In [35]:
# Drop the footnote digit from 'European Union2' without touching 'European Union EU2'.
new_table['Region'] = [region.replace('Union2', 'Union') for region in new_table['Region']]

In [36]:
# Remove the trailing footnote marker '1' ('Original Estimates1').
new_table['Territory'] = new_table['Territory'].str.rstrip('1')

In [37]:
new_table

Unnamed: 0,OBS,DATAMARKER,Year,Geography,Citizenship1,Region,Territory,Flow,Measure Type,Unit,Revision,CI
23,589,,2004.0,United Kingdom,All citizenships,,,Inflow,Count,People (thousands),,40.0
24,92,,2004.0,United Kingdom,British (Including Overseas Territories),,,Inflow,Count,People (thousands),,14.0
25,497,,2004.0,United Kingdom,Non-British,,,Inflow,Count,People (thousands),,38.0
26,127,,2004.0,United Kingdom,European Union,European Union,,Inflow,Count,People (thousands),,22.0
27,76,,2004.0,United Kingdom,European Union,European Union EU15,,Inflow,Count,People (thousands),,15.0
28,51,,2004.0,United Kingdom,European Union,European Union EU8,,Inflow,Count,People (thousands),,16.0
30,0,,2004.0,United Kingdom,European Union,European Union Other,,Inflow,Count,People (thousands),,1.0
31,370,,2004.0,United Kingdom,Non-European Union,All,,Inflow,Count,People (thousands),,30.0
32,17,,2004.0,United Kingdom,Non-European Union,Other Europe,,Inflow,Count,People (thousands),,5.0
33,192,,2004.0,United Kingdom,Non-European Union,Asia,All,Inflow,Count,People (thousands),,24.0


In [38]:
def user_perc(x):
    """Classify an estimate's revision status from its confidence-interval value.

    Args:
        x: The CI value of a row; it is compared after conversion with ``str``.

    Returns:
        '2011 Census Revision' when the CI is exactly the ':' marker,
        otherwise 'Original Estimate'.
    """
    return '2011 Census Revision' if str(x) == ':' else 'Original Estimate'
    
# Revision depends on the CI column only, so map the classifier over that column
# instead of materialising every row with DataFrame.apply(axis=1).
# This overwrites the Revision values produced by the databaker look-up.
new_table['Revision'] = new_table['CI'].map(user_perc)

In [39]:
new_table.head()

Unnamed: 0,OBS,DATAMARKER,Year,Geography,Citizenship1,Region,Territory,Flow,Measure Type,Unit,Revision,CI
23,589,,2004.0,United Kingdom,All citizenships,,,Inflow,Count,People (thousands),Original Estimate,40.0
24,92,,2004.0,United Kingdom,British (Including Overseas Territories),,,Inflow,Count,People (thousands),Original Estimate,14.0
25,497,,2004.0,United Kingdom,Non-British,,,Inflow,Count,People (thousands),Original Estimate,38.0
26,127,,2004.0,United Kingdom,European Union,European Union,,Inflow,Count,People (thousands),Original Estimate,22.0
27,76,,2004.0,United Kingdom,European Union,European Union EU15,,Inflow,Count,People (thousands),Original Estimate,15.0


In [40]:
# new_table['Key'] = new_table['Year'] + '/' + new_table['Geography'] + '/' + new_table['Citizenship1'] + '/' + new_table['Region'] + '/' + new_table['Territory'] + '/' + new_table['Flow']

In [41]:
# new_table = pd.merge(new_table, new_table[['Key', 'OBS', 'Tag']], how = 'left', left_on = 'Key', right_on = 'Key')

In [42]:
# new_table.count()

In [43]:
# new_table.count()

In [44]:
# new_table  = new_table[new_table['Tag_x'] != new_table['Tag_y']]

In [45]:
# new_table.count()

In [46]:
# new_table.drop_duplicates('Key', inplace = True)

In [47]:
# new_table.count()

In [48]:
# new_table['OBS_x'] = new_table['OBS_x'].astype(int)

In [49]:
# new_table['OBS_y'] = pd.to_numeric(new_table['OBS_y'])

In [50]:
# new_table['OBS_y'].fillna(0, inplace = True)

In [51]:
# new_table['OBS_y'] = new_table['OBS_y'].astype(int)

In [52]:
# Copy the observations into a 'Value' column as strings; converted to numbers further down.
new_table['Value'] = new_table['OBS'].astype(str) 

In [53]:
# Normalise CI to strings; it is parsed back to numbers further down.
new_table['CI'] = new_table['CI'].astype(str) 

In [54]:
new_table

Unnamed: 0,OBS,DATAMARKER,Year,Geography,Citizenship1,Region,Territory,Flow,Measure Type,Unit,Revision,CI,Value
23,589,,2004.0,United Kingdom,All citizenships,,,Inflow,Count,People (thousands),Original Estimate,40.0,589.0
24,92,,2004.0,United Kingdom,British (Including Overseas Territories),,,Inflow,Count,People (thousands),Original Estimate,14.0,92.0
25,497,,2004.0,United Kingdom,Non-British,,,Inflow,Count,People (thousands),Original Estimate,38.0,497.0
26,127,,2004.0,United Kingdom,European Union,European Union,,Inflow,Count,People (thousands),Original Estimate,22.0,127.0
27,76,,2004.0,United Kingdom,European Union,European Union EU15,,Inflow,Count,People (thousands),Original Estimate,15.0,76.0
28,51,,2004.0,United Kingdom,European Union,European Union EU8,,Inflow,Count,People (thousands),Original Estimate,16.0,51.0
30,0,,2004.0,United Kingdom,European Union,European Union Other,,Inflow,Count,People (thousands),Original Estimate,1.0,0.0
31,370,,2004.0,United Kingdom,Non-European Union,All,,Inflow,Count,People (thousands),Original Estimate,30.0,370.0
32,17,,2004.0,United Kingdom,Non-European Union,Other Europe,,Inflow,Count,People (thousands),Original Estimate,5.0,17.0
33,192,,2004.0,United Kingdom,Non-European Union,Asia,All,Inflow,Count,People (thousands),Original Estimate,24.0,192.0


In [55]:
# Build a single hierarchical label 'Citizenship1:Region:Territory'.
citizenship_with_region = new_table['Citizenship1'] + ':' + new_table['Region']
new_table['Citizenship'] = citizenship_with_region + ':' + new_table['Territory']

In [56]:
# Remove the ':None' segments left behind by levels that had no header.
new_table['Citizenship'] = [label.replace(':None', '') for label in new_table['Citizenship']]

In [57]:
# new_table['Citizenship'] = new_table['Citizenship'].str.rstrip(':None')

In [58]:
new_table

Unnamed: 0,OBS,DATAMARKER,Year,Geography,Citizenship1,Region,Territory,Flow,Measure Type,Unit,Revision,CI,Value,Citizenship
23,589,,2004.0,United Kingdom,All citizenships,,,Inflow,Count,People (thousands),Original Estimate,40.0,589.0,All citizenships
24,92,,2004.0,United Kingdom,British (Including Overseas Territories),,,Inflow,Count,People (thousands),Original Estimate,14.0,92.0,British (Including Overseas Territories)
25,497,,2004.0,United Kingdom,Non-British,,,Inflow,Count,People (thousands),Original Estimate,38.0,497.0,Non-British
26,127,,2004.0,United Kingdom,European Union,European Union,,Inflow,Count,People (thousands),Original Estimate,22.0,127.0,European Union:European Union
27,76,,2004.0,United Kingdom,European Union,European Union EU15,,Inflow,Count,People (thousands),Original Estimate,15.0,76.0,European Union:European Union EU15
28,51,,2004.0,United Kingdom,European Union,European Union EU8,,Inflow,Count,People (thousands),Original Estimate,16.0,51.0,European Union:European Union EU8
30,0,,2004.0,United Kingdom,European Union,European Union Other,,Inflow,Count,People (thousands),Original Estimate,1.0,0.0,European Union:European Union Other
31,370,,2004.0,United Kingdom,Non-European Union,All,,Inflow,Count,People (thousands),Original Estimate,30.0,370.0,Non-European Union:All
32,17,,2004.0,United Kingdom,Non-European Union,Other Europe,,Inflow,Count,People (thousands),Original Estimate,5.0,17.0,Non-European Union:Other Europe
33,192,,2004.0,United Kingdom,Non-European Union,Asia,All,Inflow,Count,People (thousands),Original Estimate,24.0,192.0,Non-European Union:Asia:All


In [59]:
# Year holds a mix of floats (2004.0) and strings ('2016'); bring everything to strings first.
new_table['Year'] = new_table['Year'].astype(str)

In [60]:
# Parse the year strings to numbers; with the default error handling an unparseable value raises here.
new_table['Year'] = pd.to_numeric(new_table['Year'])

In [61]:
# Store years as integers (2004 rather than 2004.0).
new_table['Year'] = new_table['Year'].astype(int)

In [62]:
new_table.dtypes

OBS             object
DATAMARKER      object
Year             int32
Geography       object
Citizenship1    object
Region          object
Territory       object
Flow            object
Measure Type    object
Unit            object
Revision        object
CI              object
Value           object
Citizenship     object
dtype: object

In [63]:
# Parse the observation strings to numbers; with the default error handling an unparseable value raises here.
new_table['Value'] = pd.to_numeric(new_table['Value'])

In [64]:
# Store values as integers; any fractional part would be dropped by this cast.
new_table['Value'] = new_table['Value'].astype(int)

In [65]:
# Parse CI to numbers. errors='coerce' turns anything non-numeric (for example the ':'
# marker or an empty cell) into NaN, which is then replaced by 0.
# NOTE(review): after this step a missing CI is indistinguishable from a CI of 0 - confirm that is acceptable.
new_table['CI'] = pd.to_numeric(new_table['CI'], errors = 'coerce').fillna(0)

In [66]:
# Store confidence intervals as integers; any fractional part would be dropped by this cast.
new_table['CI'] = new_table['CI'].astype(int)

In [67]:
new_table.count()

OBS             1706
DATAMARKER         0
Year            1706
Geography       1706
Citizenship1    1706
Region          1706
Territory       1706
Flow            1706
Measure Type    1706
Unit            1706
Revision        1706
CI              1706
Value           1706
Citizenship     1706
dtype: int64

In [68]:
# Swap the 'United Kingdom' label for the code K02000001.
new_table['Geography'] = [area.replace('United Kingdom', 'K02000001') for area in new_table['Geography']]

In [69]:
# Swap the 'England and Wales' label for the code K04000001.
new_table['Geography'] = [area.replace('England and Wales', 'K04000001') for area in new_table['Geography']]

In [70]:
# Keep only the columns that go into the tidy output, in their final order.
output_columns = [
    'Geography',
    'Year',
    'Citizenship',
    'Flow',
    'Measure Type',
    'Value',
    'CI',
    'Unit',
    'Revision',
]
new_table = new_table[output_columns]

In [71]:
new_table.head(5)

Unnamed: 0,Geography,Year,Citizenship,Flow,Measure Type,Value,CI,Unit,Revision
23,K02000001,2004,All citizenships,Inflow,Count,589,40,People (thousands),Original Estimate
24,K02000001,2004,British (Including Overseas Territories),Inflow,Count,92,14,People (thousands),Original Estimate
25,K02000001,2004,Non-British,Inflow,Count,497,38,People (thousands),Original Estimate
26,K02000001,2004,European Union:European Union,Inflow,Count,127,22,People (thousands),Original Estimate
27,K02000001,2004,European Union:European Union EU15,Inflow,Count,76,15,People (thousands),Original Estimate


In [72]:
new_table.count()

Geography       1706
Year            1706
Citizenship     1706
Flow            1706
Measure Type    1706
Value           1706
CI              1706
Unit            1706
Revision        1706
dtype: int64

In [73]:
# Write the tidy table to out/tidydata2_1.csv, creating the folder when needed.
destinationFolder = Path('out')
destinationFolder.mkdir(parents=True, exist_ok=True)

output_path = destinationFolder / 'tidydata2_1.csv'
new_table.to_csv(output_path, index=False)

In [74]:
# Hand the scraped metadata to the shared helper together with a dataset identifier and theme.
# NOTE(review): writeMetadata is not defined in the visible cells (presumably it comes
# from lib/scrape_ons.ipynb); what it writes and where cannot be told from here.
writeMetadata(metadata, 'ONS-LTIM-citizenship', 'Migration')