Long-term international migration 2.05, Occupation

In [1]:
from databaker.framework import *
import pandas as pd 

In [2]:
from pathlib import Path

sourceFolder = Path('in')
sourceFolder.mkdir(exist_ok=True)

%run lib/scrape_ons.ipynb

metadata = scrape('https://www.ons.gov.uk/peoplepopulationandcommunity/populationandmigration/' \
                  'internationalmigration/datasets/longterminternationalmigrationusualoccupationpriortomigrationtable205')
metadata

{'about': 'Regular job of migrants entering or leaving UK. Estimates of Long-Term International Migration, annual table.',
 'fileURL': 'https://www.ons.gov.uk/file?uri=/peoplepopulationandcommunity/populationandmigration/internationalmigration/datasets/longterminternationalmigrationusualoccupationpriortomigrationtable205/current/2.05ltimusualoccupationpriortomigration1991to2016.xls',
 'mailto': 'mailto:migstatsunit@ons.gsi.gov.uk',
 'releaseDate': datetime.date(2017, 11, 30),
 'title': 'Long-term international migration 2.05, usual occupation prior to migration, UK and England and Wales'}

In [3]:
inputFile = sourceFolder / 'data.xls'
response = session.get(metadata['fileURL'])
with open(inputFile, 'wb') as f:
  f.write(response.content)
tab = loadxlstabs(inputFile, sheetids='Table 2.05')[0]

Loading in\data.xls which has size 171008 bytes
Table names: ['Table 2.05']


In [4]:
# tab = pd.read_excel(inputFile, header = None, sheet_name = 1)
# tab

In [5]:
observations = tab.excel_ref('B20').expand(DOWN).expand(RIGHT).is_not_blank() - tab.excel_ref('B193').expand(DOWN).expand(RIGHT).is_not_blank()

In [6]:
observations = observations - tab.excel_ref('B').expand(DOWN).expand(RIGHT).by_index([46,75,104,135,164,193])
observations

{<H115 9.0>, <E28 151.0>, <N175 10.0>, <G84 -8.0>, <H150 21.0>, <L190 7.0>, <L150 8.0>, <E156 -127.0>, <I80 6.0>, <H128 16.0>, <L121 9.0>, <J92 23.0>, <N38 9.0>, <F159 10.0>, <I75 'No'>, <B188 165.0>, <M82 4.0>, <F37 23.0>, <K128 55.0>, <J184 20.0>, <N62 8.0>, <C61 32.0>, <L99 13.0>, <G26 50.0>, <H188 16.0>, <M115 43.0>, <E168 -23.0>, <I141 -43.0>, <I41 186.0>, <H43 20.0>, <M70 -15.0>, <N90 13.0>, <N127 9.0>, <K168 30.0>, <B95 229.0>, <L64 12.0>, <H112 10.0>, <E113 95.0>, <M139 -36.0>, <K110 55.0>, <E171 23.0>, <F62 17.0>, <L118 8.0>, <M88 22.0>, <C103 41.0>, <E139 -92.0>, <N182 15.0>, <G164 'Up'>, <L65 7.0>, <B132 583.0>, <N26 12.0>, <M39 33.0>, <J23 9.0>, <H78 15.0>, <B45 589.0>, <G159 -73.0>, <C38 30.0>, <H101 23.0>, <M75 'No'>, <K51 -36.0>, <L169 8.0>, <F189 16.0>, <H54 10.0>, <H87 17.0>, <L81 10.0>, <J188 18.0>, <H170 14.0>, <H79 12.0>, <G102 74.0>, <H90 27.0>, <F111 10.0>, <E186 34.0>, <N140 8.0>, <B155 -393.0>, <J71 8.0>, <I38 189.0>, <M121 42.0>, <F33 23.0>, <F89 26.0>, <C129 2

In [7]:
Occupation = tab.excel_ref('B12').expand(RIGHT).is_not_blank()
Occupation

{<I12 'Students2'>, <P12 'All persons'>, <B12 'All persons'>, <M12 'Children4'>, <E12 'Professional and managerial2'>, <G12 'Manual and clerical2'>, <K12 'Other adults3'>}

In [8]:
Geography = tab.excel_ref('A').expand(DOWN).by_index([16,105])
Geography

{<A105 'England and Wales'>, <A16 'United Kingdom'>}

In [9]:
Revisions = tab.excel_ref('B13').expand(RIGHT).is_not_blank()
Revisions

{<B13 '2011 Census Revisions1'>, <P13 'Original Estimates1'>}

In [10]:
Flow = tab.excel_ref('A').expand(DOWN).by_index([18,47,76,107,136,165])
Flow

{<A165 'Balance'>, <A47 'Outflow'>, <A136 'Outflow'>, <A107 'Inflow'>, <A76 'Balance'>, <A18 'Inflow'>}

In [11]:
Year = tab.excel_ref('A20').expand(DOWN) - Geography - Flow - tab.excel_ref('A').expand(DOWN).by_index([46,75,104,135,164,193]) 
Year

{<A94 2007.0>, <A89 2002.0>, <A209 'The latest estimates (2016) have been compared with the corresponding estimates for the period one year earlier (2015). Where changes have been found to be statistically significant, the relevant pair of estimates have been highlighted by setting their background colour. Please see the Notes worksheet for more information.'>, <A132 2014.0>, <A137 ''>, <A181 2005.0>, <A103 '2016'>, <A80 1993.0>, <A91 2004.0>, <A48 ''>, <A49 1991.0>, <A160 2013.0>, <A97 2010.0>, <A70 2012.0>, <A197 'Totals may not sum due to rounding.'>, <A64 2006.0>, <A99 2012.0>, <A116 1998.0>, <A142 1995.0>, <A20 1991.0>, <A111 1993.0>, <A23 1994.0>, <A77 ''>, <A118 2000.0>, <A26 1997.0>, <A42 2013.0>, <A212 '© Crown copyright. You may re-use this information (not including logos) free of charge in any format or medium, under the terms of the Open Government Licence.'>, <A145 1998.0>, <A214 'or write to the Information Policy Team, The National Archives, Kew, London TW9 4DU. Email: 

In [12]:
Tag = tab.excel_ref('B14').expand(RIGHT).is_not_blank()
Tag

{<I14 'Estimate'>, <L14 '+/-CI'>, <M14 'Estimate'>, <G14 'Estimate'>, <J14 '+/-CI'>, <E14 'Estimate'>, <B14 'Estimate'>, <Q14 '+/-CI'>, <P14 'Estimate'>, <N14 '+/-CI'>, <C14 '+/-CI'>, <H14 '+/-CI'>, <K14 'Estimate'>, <F14 '+/-CI'>}

In [13]:
Dimensions = [
            HDim(Year,'Year',DIRECTLY,LEFT),
            HDim(Geography,'Geography',CLOSEST,ABOVE),
            HDim(Occupation,'Occupation',CLOSEST,LEFT),
            HDim(Flow,'Flow',CLOSEST,ABOVE),
            HDimConst('Measure Type', 'Count'),
            HDimConst('Unit','People(thousands)'),
            HDim(Tag, 'Tag',DIRECTLY,ABOVE),
            HDim(Revisions, 'Revisions',CLOSEST,LEFT)
            ]

In [14]:
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
savepreviewhtml(c1)

0,1,2,3,4,5,6
OBS,Year,Geography,Occupation,Flow,Tag,Revisions

0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17
,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,
,,Highlight significant changes over the last year?,,,,,,,,,1,,,,,,
,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,
Table 2.05,,,,,,,,,,,,,,,,Series MN,
,,,,,,,,,,,,,,,,,
Long-Term International Migration,,,,,,,,,,,,,,,,"United Kingdom,",
"time series, 1991 to 2016",,,,,,,,,,,,,,,,England and Wales,


In [15]:
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,DATAMARKER,Year,Geography,Occupation,Flow,Measure Type,Unit,Tag,Revisions
0,329,,1991.0,United Kingdom,All persons,Inflow,Count,People(thousands),Estimate,2011 Census Revisions1
1,23,,1991.0,United Kingdom,All persons,Inflow,Count,People(thousands),+/-CI,2011 Census Revisions1
2,94,,1991.0,United Kingdom,Professional and managerial2,Inflow,Count,People(thousands),Estimate,2011 Census Revisions1
3,11,,1991.0,United Kingdom,Professional and managerial2,Inflow,Count,People(thousands),+/-CI,2011 Census Revisions1
4,64,,1991.0,United Kingdom,Manual and clerical2,Inflow,Count,People(thousands),Estimate,2011 Census Revisions1
5,11,,1991.0,United Kingdom,Manual and clerical2,Inflow,Count,People(thousands),+/-CI,2011 Census Revisions1
6,57,,1991.0,United Kingdom,Students2,Inflow,Count,People(thousands),Estimate,2011 Census Revisions1
7,10,,1991.0,United Kingdom,Students2,Inflow,Count,People(thousands),+/-CI,2011 Census Revisions1
8,58,,1991.0,United Kingdom,Other adults3,Inflow,Count,People(thousands),Estimate,2011 Census Revisions1
9,9,,1991.0,United Kingdom,Other adults3,Inflow,Count,People(thousands),+/-CI,2011 Census Revisions1


In [16]:
new_table.count()

OBS             1926
DATAMARKER        43
Year            1894
Geography       1926
Occupation      1926
Flow            1926
Measure Type    1926
Unit            1926
Tag             1924
Revisions       1926
dtype: int64

In [17]:
# new_table.to_csv('abc.csv')

In [18]:
def user_perc1(x,y):
    
    if str(x) == 'All persons':
        return y
    else:
        return 'None'
    
new_table['Revisions'] = new_table.apply(lambda row: user_perc1(row['Occupation'], row['Revisions']), axis = 1)

In [19]:
new_table.head(5)

Unnamed: 0,OBS,DATAMARKER,Year,Geography,Occupation,Flow,Measure Type,Unit,Tag,Revisions
0,329,,1991.0,United Kingdom,All persons,Inflow,Count,People(thousands),Estimate,2011 Census Revisions1
1,23,,1991.0,United Kingdom,All persons,Inflow,Count,People(thousands),+/-CI,2011 Census Revisions1
2,94,,1991.0,United Kingdom,Professional and managerial2,Inflow,Count,People(thousands),Estimate,
3,11,,1991.0,United Kingdom,Professional and managerial2,Inflow,Count,People(thousands),+/-CI,
4,64,,1991.0,United Kingdom,Manual and clerical2,Inflow,Count,People(thousands),Estimate,


In [20]:
new_table = new_table[new_table['Year'].isnull() == False]

In [21]:
new_table.count()

OBS             1894
DATAMARKER        11
Year            1894
Geography       1894
Occupation      1894
Flow            1894
Measure Type    1894
Unit            1894
Tag             1894
Revisions       1894
dtype: int64

In [22]:
new_table[new_table['DATAMARKER'] != 'None']

Unnamed: 0,OBS,DATAMARKER,Year,Geography,Occupation,Flow,Measure Type,Unit,Tag,Revisions
0,329,,1991.0,United Kingdom,All persons,Inflow,Count,People(thousands),Estimate,2011 Census Revisions1
1,23,,1991.0,United Kingdom,All persons,Inflow,Count,People(thousands),+/-CI,2011 Census Revisions1
2,94,,1991.0,United Kingdom,Professional and managerial2,Inflow,Count,People(thousands),Estimate,
3,11,,1991.0,United Kingdom,Professional and managerial2,Inflow,Count,People(thousands),+/-CI,
4,64,,1991.0,United Kingdom,Manual and clerical2,Inflow,Count,People(thousands),Estimate,
5,11,,1991.0,United Kingdom,Manual and clerical2,Inflow,Count,People(thousands),+/-CI,
6,57,,1991.0,United Kingdom,Students2,Inflow,Count,People(thousands),Estimate,
7,10,,1991.0,United Kingdom,Students2,Inflow,Count,People(thousands),+/-CI,
8,58,,1991.0,United Kingdom,Other adults3,Inflow,Count,People(thousands),Estimate,
9,9,,1991.0,United Kingdom,Other adults3,Inflow,Count,People(thousands),+/-CI,


In [23]:
new_table.OBS.fillna('0', inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._update_inplace(new_data)


In [24]:
new_table.fillna('None', inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  downcast=downcast, **kwargs)


In [25]:
def user_perc(x,y):
    
    if str(x) == 'None':
        return y
    else:
        return 0
    
new_table['OBS'] = new_table.apply(lambda row: user_perc(row['DATAMARKER'], row['OBS']), axis = 1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [26]:
# new_table[new_table['OBS'] == 'z']

In [27]:
new_table.count()

OBS             1894
DATAMARKER      1894
Year            1894
Geography       1894
Occupation      1894
Flow            1894
Measure Type    1894
Unit            1894
Tag             1894
Revisions       1894
dtype: int64

In [28]:
new_table['Occupation'] = new_table['Occupation'].str.rstrip('234')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [29]:
new_table['Revisions'] = new_table['Revisions'].str.rstrip('1')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [30]:
new_table.head()

Unnamed: 0,OBS,DATAMARKER,Year,Geography,Occupation,Flow,Measure Type,Unit,Tag,Revisions
0,329.0,,1991.0,United Kingdom,All persons,Inflow,Count,People(thousands),Estimate,2011 Census Revisions
1,23.0,,1991.0,United Kingdom,All persons,Inflow,Count,People(thousands),+/-CI,2011 Census Revisions
2,94.0,,1991.0,United Kingdom,Professional and managerial,Inflow,Count,People(thousands),Estimate,
3,11.0,,1991.0,United Kingdom,Professional and managerial,Inflow,Count,People(thousands),+/-CI,
4,64.0,,1991.0,United Kingdom,Manual and clerical,Inflow,Count,People(thousands),Estimate,


In [31]:
new_table.count()

OBS             1894
DATAMARKER      1894
Year            1894
Geography       1894
Occupation      1894
Flow            1894
Measure Type    1894
Unit            1894
Tag             1894
Revisions       1894
dtype: int64

In [32]:
new_table['Key'] = new_table['Year'] + '/' + new_table['Geography'] + '/' + new_table['Occupation'] + '/' + new_table['Flow'] + new_table['Revisions']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [33]:
Final_table = pd.merge(new_table, new_table[['Key','OBS','Tag']], how = 'left', left_on = 'Key', right_on = 'Key')

In [34]:
new_table.count()

OBS             1894
DATAMARKER      1894
Year            1894
Geography       1894
Occupation      1894
Flow            1894
Measure Type    1894
Unit            1894
Tag             1894
Revisions       1894
Key             1894
dtype: int64

In [35]:
Final_table.count()

OBS_x           3788
DATAMARKER      3788
Year            3788
Geography       3788
Occupation      3788
Flow            3788
Measure Type    3788
Unit            3788
Tag_x           3788
Revisions       3788
Key             3788
OBS_y           3788
Tag_y           3788
dtype: int64

In [36]:
Final_table  = Final_table[Final_table['Tag_x'] != Final_table['Tag_y']]

In [37]:
Final_table.count()

OBS_x           1894
DATAMARKER      1894
Year            1894
Geography       1894
Occupation      1894
Flow            1894
Measure Type    1894
Unit            1894
Tag_x           1894
Revisions       1894
Key             1894
OBS_y           1894
Tag_y           1894
dtype: int64

In [38]:
Final_table.drop_duplicates('Key', inplace = True)

In [39]:
Final_table.count()

OBS_x           947
DATAMARKER      947
Year            947
Geography       947
Occupation      947
Flow            947
Measure Type    947
Unit            947
Tag_x           947
Revisions       947
Key             947
OBS_y           947
Tag_y           947
dtype: int64

In [40]:
Final_table

Unnamed: 0,OBS_x,DATAMARKER,Year,Geography,Occupation,Flow,Measure Type,Unit,Tag_x,Revisions,Key,OBS_y,Tag_y
1,329.0,,1991.0,United Kingdom,All persons,Inflow,Count,People(thousands),Estimate,2011 Census Revisions,1991.0/United Kingdom/All persons/Inflow2011 C...,23.0,+/-CI
5,94.0,,1991.0,United Kingdom,Professional and managerial,Inflow,Count,People(thousands),Estimate,,1991.0/United Kingdom/Professional and manager...,11.0,+/-CI
9,64.0,,1991.0,United Kingdom,Manual and clerical,Inflow,Count,People(thousands),Estimate,,1991.0/United Kingdom/Manual and clerical/Infl...,11.0,+/-CI
13,57.0,,1991.0,United Kingdom,Students,Inflow,Count,People(thousands),Estimate,,1991.0/United Kingdom/Students/InflowNone,10.0,+/-CI
17,58.0,,1991.0,United Kingdom,Other adults,Inflow,Count,People(thousands),Estimate,,1991.0/United Kingdom/Other adults/InflowNone,9.0,+/-CI
21,56.0,,1991.0,United Kingdom,Children,Inflow,Count,People(thousands),Estimate,,1991.0/United Kingdom/Children/InflowNone,10.0,+/-CI
25,268.0,,1992.0,United Kingdom,All persons,Inflow,Count,People(thousands),Estimate,2011 Census Revisions,1992.0/United Kingdom/All persons/Inflow2011 C...,20.0,+/-CI
29,74.0,,1992.0,United Kingdom,Professional and managerial,Inflow,Count,People(thousands),Estimate,,1992.0/United Kingdom/Professional and manager...,10.0,+/-CI
33,51.0,,1992.0,United Kingdom,Manual and clerical,Inflow,Count,People(thousands),Estimate,,1992.0/United Kingdom/Manual and clerical/Infl...,8.0,+/-CI
37,50.0,,1992.0,United Kingdom,Students,Inflow,Count,People(thousands),Estimate,,1992.0/United Kingdom/Students/InflowNone,8.0,+/-CI


In [41]:
Final_table['Occupation'] = Final_table['Occupation'] + ':' + Final_table['Revisions']

In [42]:
Final_table['Occupation'] = Final_table['Occupation'].map(lambda cell:cell.replace(':None', ''))

In [43]:
Final_table['OBS_x'] = Final_table['OBS_x'].astype(int)

In [44]:
Final_table['OBS_y'] = pd.to_numeric(Final_table['OBS_y'])

In [45]:
Final_table['OBS_y'].fillna(0, inplace = True)

In [46]:
Final_table['OBS_y'] = Final_table['OBS_y'].astype(int)

In [47]:
Final_table['Value'] = Final_table['OBS_x'].astype(str)

In [48]:
Final_table['CI'] = Final_table['OBS_y'].astype(str)

In [49]:
# Final_table['Value'] = Final_table['OBS_x'].astype(str) + ' ± '+ Final_table['OBS_y'].astype(str)

In [50]:
Final_table

Unnamed: 0,OBS_x,DATAMARKER,Year,Geography,Occupation,Flow,Measure Type,Unit,Tag_x,Revisions,Key,OBS_y,Tag_y,Value,CI
1,329,,1991.0,United Kingdom,All persons:2011 Census Revisions,Inflow,Count,People(thousands),Estimate,2011 Census Revisions,1991.0/United Kingdom/All persons/Inflow2011 C...,23,+/-CI,329,23
5,94,,1991.0,United Kingdom,Professional and managerial,Inflow,Count,People(thousands),Estimate,,1991.0/United Kingdom/Professional and manager...,11,+/-CI,94,11
9,64,,1991.0,United Kingdom,Manual and clerical,Inflow,Count,People(thousands),Estimate,,1991.0/United Kingdom/Manual and clerical/Infl...,11,+/-CI,64,11
13,57,,1991.0,United Kingdom,Students,Inflow,Count,People(thousands),Estimate,,1991.0/United Kingdom/Students/InflowNone,10,+/-CI,57,10
17,58,,1991.0,United Kingdom,Other adults,Inflow,Count,People(thousands),Estimate,,1991.0/United Kingdom/Other adults/InflowNone,9,+/-CI,58,9
21,56,,1991.0,United Kingdom,Children,Inflow,Count,People(thousands),Estimate,,1991.0/United Kingdom/Children/InflowNone,10,+/-CI,56,10
25,268,,1992.0,United Kingdom,All persons:2011 Census Revisions,Inflow,Count,People(thousands),Estimate,2011 Census Revisions,1992.0/United Kingdom/All persons/Inflow2011 C...,20,+/-CI,268,20
29,74,,1992.0,United Kingdom,Professional and managerial,Inflow,Count,People(thousands),Estimate,,1992.0/United Kingdom/Professional and manager...,10,+/-CI,74,10
33,51,,1992.0,United Kingdom,Manual and clerical,Inflow,Count,People(thousands),Estimate,,1992.0/United Kingdom/Manual and clerical/Infl...,8,+/-CI,51,8
37,50,,1992.0,United Kingdom,Students,Inflow,Count,People(thousands),Estimate,,1992.0/United Kingdom/Students/InflowNone,8,+/-CI,50,8


In [51]:
Final_table['Year'] = Final_table['Year'].astype(str)

In [52]:
Final_table.dtypes

OBS_x            int32
DATAMARKER      object
Year            object
Geography       object
Occupation      object
Flow            object
Measure Type    object
Unit            object
Tag_x           object
Revisions       object
Key             object
OBS_y            int32
Tag_y           object
Value           object
CI              object
dtype: object

In [53]:
Final_table.Year.unique()

array(['1991.0', '1992.0', '1993.0', '1994.0', '1995.0', '1996.0',
       '1997.0', '1998.0', '1999.0', '2000.0', '2001.0', '2002.0',
       '2003.0', '2004.0', '2005.0', '2006.0', '2007.0', '2008.0',
       '2009.0', '2010.0', '2011.0', '2012.0', '2013.0', '2014.0', '2015',
       '2016'], dtype=object)

In [54]:
Final_table = Final_table[Final_table['Year'] !=  'None' ]

In [55]:
# Final_table['Year'] = Final_table(int(hex(Year),16))

In [56]:
Final_table['Year'] = pd.to_numeric(Final_table['Year'])

In [57]:
Final_table['Year'] = Final_table['Year'].astype(int)

In [58]:
Final_table.dtypes

OBS_x            int32
DATAMARKER      object
Year             int32
Geography       object
Occupation      object
Flow            object
Measure Type    object
Unit            object
Tag_x           object
Revisions       object
Key             object
OBS_y            int32
Tag_y           object
Value           object
CI              object
dtype: object

In [59]:
Final_table.tail(5)

Unnamed: 0,OBS_x,DATAMARKER,Year,Geography,Occupation,Flow,Measure Type,Unit,Tag_x,Revisions,Key,OBS_y,Tag_y,Value,CI
3769,53,,2016,England and Wales,Professional and managerial,Balance,Count,People(thousands),Estimate,,2016/England and Wales/Professional and manage...,21,+/-CI,53,21
3773,42,,2016,England and Wales,Manual and clerical,Balance,Count,People(thousands),Estimate,,2016/England and Wales/Manual and clerical/Bal...,23,+/-CI,42,23
3777,90,,2016,England and Wales,Students,Balance,Count,People(thousands),Estimate,,2016/England and Wales/Students/BalanceNone,20,+/-CI,90,20
3781,24,,2016,England and Wales,Other adults,Balance,Count,People(thousands),Estimate,,2016/England and Wales/Other adults/BalanceNone,7,+/-CI,24,7
3785,21,,2016,England and Wales,Children,Balance,Count,People(thousands),Estimate,,2016/England and Wales/Children/BalanceNone,10,+/-CI,21,10


In [60]:
Final_table['Geography'] = Final_table['Geography'].map(lambda cell:cell.replace('United Kingdom', 'K02000001'))

In [61]:
Final_table['Geography'] = Final_table['Geography'].map(lambda cell:cell.replace('England and Wales', 'K04000001'))

In [62]:
Final_table = Final_table[['Geography','Year','Occupation','Flow','Measure Type','Value', 'CI','Unit']]

In [63]:
Final_table.head(5)

Unnamed: 0,Geography,Year,Occupation,Flow,Measure Type,Value,CI,Unit
1,K02000001,1991,All persons:2011 Census Revisions,Inflow,Count,329,23,People(thousands)
5,K02000001,1991,Professional and managerial,Inflow,Count,94,11,People(thousands)
9,K02000001,1991,Manual and clerical,Inflow,Count,64,11,People(thousands)
13,K02000001,1991,Students,Inflow,Count,57,10,People(thousands)
17,K02000001,1991,Other adults,Inflow,Count,58,9,People(thousands)


In [64]:
destinationFolder = Path('out')
destinationFolder.mkdir(exist_ok=True, parents=True)

Final_table.to_csv(destinationFolder / ('tidydata2_5.csv'), index = False)

In [65]:
writeMetadata(metadata, 'ONS-LTIM-Occupation', 'Migration')