Table 1: NINo registrations to adult overseas nationals entering the UK by quarter of registration and world region

In [1]:
%run lib/scrape_dwp.ipynb

metadata = scrape('https://www.gov.uk/government/statistics/national-insurance-number-allocations-to-adult-overseas-nationals-to-march-2018')
metadata

{'details': 'We also publish data on the [nationality of DWP working age benefit claimants\nat the point of National Insurance number\nregistration](https://www.gov.uk/government/statistics/nationality-at-point-\nof-nino-registration-of-dwp-working-age-benefit-recipients-data-to-feb-2017).\n\nThis quarterly report contains data on National Insurance number allocations\nto adult overseas nationals entering the UK.\n\nThe summary tables, derived from Stat-Xplore, show National Insurance number\nallocations to adult overseas nationals entering the UK by:\n\n  * quarter of registration and world region – January 2002 to March 2018\n  * region and local authority by world area – registrations year to March 2018\n  * registrations by nationality – year to March 2018\n\n### Explore the statistics with our interactive tools\n\nFull statistics on National Insurance number allocations to adult overseas\nnationals entering the UK are available from [Stat-Xplore](https://stat-\nxplore.dwp.gov.uk/)

The source data for this table is published as an OpenDocument spreadsheet (ODS). Unfortunately, Databaker can only read Excel spreadsheets at the moment, so we need to convert the file first. For this we'll use `pyexcel` together with its plugins for the ODS and XLS file formats.

In [2]:
import pyexcel
from io import BytesIO
from pathlib import Path, PurePosixPath

# Local folder that receives the converted spreadsheet.
sourceFolder = Path('in')
sourceFolder.mkdir(exist_ok=True)

# The release is expected to carry exactly one ODS attachment; stop early if not.
ods_files = [f for f in metadata['files'] if f['type'] == 'ODS']
assert len(ods_files) == 1, 'Should be exactly one ODS file'

ods_url = ods_files[0]['url']
ods_title = ods_files[0]['title']  # reused later when writing the dataset metadata
ods_filename = PurePosixPath(urlparse(ods_url).path)

# Download once via the already-extracted URL and fail loudly on an HTTP error,
# so an error page is never handed to pyexcel as if it were a spreadsheet.
# NOTE(review): assumes `session` is a requests-style session whose responses
# expose raise_for_status() -- confirm in lib/scrape_dwp.ipynb.
ods_response = session.get(ods_url)
ods_response.raise_for_status()
ods_file = BytesIO(ods_response.content)

# Keep the publisher's file name, swapping only the extension to .xls.
xls_filename = sourceFolder / (ods_filename.with_suffix('.xls').name)

# Convert the in-memory ODS workbook to an XLS file that Databaker can open.
pyexcel.save_book_as(file_content=ods_file, file_type='ods', dest_file_name=str(xls_filename))

In [3]:
sheets = loadxlstabs(xls_filename)

# Select Table 1 by sheet name rather than by position, so that a reordered or
# newly inserted sheet in a future release cannot silently select the wrong table.
tabs_by_name = {sheet.name: sheet for sheet in sheets}
assert '1' in tabs_by_name, "Expected a sheet named '1' (Table 1) in the workbook"
tab = tabs_by_name['1']

Loading in\nino-registrations-adult-overseas-nationals-march-2018-tables.xls which has size 162304 bytes
Table names: ['CONTENTS', '1', '2', '3', '4']


In [4]:
# Preview the selected sheet to locate the header rows and the first data row.
savepreviewhtml(tab)

0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
,,,,,,,,,,,,,,,,
Table 1: NINo registrations to adult overseas nationals entering the UK by quarter of registration and world region,,,,,,,,,,,,,,,,
Note : Caution should be taken in interpreting trends based on quarterly data,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,
,,European Union,Non-European Union (Other Europe),Asia,Rest of the World,Unknown,,,,,,,,,,
,TOTAL,European Union EU15,European Union EU8,European Union EU2,European Union Other,Other Europe,Middle East and Central Asia,East Asia,South Asia,South East Asia,Sub-Saharan Africa,North Africa,North America,Central and South America,Oceania,Unknown
,,,,,,,,,,,,,,,,
Jan-02 to Mar-02,69701.0,18150.0,2217.0,1269.0,455.0,3114.0,4411.0,2390.0,10591.0,4063.0,11376.0,757.0,2770.0,2152.0,5902.0,91.0
Apr-02 to Jun-02,56619.0,13416.0,1725.0,1124.0,287.0,2452.0,4600.0,2030.0,9443.0,3407.0,9198.0,661.0,1903.0,1919.0,4363.0,97.0
Jul-02 to Sep-02,94436.0,20844.0,2912.0,1563.0,410.0,3732.0,6900.0,3381.0,15496.0,5231.0,17555.0,1024.0,3079.0,3922.0,8207.0,176.0


In [5]:
# Observations: every non-blank cell in the block that starts at B8 (first
# quarter, TOTAL column) and extends down and to the right.
first_observation = tab.excel_ref('B8')
observations = first_observation.expand(DOWN).expand(RIGHT).is_not_blank()
savepreviewhtml(observations)

0
item 0

0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
,,,,,,,,,,,,,,,,
Table 1: NINo registrations to adult overseas nationals entering the UK by quarter of registration and world region,,,,,,,,,,,,,,,,
Note : Caution should be taken in interpreting trends based on quarterly data,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,
,,European Union,Non-European Union (Other Europe),Asia,Rest of the World,Unknown,,,,,,,,,,
,TOTAL,European Union EU15,European Union EU8,European Union EU2,European Union Other,Other Europe,Middle East and Central Asia,East Asia,South Asia,South East Asia,Sub-Saharan Africa,North Africa,North America,Central and South America,Oceania,Unknown
,,,,,,,,,,,,,,,,
Jan-02 to Mar-02,69701.0,18150.0,2217.0,1269.0,455.0,3114.0,4411.0,2390.0,10591.0,4063.0,11376.0,757.0,2770.0,2152.0,5902.0,91.0
Apr-02 to Jun-02,56619.0,13416.0,1725.0,1124.0,287.0,2452.0,4600.0,2030.0,9443.0,3407.0,9198.0,661.0,1903.0,1919.0,4363.0,97.0
Jul-02 to Sep-02,94436.0,20844.0,2912.0,1563.0,410.0,3732.0,6900.0,3381.0,15496.0,5231.0,17555.0,1024.0,3079.0,3922.0,8207.0,176.0


In [6]:
# Area headings: the non-blank cells of row 6, from column B rightwards
# (TOTAL, European Union EU15, ...).
area_header_start = tab.excel_ref('B6')
Area = area_header_start.expand(RIGHT).is_not_blank()
savepreviewhtml(Area)

0
item 0

0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
,,,,,,,,,,,,,,,,
Table 1: NINo registrations to adult overseas nationals entering the UK by quarter of registration and world region,,,,,,,,,,,,,,,,
Note : Caution should be taken in interpreting trends based on quarterly data,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,
,,European Union,Non-European Union (Other Europe),Asia,Rest of the World,Unknown,,,,,,,,,,
,TOTAL,European Union EU15,European Union EU8,European Union EU2,European Union Other,Other Europe,Middle East and Central Asia,East Asia,South Asia,South East Asia,Sub-Saharan Africa,North Africa,North America,Central and South America,Oceania,Unknown
,,,,,,,,,,,,,,,,
Jan-02 to Mar-02,69701.0,18150.0,2217.0,1269.0,455.0,3114.0,4411.0,2390.0,10591.0,4063.0,11376.0,757.0,2770.0,2152.0,5902.0,91.0
Apr-02 to Jun-02,56619.0,13416.0,1725.0,1124.0,287.0,2452.0,4600.0,2030.0,9443.0,3407.0,9198.0,661.0,1903.0,1919.0,4363.0,97.0
Jul-02 to Sep-02,94436.0,20844.0,2912.0,1563.0,410.0,3732.0,6900.0,3381.0,15496.0,5231.0,17555.0,1024.0,3079.0,3922.0,8207.0,176.0


In [7]:
# Quarter labels: non-blank cells of column A from row 8 downwards ...
period_cells = tab.excel_ref('A8').expand(DOWN).is_not_blank()
# ... minus the first cell starting with 'INFO' and everything below it.
trailing_notes = period_cells.regex('^INFO').expand(DOWN)
Period = period_cells - trailing_notes
savepreviewhtml(Period)

0
item 0

0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
,,,,,,,,,,,,,,,,
Table 1: NINo registrations to adult overseas nationals entering the UK by quarter of registration and world region,,,,,,,,,,,,,,,,
Note : Caution should be taken in interpreting trends based on quarterly data,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,
,,European Union,Non-European Union (Other Europe),Asia,Rest of the World,Unknown,,,,,,,,,,
,TOTAL,European Union EU15,European Union EU8,European Union EU2,European Union Other,Other Europe,Middle East and Central Asia,East Asia,South Asia,South East Asia,Sub-Saharan Africa,North Africa,North America,Central and South America,Oceania,Unknown
,,,,,,,,,,,,,,,,
Jan-02 to Mar-02,69701.0,18150.0,2217.0,1269.0,455.0,3114.0,4411.0,2390.0,10591.0,4063.0,11376.0,757.0,2770.0,2152.0,5902.0,91.0
Apr-02 to Jun-02,56619.0,13416.0,1725.0,1124.0,287.0,2452.0,4600.0,2030.0,9443.0,3407.0,9198.0,661.0,1903.0,1919.0,4363.0,97.0
Jul-02 to Sep-02,94436.0,20844.0,2912.0,1563.0,410.0,3732.0,6900.0,3381.0,15496.0,5231.0,17555.0,1024.0,3079.0,3922.0,8207.0,176.0


In [8]:
# Each observation takes its quarter from the label directly to its left and
# its area from the heading directly above; measure type and unit are constants.
period_dim = HDim(Period, 'Period', DIRECTLY, LEFT)
area_dim = HDim(Area, 'Area', DIRECTLY, ABOVE)
measure_type_dim = HDimConst('Measure Type', 'Count')
unit_dim = HDimConst('Unit', 'People')

Dimensions = [period_dim, area_dim, measure_type_dim, unit_dim]

In [9]:
# Bind the observation cells to their dimensions and preview the resulting lookup.
# NOTE(review): no dimension named TIME is defined above, so processTIMEUNIT may
# have no effect here -- confirm against the databaker documentation.
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
savepreviewhtml(c1)

0,1,2
OBS,Period,Area

0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
,,,,,,,,,,,,,,,,
Table 1: NINo registrations to adult overseas nationals entering the UK by quarter of registration and world region,,,,,,,,,,,,,,,,
Note : Caution should be taken in interpreting trends based on quarterly data,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,
,,European Union,Non-European Union (Other Europe),Asia,Rest of the World,Unknown,,,,,,,,,,
,TOTAL,European Union EU15,European Union EU8,European Union EU2,European Union Other,Other Europe,Middle East and Central Asia,East Asia,South Asia,South East Asia,Sub-Saharan Africa,North Africa,North America,Central and South America,Oceania,Unknown
,,,,,,,,,,,,,,,,
Jan-02 to Mar-02,69701.0,18150.0,2217.0,1269.0,455.0,3114.0,4411.0,2390.0,10591.0,4063.0,11376.0,757.0,2770.0,2152.0,5902.0,91.0
Apr-02 to Jun-02,56619.0,13416.0,1725.0,1124.0,287.0,2452.0,4600.0,2030.0,9443.0,3407.0,9198.0,661.0,1903.0,1919.0,4363.0,97.0
Jul-02 to Sep-02,94436.0,20844.0,2912.0,1563.0,410.0,3732.0,6900.0,3381.0,15496.0,5231.0,17555.0,1024.0,3079.0,3922.0,8207.0,176.0


In [10]:
# Flatten the conversion segment into a pandas DataFrame, one row per observation.
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,Period,Area,Measure Type,Unit
0,69701.0,Jan-02 to Mar-02,TOTAL,Count,People
1,18150.0,Jan-02 to Mar-02,European Union EU15,Count,People
2,2217.0,Jan-02 to Mar-02,European Union EU8,Count,People
3,1269.0,Jan-02 to Mar-02,European Union EU2,Count,People
4,455.0,Jan-02 to Mar-02,European Union Other,Count,People
5,3114.0,Jan-02 to Mar-02,Other Europe,Count,People
6,4411.0,Jan-02 to Mar-02,Middle East and Central Asia,Count,People
7,2390.0,Jan-02 to Mar-02,East Asia,Count,People
8,10591.0,Jan-02 to Mar-02,South Asia,Count,People
9,4063.0,Jan-02 to Mar-02,South East Asia,Count,People


In [11]:
# Non-null count per column; equal counts mean every observation found all of its dimensions.
new_table.count()

OBS             1040
Period          1040
Area            1040
Measure Type    1040
Unit            1040
dtype: int64

In [12]:
# Inspect column types: OBS arrives as float64 and is cast to an integer Value in the next step.
new_table.dtypes

OBS             float64
Period           object
Area             object
Measure Type     object
Unit             object
dtype: object

In [13]:
# Registrations are head counts, so they must be whole numbers. Check that
# before casting, because astype(int) would silently truncate any fraction.
assert (new_table['OBS'] % 1 == 0).all(), 'OBS contains missing or non-integer values'
new_table['Value'] = new_table['OBS'].astype(int)

In [14]:
# Keep only the output columns, in the order they are written to the CSV
# (the raw OBS column is dropped in favour of the integer Value).
output_columns = ['Period', 'Area', 'Measure Type', 'Value', 'Unit']
new_table = new_table.loc[:, output_columns]

In [15]:
# Spot-check the first rows of the tidied table.
new_table.head()

Unnamed: 0,Period,Area,Measure Type,Value,Unit
0,Jan-02 to Mar-02,TOTAL,Count,69701,People
1,Jan-02 to Mar-02,European Union EU15,Count,18150,People
2,Jan-02 to Mar-02,European Union EU8,Count,2217,People
3,Jan-02 to Mar-02,European Union EU2,Count,1269,People
4,Jan-02 to Mar-02,European Union Other,Count,455,People


In [16]:
# Make sure the output folder exists, then write the tidy table without the
# DataFrame index column.
destinationFolder = Path('out')
destinationFolder.mkdir(parents=True, exist_ok=True)

csv_path = destinationFolder / 'nin1.csv'
new_table.to_csv(csv_path, index=False)

In [17]:
# Record the dataset metadata alongside the CSV, passing the scraped metadata,
# a dataset title, the ODS attachment title and a theme.
# NOTE(review): writeMetadata is defined outside this notebook (presumably in
# lib/scrape_dwp.ipynb); the meaning of each argument should be confirmed there.
writeMetadata(metadata,
              'National Insurance Number Allocations to Adult Overseas Nationals',
              ods_title, 'Migration')

In [18]:
# Final sanity check: row counts per column should match the counts seen before tidying.
new_table.count()

Period          1040
Area            1040
Measure Type    1040
Value           1040
Unit            1040
dtype: int64