Table 1.3: Estimated Net International Migration, by Age and Gender (mid-2016 to mid-2017)

In [1]:
from databaker.framework import *
import pandas as pd 

In [2]:
import requests
from cachecontrol import CacheControl
from cachecontrol.caches.file_cache import FileCache
from cachecontrol.heuristics import LastModified
from pathlib import Path

# Wrap the requests session in CacheControl with an on-disk FileCache
# ('.cache') and the LastModified heuristic, so repeated runs can reuse a
# cached copy of the workbook.
session = CacheControl(requests.Session(),
                       cache=FileCache('.cache'),
                       heuristic=LastModified())

# Local folder that holds the downloaded source workbook.
sourceFolder = Path('in')
sourceFolder.mkdir(exist_ok=True)

inputURL = 'https://www.nisra.gov.uk/sites/nisra.gov.uk/files/publications/Mig1617-Official.xlsx'
inputFile = sourceFolder / 'Mig1617-Official.xlsx'
response = session.get(inputURL)
# Fail loudly on a 4xx/5xx response: without this check an HTML error page
# would be saved under the .xlsx name and only blow up later when parsed.
response.raise_for_status()
inputFile.write_bytes(response.content)

In [3]:
# Load only the 'Table 1.3' sheet from the workbook and keep that single tab.
matching_tabs = loadxlstabs(inputFile, sheetids='Table 1.3')
tab = matching_tabs[0]

Loading in\Mig1617-Official.xlsx which has size 194382 bytes
Table names: ['Table 1.3']


In [4]:
# Display every cell in the loaded tab to check the sheet layout.
tab

{<B4 327.0>, <B15 -170.0>, <A15 '25-34'>, <A8 '35-44'>, <B17 1.0>, <A20 'Total'>, <A13 'Less than 18 years'>, <B19 -40.0>, <A1 'Table 1.3: Estimated Net International Migration, by Age and Gender (mid-2016 to mid-2017)'>, <A7 '25-34'>, <B10 -57.0>, <A14 '18-24'>, <B6 25.0>, <A16 '35-44'>, <B11 -73.0>, <B7 -95.0>, <B13 511.0>, <B18 -15.0>, <B16 -83.0>, <A19 '65 years and over'>, <B5 496.0>, <B1 ''>, <A11 '65 years and over'>, <A12 'Female'>, <B20 583.0>, <A4 'Male '>, <B2 ''>, <B14 52.0>, <A18 '55-64'>, <A6 '18-24'>, <B9 46.0>, <B8 -15.0>, <A17 '45-54'>, <B3 'Net International Migration'>, <A2 ''>, <B12 256.0>, <A5 'Less than 18 years'>, <A9 '45-54'>, <A3 'Gender / Age'>, <A10 '55-64'>}

In [5]:
# Observations are the non-blank cells of column B, starting at B4 and
# running down to the end of the sheet.
value_column = tab.excel_ref('B4').expand(DOWN)
observations = value_column.is_not_blank()

In [6]:
# Display the selected observation cells (B4:B20 in this sheet).
observations

{<B4 327.0>, <B15 -170.0>, <B13 511.0>, <B18 -15.0>, <B9 46.0>, <B16 -83.0>, <B5 496.0>, <B8 -15.0>, <B20 583.0>, <B12 256.0>, <B10 -57.0>, <B14 52.0>, <B6 25.0>, <B17 1.0>, <B19 -40.0>, <B11 -73.0>, <B7 -95.0>}

In [7]:
# The single column header in B3 ('Net International Migration') supplies the
# Flow dimension for every observation below it.
Flow = tab.excel_ref('B3')
Flow

{<B3 'Net International Migration'>}

In [8]:
# Select the sex header cells in column A by their label instead of by
# hard-coded positions (previously by_index([4, 12])), so the selection does
# not silently pick the wrong cells if rows shift in a revised workbook.
# The sheet stores 'Male ' with a trailing space, hence the strip().
sex = tab.excel_ref('A').expand(DOWN).filter(
    lambda cell: str(cell.value).strip() in ('Male', 'Female'))
sex

{<A12 'Female'>, <A4 'Male '>}

In [9]:
# Every non-blank label in column A from A4 downwards. Besides the age bands
# this also picks up the 'Male '/'Female' header cells and the final 'Total'
# row; those labels are rewritten on the DataFrame later in the notebook.
Age = tab.excel_ref('A4').expand(DOWN).is_not_blank() 
Age

{<A15 '25-34'>, <A19 '65 years and over'>, <A17 '45-54'>, <A7 '25-34'>, <A11 '65 years and over'>, <A8 '35-44'>, <A12 'Female'>, <A4 'Male '>, <A5 'Less than 18 years'>, <A9 '45-54'>, <A14 '18-24'>, <A18 '55-64'>, <A16 '35-44'>, <A20 'Total'>, <A13 'Less than 18 years'>, <A6 '18-24'>, <A10 '55-64'>}

In [10]:
# How each observation cell finds its dimension values.
Dimensions = [
    HDim(Flow, 'Flow', DIRECTLY, ABOVE),   # header cell B3, straight above
    HDim(sex, 'Sex', CLOSEST, ABOVE),      # closest sex header looking up column A
    HDim(Age, 'Age', DIRECTLY, LEFT),      # column A label on the same row
    HDimConst('Measure Type', 'Count'),    # constant for every row
    HDimConst('Unit', 'People'),           # constant for every row
]

In [11]:
# Bind the observation cells to their dimensions.
# NOTE(review): no TIME dimension is defined in Dimensions, so
# processTIMEUNIT=True may have no effect here — confirm.
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# Uncomment to preview the segment: savepreviewhtml(c1)

In [12]:
# Flatten the conversion segment into a pandas DataFrame (an OBS column plus
# one column per dimension) and display it.
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,Flow,Sex,Age,Measure Type,Unit
0,327.0,Net International Migration,Male,Male,Count,People
1,496.0,Net International Migration,Male,Less than 18 years,Count,People
2,25.0,Net International Migration,Male,18-24,Count,People
3,-95.0,Net International Migration,Male,25-34,Count,People
4,-15.0,Net International Migration,Male,35-44,Count,People
5,46.0,Net International Migration,Male,45-54,Count,People
6,-57.0,Net International Migration,Male,55-64,Count,People
7,-73.0,Net International Migration,Male,65 years and over,Count,People
8,256.0,Net International Migration,Female,Female,Count,People
9,511.0,Net International Migration,Female,Less than 18 years,Count,People


In [13]:
# The workbook yields floats (e.g. 327.0); the values are whole-person counts,
# so store them as integers.
new_table = new_table.astype({'OBS': int})

In [14]:
# Check the column types after the integer cast of OBS.
new_table.dtypes

OBS              int32
Flow            object
Sex             object
Age             object
Measure Type    object
Unit            object
dtype: object

In [15]:
# The sheet's final 'Total' row (A20) sits below the 'Female' header, so the
# CLOSEST/ABOVE lookup used for Sex tags it as Female even though its value
# (583) is the Male (327) plus Female (256) figure. Correct the Sex on that
# row before its Age label is rewritten.
# NOTE(review): 'All' is a placeholder label for both sexes combined —
# confirm against the Sex codelist used downstream.
is_total_row = new_table['Age'].str.strip() == 'Total'
new_table.loc[is_total_row, 'Sex'] = 'All'

# Rename the 'Total' age label to 'all' (literal substring replacement).
# NOTE(review): the per-sex header rows are later labelled 'all years';
# confirm whether this row should use the same label.
new_table['Age'] = new_table['Age'].str.replace('Total', 'all', regex=False)

In [16]:
# Rename the observation column from databaker's default 'OBS' to 'Value'.
new_table = new_table.rename(columns={'OBS': 'Value'})

In [17]:
# Cell A4 in the workbook is 'Male ' with a trailing space, and that raw text
# flows into both the Sex and Age columns. Strip it so the Sex values written
# to the CSV are clean and the Age lookup below does not depend on the stray
# space.
new_table['Sex'] = new_table['Sex'].str.strip()

# On the sex header rows the Age label was read from the header cell itself;
# those rows hold the figure across all age bands for that sex, so label
# their Age as 'all years'. Other Age values pass through unchanged.
new_table['Age'] = new_table['Age'].str.strip().replace(
    {'Female': 'all years', 'Male': 'all years'})

In [18]:
# Put the columns into the order used for the tidy-data output.
column_order = ['Age', 'Sex', 'Flow', 'Measure Type', 'Value', 'Unit']
new_table = new_table.loc[:, column_order]

In [19]:
# Preview the first five rows of the final layout.
new_table.head(5)

Unnamed: 0,Age,Sex,Flow,Measure Type,Value,Unit
0,all years,Male,Net International Migration,Count,327,People
1,Less than 18 years,Male,Net International Migration,Count,496,People
2,18-24,Male,Net International Migration,Count,25,People
3,25-34,Male,Net International Migration,Count,-95,People
4,35-44,Male,Net International Migration,Count,-15,People


In [20]:
# Write the tidy table to out/tidydata1.3.csv, creating the folder if needed.
destinationFolder = Path('out')
destinationFolder.mkdir(parents=True, exist_ok=True)

outputFile = destinationFolder / 'tidydata1.3.csv'
new_table.to_csv(outputFile, index=False)