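Source data: National Records of Scotland, Table Z2 - overseas migration flows by age, Scotland, 2001-02 to latest (July 2018 release): https://www.nrscotland.gov.uk/files//statistics/migration/2018-july/tab-z2-overseas-mig-flows-by-age-scotland-2001-02-latest-july-18.xlsx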

In [1]:
from databaker.framework import *
import pandas as pd 

In [2]:
import requests
from cachecontrol import CacheControl
from cachecontrol.caches.file_cache import FileCache
from cachecontrol.heuristics import LastModified
from pathlib import Path

# HTTP session whose responses are cached on disk under '.cache', so repeated
# runs of the notebook can reuse a previously downloaded copy of the workbook.
session = CacheControl(requests.Session(),
                       cache=FileCache('.cache'),
                       heuristic=LastModified())

# Local folder that holds the downloaded source workbook.
sourceFolder = Path('in')
sourceFolder.mkdir(exist_ok=True)

inputURL = 'https://www.nrscotland.gov.uk/files//statistics/migration/2018-july/tab-z2-overseas-mig-flows-by-age-scotland-2001-02-latest-july-18.xlsx'
# Save under the same file name as the last URL segment, so the two cannot drift apart.
inputFile = sourceFolder / inputURL.rsplit('/', 1)[-1]

response = session.get(inputURL)
# Fail loudly on an HTTP error instead of silently saving an error page as the .xlsx.
response.raise_for_status()
inputFile.write_bytes(response.content)

https://www.nrscotland.gov.uk/files//statistics/migration/2018-july/tab-z2-overseas-mig-flows-by-age-scotland-2001-02-latest-july-18.xlsx

In [3]:
# Worksheet names to process, one per period: 'AG 2001-02' through 'AG 2016-17'.
# Each name is 'AG <start year>-<last two digits of the following year>'.
sheetid = ['AG %d-%02d' % (start_year, (start_year + 1) % 100)
           for start_year in range(2001, 2017)]

In [4]:
# Start from an empty frame so that Final_table is defined before the
# per-sheet extraction cell runs.
Final_table = pd.DataFrame(data=None)

In [5]:
# Display names for the flow codes read from column A of each sheet;
# any other value is passed through unchanged.
FLOW_LABELS = {'IN': 'Inflow', 'OUT': 'Outflow', 'NET': 'Balance'}

# Columns (and their order) kept in the tidy output table.
OUTPUT_COLUMNS = ['Area of Destination or Origin', 'Mid Year', 'Sex', 'Age',
                  'Flow', 'Measure Type', 'Value', 'Unit']

# Read the workbook once and look tabs up by name, instead of re-opening and
# re-parsing the same file for every sheet.
tabs_by_name = {tab.name: tab for tab in loadxlstabs(inputFile)}

# One tidy frame per sheet; concatenated once after the loop. Building the
# result from scratch also makes this cell safe to re-run (no duplicated rows).
sheet_tables = []
for sheet in sheetid:
    tab = tabs_by_name[sheet]

    # Everything from B7 rightwards and downwards that holds a value.
    observations = tab.excel_ref("B7").expand(RIGHT).expand(DOWN).is_not_blank().is_not_whitespace()
    age = tab.excel_ref('B5').expand(RIGHT).is_not_blank()    # row 5: age-band headers
    flow = tab.excel_ref('A7').expand(DOWN)                   # column A: flow codes
    per = tab.excel_ref('B6').expand(RIGHT).is_not_blank()    # row 6: sub-headers ('%' columns are dropped below)

    # NOTE(review): column A's area labels are not used as a dimension, so every
    # observation on the sheet receives the constant area label below. The
    # workbook file name refers to overseas flows while the label says
    # "rest of the UK" - confirm the label against the spreadsheet.
    Dimensions = [
        # Sheet names look like 'AG 2001-02'; characters 3-6 are the starting year.
        HDimConst('Mid Year', sheet[3:7] + '-06-30T00:00:00/P1Y'),
        HDim(flow, 'Flow', DIRECTLY, LEFT),
        HDim(age, 'Age', DIRECTLY, ABOVE),
        HDim(per, 'per', DIRECTLY, ABOVE),
        HDimConst('Measure Type', 'Count'),
        HDimConst('Unit', 'People'),
        HDimConst('Sex', 'T'),
        HDimConst('Area of Destination or Origin', 'Movements between Scotland and the rest of the UK'),
    ]
    c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
    new_table = c1.topandas()

    # Drop the percentage columns and rows without a flow code. The explicit
    # copy means the column assignments below operate on an independent frame
    # rather than on a slice (avoids SettingWithCopyWarning).
    keep = (new_table['per'] != '%') & (new_table['Flow'] != '')
    new_table = new_table[keep].copy()

    # Non-numeric observations are coerced to 0; all zero values (coerced or
    # genuine) are removed further down.
    new_table['Value'] = pd.to_numeric(new_table['OBS'], errors='coerce').fillna(0).astype(int)
    new_table['Flow'] = new_table['Flow'].map(lambda code: FLOW_LABELS.get(code, code))
    # '85+' -> 'nrs/85-plus', 'All ages' -> 'nrs/all', '0-15' -> 'nrs/0-15'.
    new_table['Age'] = 'nrs/' + (
        new_table['Age']
        .str.replace('+', '-plus', regex=False)
        .str.replace('All ages', 'all', regex=False)
    )

    new_table = new_table[new_table['Value'] != 0]
    sheet_tables.append(new_table[OUTPUT_COLUMNS])

# Single concatenation; original per-sheet row indexes are preserved.
Final_table = pd.concat(sheet_tables)

Loading in\tab-z2-overseas-mig-flows-by-age-scotland-2001-02-latest-july-18.xlsx which has size 210662 bytes
Table names: ['AG 2001-02']

Loading in\tab-z2-overseas-mig-flows-by-age-scotland-2001-02-latest-july-18.xlsx which has size 210662 bytes
Table names: ['AG 2002-03']

Loading in\tab-z2-overseas-mig-flows-by-age-scotland-2001-02-latest-july-18.xlsx which has size 210662 bytes
Table names: ['AG 2003-04']

Loading in\tab-z2-overseas-mig-flows-by-age-scotland-2001-02-latest-july-18.xlsx which has size 210662 bytes
Table names: ['AG 2004-05']

Loading in\tab-z2-overseas-mig-flows-by-age-scotland-2001-02-latest-july-18.xlsx which has size 210662 bytes
Table names: ['AG 2005-06']

Loading in\tab-z2-overseas-mig-flows-by-age-scotland-2001-02-latest-july-18.xlsx which has size 210662 bytes
Table names: ['AG 2006-07']

Loading in\tab-z2-overseas-mig-flows-by-age-scotland-2001-02-latest-july-18.xlsx which has size 210662 bytes
Table names: ['AG 2007-08']

Loading in\tab-z2-overseas-mig-flo

In [6]:
# Preview only the first rows instead of rendering the whole combined table.
Final_table.head(10)

Unnamed: 0,Area of Destination or Origin,Mid Year,Sex,Age,Flow,Measure Type,Value,Unit
0,Movements between Scotland and the rest of the UK,2001-06-30T00:00:00/P1Y,T,nrs/0-15,Inflow,Count,9311,People
2,Movements between Scotland and the rest of the UK,2001-06-30T00:00:00/P1Y,T,nrs/16-24,Inflow,Count,12787,People
4,Movements between Scotland and the rest of the UK,2001-06-30T00:00:00/P1Y,T,nrs/25-34,Inflow,Count,13713,People
6,Movements between Scotland and the rest of the UK,2001-06-30T00:00:00/P1Y,T,nrs/35-44,Inflow,Count,8038,People
8,Movements between Scotland and the rest of the UK,2001-06-30T00:00:00/P1Y,T,nrs/45-54,Inflow,Count,4468,People
10,Movements between Scotland and the rest of the UK,2001-06-30T00:00:00/P1Y,T,nrs/55-64,Inflow,Count,3257,People
12,Movements between Scotland and the rest of the UK,2001-06-30T00:00:00/P1Y,T,nrs/65-74,Inflow,Count,1569,People
14,Movements between Scotland and the rest of the UK,2001-06-30T00:00:00/P1Y,T,nrs/75-84,Inflow,Count,882,People
16,Movements between Scotland and the rest of the UK,2001-06-30T00:00:00/P1Y,T,nrs/85-plus,Inflow,Count,372,People
18,Movements between Scotland and the rest of the UK,2001-06-30T00:00:00/P1Y,T,nrs/all,Inflow,Count,54397,People


In [7]:
# Count the non-null entries in each column of the combined table
# (a quick completeness check on the extraction result).
Final_table.count()

Area of Destination or Origin    1179
Mid Year                         1179
Sex                              1179
Age                              1179
Flow                             1179
Measure Type                     1179
Value                            1179
Unit                             1179
dtype: int64