# Migration between administrative areas and overseas by sex

Tab: `In-Council Area-Sex`

In [1]:
from gssutils import *

# Fetch the source spreadsheet and pick out the worksheet this notebook works on.
# NOTE(review): is_interactive/Scraper presumably come from the gssutils star
# import above; when is_interactive() is falsy, `tab` must already be defined
# by whatever runs this notebook -- confirm.
if is_interactive():
    # Adjacent string literals are joined by the parser, giving one URL.
    scraper = Scraper(
        'https://www.nrscotland.gov.uk/statistics-and-data/statistics/'
        'statistics-by-theme/migration/migration-statistics/migration-between-scotland-and-overseas'
    )
    # scraper.run()   (left disabled, as in the original notebook)
    distribution = scraper.distribution(
        title='Migration between administrative areas and overseas by sex',
        mediaType='application/vnd.ms-excel',
    )
    display(distribution)
    # Take the first worksheet with the expected name; raises IndexError if
    # the workbook has no such tab.
    tab = [
        sheet
        for sheet in distribution.as_databaker()
        if sheet.name == 'In-Council Area-Sex'
    ][0]

In [2]:
def _non_empty(bag):
    """Apply the is_not_blank and is_not_whitespace filters to a cell bag."""
    return bag.is_not_blank().is_not_whitespace()


# All selections below are positioned relative to the 'Council areas' cell.
cell = tab.filter('Council areas')

# Headers to the right of the anchor (e.g. 'In-migration - Males').
flow = _non_empty(cell.fill(RIGHT))

# Header cells found by shifting the anchor by (0, 2) and expanding right.
# NOTE(review): presumably this is the row of mid-year labels two rows below
# the anchor -- confirm against the spreadsheet layout.
midyear = _non_empty(cell.shift(0, 2).expand(RIGHT))

# The anchor cell and everything non-empty beneath it.
area = _non_empty(cell.expand(DOWN))

# Candidate data cells: everything below the mid-year headers that is not bold.
observations = _non_empty(midyear.shift(0, 1).expand(DOWN)).is_not_bold()
# Discard string cells containing 'Year', then remove any header cells that
# were swept up by the expansion.
observations = observations.filter(
    lambda c: not (type(c.value) == str and 'Year' in c.value)
) - midyear - flow

In [3]:
# Dimensions attached to every observation: three looked up from the sheet,
# followed by four constants.  Note that lower-case 'flow' (the sheet header)
# and capitalised 'Flow' (a constant) are two distinct columns.
Dimensions = [
    HDim(midyear, 'Mid Year', DIRECTLY, ABOVE),
    HDim(area, 'Domestic geography', DIRECTLY, LEFT),
    HDim(flow, 'flow', CLOSEST, ABOVE),
    *(
        HDimConst(label, value)
        for label, value in (
            ('Measure Type', 'Count'),
            ('Unit', 'People'),
            ('Flow', 'inflow'),
            ('Age', 'all'),
        )
    ),
]

c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# savepreviewhtml(c1)   (preview left disabled, as in the original notebook)

# Flatten the segment into a pandas DataFrame for the clean-up cells below.
tidy = c1.topandas()




In [4]:
# Keep the first four characters of each 'Mid Year' value and append a fixed
# 30 June timestamp with a one-year duration.
year_prefix = tidy['Mid Year'].map(lambda period: str(period)[:4])
tidy['Mid Year'] = year_prefix + '-06-30T00:00:00/P1Y'

# The foreign geography is the same constant for every row of this tab.
tidy['Foreign geography'] = 'nrs/overseas'

In [5]:
# Convert every column except the observation values to a categorical dtype
# and show its distinct values as a quick sanity check.
for col in (name for name in tidy.columns if name != 'OBS'):
    tidy[col] = tidy[col].astype('category')
    display(col, tidy[col].cat.categories)

'Mid Year'

Index(['2001-06-30T00:00:00/P1Y', '2002-06-30T00:00:00/P1Y',
       '2003-06-30T00:00:00/P1Y', '2004-06-30T00:00:00/P1Y',
       '2005-06-30T00:00:00/P1Y', '2006-06-30T00:00:00/P1Y',
       '2007-06-30T00:00:00/P1Y', '2008-06-30T00:00:00/P1Y',
       '2009-06-30T00:00:00/P1Y', '2010-06-30T00:00:00/P1Y',
       '2011-06-30T00:00:00/P1Y', '2012-06-30T00:00:00/P1Y',
       '2013-06-30T00:00:00/P1Y', '2014-06-30T00:00:00/P1Y',
       '2015-06-30T00:00:00/P1Y', '2016-06-30T00:00:00/P1Y'],
      dtype='object')

'Domestic geography'

Index(['Aberdeen City', 'Aberdeenshire', 'Angus', 'Argyll & Bute',
       'City of Edinburgh', 'Clackmannanshire', 'Dumfries and Galloway',
       'Dundee City', 'East Ayrshire', 'East Dunbartonshire', 'East Lothian',
       'East Renfrewshire', 'Falkirk', 'Fife', 'Glasgow City', 'Highland',
       'Inverclyde', 'Midlothian', 'Moray', 'Na h-Eileanan Siar',
       'North Ayrshire', 'North Lanarkshire', 'Orkney Islands',
       'Perth and Kinross', 'Renfrewshire', 'Scottish Borders',
       'Shetland Islands', 'South Ayrshire', 'South Lanarkshire', 'Stirling',
       'West Dunbartonshire', 'West Lothian'],
      dtype='object')

'flow'

Index(['In-migration - Females', 'In-migration - Males',
       'In-migration - Persons'],
      dtype='object')

'Measure Type'

Index(['Count'], dtype='object')

'Unit'

Index(['People'], dtype='object')

'Flow'

Index(['inflow'], dtype='object')

'Age'

Index(['all'], dtype='object')

'Foreign geography'

Index(['nrs/overseas'], dtype='object')

In [6]:
# Derive the 'Sex' code from the flow header, e.g. 'In-migration - Males' -> 'M'.
# The label is taken as the text after the last ' - ' separator instead of a
# fixed character offset, so the prefix length no longer matters.
SEX_CODES = {
    'Persons': 'T',
    'Females': 'F',
    'Males': 'M',
}


def _sex_code(header):
    """Return the sex code for a flow header; unknown labels pass through unchanged."""
    label = str(header).rsplit(' - ', 1)[-1].strip()
    return SEX_CODES.get(label, label)


tidy['Sex'] = tidy['flow'].map(_sex_code)

In [7]:
import numpy as np

# Turn the raw 'OBS' column into an integer 'Value' column.
# Everything is done by reassignment: calling an inplace method on
# tidy['OBS'] is a chained assignment, which recent pandas warns about and
# which does not update `tidy` under Copy-on-Write.
tidy = (
    tidy
    # Empty-string observations are treated as missing ...
    .assign(OBS=tidy['OBS'].replace('', np.nan))
    # ... and rows without an observation are removed.
    .dropna(subset=['OBS'])
    # The data-marker column is only dropped if it exists.
    .drop(columns=['DATAMARKER'], errors='ignore')
    .rename(columns={'OBS': 'Value'})
    .astype({'Value': int})
)

In [8]:
# Keep only the output columns, in their final order; any column not listed
# here (such as the helper 'flow' column) is dropped.
tidy = tidy[[
    'Domestic geography',
    'Foreign geography',
    'Mid Year',
    'Sex',
    'Age',
    'Flow',
    'Measure Type',
    'Value',
    'Unit',
]]

In [9]:
# Display the finished table as this cell's output.
tidy

Unnamed: 0,Domestic geography,Foreign geography,Mid Year,Sex,Age,Flow,Measure Type,Value,Unit
0,Aberdeen City,nrs/overseas,2001-06-30T00:00:00/P1Y,T,all,inflow,Count,2956,People
1,Aberdeen City,nrs/overseas,2002-06-30T00:00:00/P1Y,T,all,inflow,Count,2578,People
2,Aberdeen City,nrs/overseas,2003-06-30T00:00:00/P1Y,T,all,inflow,Count,2893,People
3,Aberdeen City,nrs/overseas,2004-06-30T00:00:00/P1Y,T,all,inflow,Count,4322,People
4,Aberdeen City,nrs/overseas,2005-06-30T00:00:00/P1Y,T,all,inflow,Count,4459,People
5,Aberdeen City,nrs/overseas,2006-06-30T00:00:00/P1Y,T,all,inflow,Count,5350,People
6,Aberdeen City,nrs/overseas,2007-06-30T00:00:00/P1Y,T,all,inflow,Count,5409,People
7,Aberdeen City,nrs/overseas,2008-06-30T00:00:00/P1Y,T,all,inflow,Count,5864,People
8,Aberdeen City,nrs/overseas,2009-06-30T00:00:00/P1Y,T,all,inflow,Count,5851,People
9,Aberdeen City,nrs/overseas,2010-06-30T00:00:00/P1Y,T,all,inflow,Count,5284,People
