# Migration between administrative areas and overseas by sex

Tab: `In-Council Area-Sex`

In [11]:
from gssutils import *

if is_interactive():
    # NOTE(review): presumably true only when this notebook is run on its own
    # rather than driven by another notebook that already defines `tab` - confirm.
    scraper = Scraper(
        'https://www.nrscotland.gov.uk/statistics-and-data/statistics/statistics-by-theme/'
        'migration/migration-statistics/migration-flows/migration-between-scotland-and-overseas'
    )
    # Pick the Excel distribution by its title and media type.
    distribution = scraper.distribution(
        title='Migration between administrative areas and overseas by sex',
        mediaType='application/vnd.ms-excel',
    )
    display(distribution)
    # Keep only the sheet this notebook transforms; indexing with [0] raises
    # IndexError if no sheet carries that name.
    matching_tabs = [
        sheet
        for sheet in distribution.as_databaker()
        if sheet.name == 'In-Council Area-Sex'
    ]
    tab = matching_tabs[0]

In [12]:
# Anchor: the cell(s) whose text contains 'Council areas'.
cell = tab.filter(contains_string('Council areas'))

# Geography labels: non-empty cells from the anchor downwards.
area = cell.expand(DOWN).is_not_blank().is_not_whitespace()

# Flow/sex headings: non-empty cells to the right of the anchor.
flow = cell.fill(RIGHT).is_not_blank().is_not_whitespace()

# Year headings: two rows below the anchor, extended to the right.
year_row = cell.shift(0, 2).expand(RIGHT)
midyear = year_row.is_not_blank().is_not_whitespace()

# Observations: non-empty, non-bold cells from the row under the year headings
# downwards, minus any string cell mentioning 'Year' and minus the heading
# cells themselves.
below_years = midyear.shift(0, 1).expand(DOWN)
candidates = below_years.is_not_blank().is_not_whitespace().is_not_bold()
observations = candidates.filter(
    lambda x: not (type(x.value) == str and 'Year' in x.value)
) - midyear - flow

In [13]:
# Dimensions looked up from header cells on the sheet.
header_dimensions = [
    HDim(midyear, 'Mid Year', DIRECTLY, ABOVE),
    HDim(area, 'Domestic geography', DIRECTLY, LEFT),
    HDim(flow, 'flow', CLOSEST, ABOVE),
]

# Dimensions that take the same value for every observation on this tab.
constant_dimensions = [
    HDimConst(name, value)
    for name, value in (
        ('Measure Type', 'Count'),
        ('Unit', 'People'),
        ('Flow', 'inflow'),
        ('Age', 'all'),
    )
]

# Same order as before: header lookups first, then the constants.
Dimensions = header_dimensions + constant_dimensions

c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# savepreviewhtml(c1)  # left disabled
tidy = c1.topandas()




In [14]:
# Keep the first four characters of each 'Mid Year' label and append the fixed
# suffix, giving values such as '2001-06-30T00:00:00/P1Y'.
year_prefix = tidy['Mid Year'].map(lambda label: str(label)[:4])
tidy['Mid Year'] = year_prefix + '-06-30T00:00:00/P1Y'

# Every row on this tab gets the same foreign geography value.
tidy['Foreign geography'] = 'nrs/overseas'

In [15]:
# Convert every column except the observation values to a categorical and show
# the distinct values found, as a quick sanity check of the extraction.
for column_name in tidy.columns:
    if column_name == 'OBS':
        continue
    as_category = tidy[column_name].astype('category')
    tidy[column_name] = as_category
    display(column_name)
    display(as_category.cat.categories)

'Mid Year'

Index(['2001-06-30T00:00:00/P1Y', '2002-06-30T00:00:00/P1Y',
       '2003-06-30T00:00:00/P1Y', '2004-06-30T00:00:00/P1Y',
       '2005-06-30T00:00:00/P1Y', '2006-06-30T00:00:00/P1Y',
       '2007-06-30T00:00:00/P1Y', '2008-06-30T00:00:00/P1Y',
       '2009-06-30T00:00:00/P1Y', '2010-06-30T00:00:00/P1Y',
       '2011-06-30T00:00:00/P1Y', '2012-06-30T00:00:00/P1Y',
       '2013-06-30T00:00:00/P1Y', '2014-06-30T00:00:00/P1Y',
       '2015-06-30T00:00:00/P1Y', '2016-06-30T00:00:00/P1Y',
       '2017-06-30T00:00:00/P1Y'],
      dtype='object')

'Domestic geography'

Index(['S12000005', 'S12000006', 'S12000008', 'S12000010', 'S12000011',
       'S12000013', 'S12000014', 'S12000017', 'S12000018', 'S12000019',
       'S12000020', 'S12000021', 'S12000023', 'S12000026', 'S12000027',
       'S12000028', 'S12000029', 'S12000030', 'S12000033', 'S12000034',
       'S12000035', 'S12000036', 'S12000038', 'S12000039', 'S12000040',
       'S12000041', 'S12000042', 'S12000044', 'S12000045', 'S12000046',
       'S12000047', 'S12000048', 'S92000003'],
      dtype='object')

'flow'

Index(['In-migration - Females', 'In-migration - Males',
       'In-migration - Persons'],
      dtype='object')

'Measure Type'

Index(['Count'], dtype='object')

'Unit'

Index(['People'], dtype='object')

'Flow'

Index(['inflow'], dtype='object')

'Age'

Index(['all'], dtype='object')

'Foreign geography'

Index(['nrs/overseas'], dtype='object')

In [16]:
# Labels in 'flow' look like 'In-migration - Females'; dropping the first 15
# characters ('In-migration - ') leaves the sex word.
sex_codes = {
    'Persons': 'T',
    'Females': 'F',
    'Males': 'M',
}
sex_label = tidy['flow'].map(lambda label: str(label)[15:])
# Unknown labels are passed through unchanged rather than becoming missing.
tidy['Sex'] = sex_label.map(lambda name: sex_codes.get(name, name))

In [17]:
import numpy as np

# Treat empty-string observations as missing so those rows can be dropped.
# The result is assigned back instead of calling replace(..., inplace=True) on
# tidy['OBS']: that chained in-place form acts on a temporary Series and leaves
# `tidy` unchanged when pandas Copy-on-Write is active, which would make the
# integer conversion below fail on the remaining '' values.
tidy['OBS'] = tidy['OBS'].replace('', np.nan)
tidy.dropna(subset=['OBS'], inplace=True)

# The data marker column is not part of the output; drop it when present.
if 'DATAMARKER' in tidy.columns:
    tidy.drop(columns=['DATAMARKER'], inplace=True)

# Rename the observation column and store the values as integers.
tidy.rename(columns={'OBS': 'Value'}, inplace=True)
tidy['Value'] = tidy['Value'].astype(int)

In [18]:
# Keep only the output columns, in their final order (the helper 'flow' column
# is left out).
tidy = tidy[[
    'Domestic geography',
    'Foreign geography',
    'Mid Year',
    'Sex',
    'Age',
    'Flow',
    'Measure Type',
    'Value',
    'Unit',
]]

In [19]:
# Display the final tidy table as the cell output.
tidy

Unnamed: 0,Domestic geography,Foreign geography,Mid Year,Sex,Age,Flow,Measure Type,Value,Unit
0,S92000003,nrs/overseas,2001-06-30T00:00:00/P1Y,T,all,inflow,Count,27800,People
1,S92000003,nrs/overseas,2002-06-30T00:00:00/P1Y,T,all,inflow,Count,25500,People
2,S92000003,nrs/overseas,2003-06-30T00:00:00/P1Y,T,all,inflow,Count,28500,People
3,S92000003,nrs/overseas,2004-06-30T00:00:00/P1Y,T,all,inflow,Count,41800,People
4,S92000003,nrs/overseas,2005-06-30T00:00:00/P1Y,T,all,inflow,Count,41300,People
5,S92000003,nrs/overseas,2006-06-30T00:00:00/P1Y,T,all,inflow,Count,45100,People
6,S92000003,nrs/overseas,2007-06-30T00:00:00/P1Y,T,all,inflow,Count,45200,People
7,S92000003,nrs/overseas,2008-06-30T00:00:00/P1Y,T,all,inflow,Count,45100,People
8,S92000003,nrs/overseas,2009-06-30T00:00:00/P1Y,T,all,inflow,Count,47400,People
9,S92000003,nrs/overseas,2010-06-30T00:00:00/P1Y,T,all,inflow,Count,44200,People
