# Migration between Scotland and overseas by age

Tabs: `SYOA Persons (2001-)` and `SYOA Males (2001-)`

In [1]:
from gssutils import *

# NOTE(review): `is_interactive`, `Scraper` and `display` presumably arrive via the
# gssutils star import. When the guard is false this cell defines neither
# `scraper` nor `tabs`, so they must already exist for the later cells to work.
if is_interactive():
    scraper = Scraper(
        'https://www.nrscotland.gov.uk/statistics-and-data/statistics/'
        'statistics-by-theme/migration/migration-statistics/migration-between-scotland-and-overseas'
    )
    # scraper.run()  (left disabled, as in the original cell)

    # Pick the Excel distribution with this exact title from the scraped page.
    distribution = scraper.distribution(
        mediaType='application/vnd.ms-excel',
        title='Migration between Scotland and overseas by age',
    )
    display(distribution)

    # Load the workbook as databaker tabs for the extraction cells below.
    tabs = distribution.as_databaker()

In [2]:
# Start from an empty frame; the tab-processing cell concatenates its tidy rows onto it.
Final_table = pd.DataFrame()

In [3]:
# Tidy the 'SYOA Males (2001-)' tab and append its observations to Final_table.
# NOTE(review): the notebook heading also lists 'SYOA Persons (2001-)', but only
# the males tab is handled in this cell -- confirm that this is intentional.

def _age_code(raw):
    """Turn one age header value into its 'year/<age>' code.

    'All ages' becomes 'all' and a trailing '.0' is dropped (so 1.0 becomes
    'year/1').  Only a '.0' at the very end of the label is removed, so a
    label that merely contains '.0' somewhere else is left intact.
    """
    label = str(raw).replace('All ages', 'all')
    if label.endswith('.0'):
        label = label[:-2]
    return 'year/' + label


tab_name = 'SYOA Males (2001-)'
tab = next((t for t in tabs if t.name == tab_name), None)
if tab is None:
    # Name the missing tab instead of failing with a bare IndexError.
    raise ValueError('Tab {!r} not found; available tabs: {}'.format(
        tab_name, [t.name for t in tabs]))

# 'Year' header cell(s); every other selection is positioned relative to these.
cell = tab.filter('Year')
below_header = cell.shift(0, 1)  # the cell one row beneath each 'Year' header

# Age headers: non-blank cells to the right of the 'Year' header, or to the
# right of the cell directly beneath it.
age = (cell.fill(RIGHT).is_not_blank().is_not_whitespace()
       | below_header.fill(RIGHT).is_not_blank().is_not_whitespace())

# Year labels: the cell beneath the 'Year' header and everything below it.
year = below_header.expand(DOWN).is_not_blank().is_not_whitespace()

# Section titles that contain the word 'migration' (mapped to flow codes below).
flow = tab.filter(contains_string('migration')).is_not_blank().is_not_whitespace()

# Observations: every non-blank cell beneath an age header, excluding the row
# beneath the 'Year' header, the year labels and the section titles.
observations = age.fill(DOWN).is_not_blank().is_not_whitespace()
observations = observations - below_header.fill(RIGHT) - year - flow

Dimensions = [
    HDim(year, 'Mid Year', CLOSEST, ABOVE),
    HDim(flow, 'Flow', CLOSEST, ABOVE),
    HDim(age, 'Age', DIRECTLY, ABOVE),
    HDimConst('Measure Type', 'Count'),
    HDimConst('Unit', 'People'),
    HDimConst('Sex', 'M'),
    HDimConst('Domestic geography', 'Scotland'),
]
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
new_table = c1.topandas()

# Map the section titles to flow codes; any unrecognised title passes through unchanged.
flow_codes = {
    'In migration of males from overseas 2001-02 to latest': 'Inflow',
    'Out migration of males to overseas 2001-02 to latest': 'Outflow',
    'Net migration of males from overseas 2001-02 to latest': 'Balance',
}
new_table['Flow'] = new_table['Flow'].map(lambda title: flow_codes.get(title, title))

new_table['Age'] = new_table['Age'].map(_age_code)

# Rows without a year label cannot be dated, so drop them before building the period.
new_table.dropna(subset=['Mid Year'], inplace=True)
# Keep the first four characters of the label and express it as a one-year
# interval starting on 30 June of that year.
new_table['Mid Year'] = new_table['Mid Year'].map(lambda label: str(label)[0:4]) + '-06-30T00:00:00/P1Y'

Final_table = pd.concat([Final_table, new_table])




In [4]:
import numpy as np

# Turn the raw 'OBS' column into an integer 'Value' column:
#   1. blank observations become NaN and their rows are dropped;
#   2. the 'DATAMARKER' column is removed when present;
#   3. 'OBS' is renamed to 'Value' and cast to int.
# The blank -> NaN replacement is assigned back rather than done with
# `Final_table['OBS'].replace(..., inplace=True)`: that form acts on a temporary
# Series and leaves the frame untouched under pandas Copy-on-Write, after which
# the int cast would fail on the remaining empty strings.
Final_table = (
    Final_table
    .assign(OBS=Final_table['OBS'].replace('', np.nan))
    .dropna(subset=['OBS'])
    .drop(columns=['DATAMARKER'], errors='ignore')
    .rename(columns={'OBS': 'Value'})
    .astype({'Value': int})
)

In [5]:
# Every row in this table refers to the same foreign geography code.
Final_table = Final_table.assign(**{'Foreign geography': 'nrs/overseas'})

In [6]:
# Keep only the output columns, in their final order.
Final_table = Final_table[[
    'Domestic geography',
    'Foreign geography',
    'Mid Year',
    'Sex',
    'Age',
    'Flow',
    'Measure Type',
    'Value',
    'Unit',
]]

In [7]:
# Show the finished table as this cell's output.
Final_table

Unnamed: 0,Domestic geography,Foreign geography,Mid Year,Sex,Age,Flow,Measure Type,Value,Unit
0,Scotland,nrs/overseas,2001-06-30T00:00:00/P1Y,M,year/all,Inflow,Count,14750,People
1,Scotland,nrs/overseas,2001-06-30T00:00:00/P1Y,M,year/0,Inflow,Count,149,People
2,Scotland,nrs/overseas,2001-06-30T00:00:00/P1Y,M,year/1,Inflow,Count,214,People
3,Scotland,nrs/overseas,2001-06-30T00:00:00/P1Y,M,year/2,Inflow,Count,223,People
4,Scotland,nrs/overseas,2001-06-30T00:00:00/P1Y,M,year/3,Inflow,Count,202,People
5,Scotland,nrs/overseas,2001-06-30T00:00:00/P1Y,M,year/4,Inflow,Count,188,People
6,Scotland,nrs/overseas,2001-06-30T00:00:00/P1Y,M,year/5,Inflow,Count,143,People
7,Scotland,nrs/overseas,2001-06-30T00:00:00/P1Y,M,year/6,Inflow,Count,135,People
8,Scotland,nrs/overseas,2001-06-30T00:00:00/P1Y,M,year/7,Inflow,Count,118,People
9,Scotland,nrs/overseas,2001-06-30T00:00:00/P1Y,M,year/8,Inflow,Count,144,People
