Home Office, Immigration Statistics October to December 2017, Asylum table as_01_q (Asylum volume 1)

In [1]:
def is_interactive():
    """Return True when the ``__main__`` module has no ``__file__`` attribute.

    A missing ``__file__`` on ``__main__`` is used here as the signal that the
    code is running in an interactive session rather than from a script file.
    """
    import __main__ as entry_module
    if hasattr(entry_module, '__file__'):
        return False
    return True

if is_interactive():
    %run lib/scrape_govuk.ipynb
    metadata = scrape('https://www.gov.uk/government/statistics/immigration-statistics-october-to-december-2017-data-tables')
    import pyexcel
    from io import BytesIO

    ods_files = [f for f in metadata['files']
                 if f['type'] == 'ODS' and
                 f['title'] == 'Asylum data tables immigration statistics October to December 2017 volume 1 second edition']
    assert len(ods_files) == 1, 'Should be exactly one matching ODS file'

    ods_url = ods_files[0]['url']
    ods_title = ods_files[0]['title']

    ods_file = BytesIO(session.get(ods_files[0]['url']).content)

    data = pyexcel.get_sheet(file_content=ods_file, file_type='ods', sheet_name='as_01_q', library='pyexcel-ods3')

In [2]:
# Build a DataFrame from the sheet contents returned by get_array()
# (presumably a list of rows -- confirm against pyexcel), using pandas'
# default integer row and column labels.
df = pd.DataFrame(data=data.get_array())

In [3]:
# Rows from label 3 onwards and column labels 0-26 hold the table. Take an
# explicit copy so the edits below act on an independent frame rather than on a
# slice of `df` (in-place edits on such a slice trigger SettingWithCopyWarning).
observations = df.loc[3:, :26].copy()
# The first remaining row holds the column headings: promote it to the column
# names, then remove it from the data.
observations = observations.rename(columns=observations.iloc[0])
observations = observations.drop(observations.index[0])
observations = observations.drop('Geographical region', axis=1)
# Reshape wide -> long: one row per (Quarter, Country of nationality, column).
Final_table = pd.melt(observations,
                      ['Quarter', 'Country of nationality'],
                      var_name='Asylum Application',
                      value_name='Value')
# Drop missing values on the frame itself. Calling dropna(inplace=True) on the
# `Value` column alone only touches that Series and leaves the frame's rows
# in place.
Final_table = Final_table.dropna(subset=['Value'])
Final_table = Final_table.rename(columns={'Country of nationality': 'Nationality'})
Final_table['Unit'] = 'Applications'
Final_table['Measure Type'] = 'Count'

In [4]:
# The placeholder strings '.', 'z' and ':' are replaced by an empty string;
# every other value passes through unchanged.
Final_table['Value'] = Final_table['Value'].map(
    lambda cell: '' if cell in ('.', 'z', ':') else cell)

In [5]:
import numpy as np
# Empty strings mark missing values: turn them into NaN and drop those rows.
# The replaced column is assigned back onto the frame because an in-place
# replace() on a selected column is not guaranteed to write through to the
# frame (it does not under pandas Copy-on-Write).
Final_table = Final_table.assign(Value=Final_table['Value'].replace('', np.nan))
Final_table = Final_table.dropna(subset=['Value'])
# Convert the whole column in one vectorised call (raises on any non-numeric
# value), then store it as plain integers.
Final_table = Final_table.assign(Value=pd.to_numeric(Final_table['Value']).astype(int))

In [6]:
# Rename 'Quarter' to 'Period', keep only rows whose period text is non-empty,
# and rewrite each label as 'quarter/<label>' with spaces turned into hyphens.
Final_table = Final_table.rename(columns={'Quarter': 'Period'})
Final_table = Final_table.assign(Period=Final_table['Period'].astype(str))
Final_table = Final_table[Final_table['Period'] != '']
Final_table = Final_table.assign(
    Period='quarter/' + Final_table['Period'].map(lambda label: label.replace(' ', '-')))

In [7]:
# Remove any leading '*' characters from the nationality labels.
Final_table = Final_table.assign(
    Nationality=Final_table['Nationality'].str.lstrip('*'))

In [8]:
# Map each listed 'Total <region>' label to the bare region name. Labels that
# are not listed are kept as they are.
region_total_labels = {
    'Total Africa North': 'Africa North',
    'Total Africa Sub-Saharan': 'Africa Sub-Saharan',
    'Total America North': 'America North',
    # NOTE(review): this key ends with a space, presumably to match the label
    # as it appears in the sheet -- verify before normalising.
    'Total America Central and South ': 'America Central and South',
    'Total Asia Central': 'Asia Central',
    'Total Asia East': 'Asia East',
    'Total Asia South': 'Asia South',
    'Total Asia South East': 'Asia South East',
    'Total EU 14': 'EU 14',
    'Total EU 2': 'EU 2',
    'Total EU 8': 'EU 8',
    'Total EU Other': 'EU Other',
    'Total Europe Other': 'Europe Other',
    'Total Middle East': 'Middle East',
    'Total Oceania': 'Oceania',
    'Total Other': 'Other',
}
Final_table['Nationality'] = Final_table['Nationality'].map(
    lambda label: region_total_labels.get(label, label))

In [9]:
import urllib.request as request
import csv
import io
import requests

# Download the nationality codelist once and parse it straight into a frame.
url = "https://raw.githubusercontent.com/ONS-OpenData/ref_migration/master/codelists/ho-country-of-nationality.csv"
response = requests.get(url)
# Fail on an HTTP error instead of parsing an error page as CSV.
response.raise_for_status()
nationality_codes = pd.read_csv(io.StringIO(response.content.decode('utf-8')))

# Left join on the label text: every observation is kept, and labels absent
# from the codelist end up with NaN in the joined columns.
Final_table = pd.merge(Final_table, nationality_codes, how='left',
                       left_on='Nationality', right_on='Label')
# Expose the codelist's 'Notation' column as 'Country of nationality'.
Final_table = Final_table.rename(columns={'Notation': 'Country of nationality'})

In [10]:
# Keep only the columns listed below, in this order.
Final_table = Final_table[[
    'Country of nationality',
    'Period',
    'Asylum Application',
    'Measure Type',
    'Value',
    'Unit',
]]

In [11]:
# Three headings contain an embedded line break; map them to single-line
# equivalents. Headings that are not listed are kept as they are.
application_header_fixes = {
    'Pending further \nreview': 'Pending further review',
    'Other \ngrants': 'Other grants',
    'Total pending initial \ndecision': 'Total pending initial decision',
}
Final_table['Asylum Application'] = Final_table['Asylum Application'].map(
    lambda heading: application_header_fixes.get(heading, heading))

In [12]:
# Convert every column except 'Value' and 'Period' to a categorical dtype and
# show the column name followed by its categories.
for col in Final_table.columns:
    if col in ('Value', 'Period'):
        continue
    Final_table[col] = Final_table[col].astype('category')
    display(col)
    display(Final_table[col].cat.categories)

'Country of nationality'

Index(['afghanistan', 'africa-north', 'africa-sub-saharan', 'albania',
       'algeria', 'america-central-and-south', 'america-north',
       'american-samoa', 'andorra', 'angola',
       ...
       'vatican-city', 'venezuela', 'vietnam', 'virgin-islands-british',
       'virgin-islands-us', 'wallis-and-futuna', 'western-sahara', 'yemen',
       'zambia', 'zimbabwe'],
      dtype='object', length=264)

'Asylum Application'

Index(['3rd country refusals', 'Applications received at ports',
       'Applications received in-country', 'Certified refusals',
       'Fresh claims', 'Grants of DL', 'Grants of ELR', 'Grants of HP',
       'Grants of asylum', 'Non-compliance refusals',
       'Non-substantiated withdrawals', 'Other grants', 'Other refusals',
       'Pending further review',
       'Pending initial decision for less than or equal to 6 months',
       'Pending initial decision for more than 6 months', 'Total applications',
       'Total grants', 'Total initial decisions', 'Total pending',
       'Total pending initial decision', 'Total refusals',
       'Total withdrawals'],
      dtype='object')

'Measure Type'

Index(['Count'], dtype='object')

'Unit'

Index(['Applications'], dtype='object')

In [13]:
import urllib.request as request
import csv
import io
import requests

# Download the asylum application status codelist once and parse it straight
# into a frame.
url = "https://raw.githubusercontent.com/ONS-OpenData/ref_migration/master/codelists/asylum-application-status.csv"
response = requests.get(url)
# Fail on an HTTP error instead of parsing an error page as CSV.
response.raise_for_status()
status_codes = pd.read_csv(io.StringIO(response.content.decode('utf-8')))

# Left join on the label text: every observation is kept, and labels absent
# from the codelist end up with NaN in the joined columns.
Final_table = pd.merge(Final_table, status_codes, how='left',
                       left_on='Asylum Application', right_on='Label')
# Expose the codelist's 'Notation' column as 'Asylum Application Status'.
Final_table = Final_table.rename(columns={'Notation': 'Asylum Application Status'})

In [14]:
# Keep only the columns listed below, in this order.
Final_table = Final_table[[
    'Country of nationality',
    'Period',
    'Asylum Application Status',
    'Measure Type',
    'Value',
    'Unit',
]]

In [15]:
# Create the 'out' directory if needed (including parents), then write the
# table there as CSV without the index column.
destinationFolder = Path('out')
destinationFolder.mkdir(parents=True, exist_ok=True)

Final_table.to_csv(destinationFolder / 'as_01_q.csv', index=False)