Table as_16_q: Asylum seekers in receipt of Section 95 support, by local authority, as at end of quarter

In [1]:
# Run the shared scraping helper notebook.
# NOTE(review): `pd`, `session`, `Path` and `writeMetadata` are used further down
# without being imported in this notebook, so they are presumably provided by
# this helper -- confirm against lib/scrape_govuk.ipynb.
%run lib/scrape_govuk.ipynb
# Scrape the GOV.UK release page; later cells read the file listing from
# metadata['files'] and hand `metadata` to writeMetadata.
metadata = scrape('https://www.gov.uk/government/statistics/immigration-statistics-october-to-december-2017-data-tables')

In [2]:
import pyexcel
from io import BytesIO

def _is_wanted_ods(entry):
    """Return True for the ODS entry holding the volume 4 asylum tables."""
    return (entry['type'] == 'ODS' and
            entry['title'] == 'Asylum data tables immigration statistics October to December 2017 volume 4')


# Exactly one file in the scraped listing should match; fail loudly otherwise.
ods_files = list(filter(_is_wanted_ods, metadata['files']))
assert len(ods_files) == 1, 'Should be exactly one matching ODS file'

# Keep the URL and title of the match; the title is reused when writing metadata.
ods_entry = ods_files[0]
ods_url = ods_entry['url']
ods_title = ods_entry['title']

# Download the workbook into memory and wrap it as a file-like object.
response = session.get(ods_url)
ods_file = BytesIO(response.content)

# Read only the sheet for table as_16_q.
data = pyexcel.get_sheet(file_content=ods_file, file_type='ods', sheet_name='as_16_q')

In [3]:
# Turn the sheet into a DataFrame with default integer row/column labels;
# the title, spacer and header lines are still ordinary rows at this point.
sheet_rows = data.get_array()
df = pd.DataFrame(sheet_rows)
df

Unnamed: 0,0,1,2,3,4,5,6
0,Table as_16_q: Asylum seekers in receipt of Se...,,,,,,
1,Back to contents,,,,,,
2,,,,,,,
3,Quarter,Region,Local Authority,Total supported under Section 95,In receipt of subsistence \nonly,In dispersed accommodation,Disbenefited
4,2003 Q4,*Total,*Total,80123,30362,47148,2613
5,2003 Q4,East Midlands,*Total East Midlands,4524,1027,3465,32
6,2003 Q4,East of England,*Total East of England,1944,1216,655,73
7,2003 Q4,London,*Total London,24916,22017,746,2153
8,2003 Q4,North East,*Total North East,5172,216,4930,26
9,2003 Q4,North West,*Total North West,9093,960,8074,59


In [4]:
# Take an explicit copy of everything from the blank spacer row (row 2) onwards.
# Relabelling a plain slice of `df` in place is what raised the
# SettingWithCopyWarning; working on a copy and rebinding avoids it.
observations = df.iloc[2:, :].copy()

# The second row of the slice (row 3 of the sheet) holds the column headings.
# Used as a mapper, it renames the positional labels 0..6 to those headings.
observations = observations.rename(columns=observations.iloc[1])

# Preview only: the result is not assigned, so `observations` still contains the
# spacer row and the header row. Later cells depend on that (they drop the first
# melted row twice), so the contents are deliberately left unchanged here.
observations.drop(observations.index[0])

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  return super(DataFrame, self).rename(**kwargs)


Unnamed: 0,Quarter,Region,Local Authority,Total supported under Section 95,In receipt of subsistence only,In dispersed accommodation,Disbenefited
3,Quarter,Region,Local Authority,Total supported under Section 95,In receipt of subsistence \nonly,In dispersed accommodation,Disbenefited
4,2003 Q4,*Total,*Total,80123,30362,47148,2613
5,2003 Q4,East Midlands,*Total East Midlands,4524,1027,3465,32
6,2003 Q4,East of England,*Total East of England,1944,1216,655,73
7,2003 Q4,London,*Total London,24916,22017,746,2153
8,2003 Q4,North East,*Total North East,5172,216,4930,26
9,2003 Q4,North West,*Total North West,9093,960,8074,59
10,2003 Q4,Northern Ireland,*Total Northern Ireland,153,17,136,0
11,2003 Q4,Other and Unknown,*Total Other and Unknown,:,:,:,:
12,2003 Q4,Scotland,*Total Scotland,5818,255,5563,0


In [5]:
# Reshape from wide to long: one row per (quarter, region, local authority,
# measure). Note that the column holding the measure name is itself labelled
# "Total supported under Section 95", the same text as the first measure.
id_columns = ['Quarter', 'Region', 'Local Authority']
new_table = pd.melt(
    observations,
    id_vars=id_columns,
    var_name="Total supported under Section 95",
    value_name="Value",
)

In [6]:
# Drop the first melted row. `observations` still starts with the sheet's blank
# spacer row (the drop in the cell that builds it is display-only), so that row
# is first here. Only the first measure's copy is removed by this.
# Caution: this is label-based on the current first index, so re-running the
# cell removes one more row each time.
new_table = new_table.drop(new_table.index[0])

In [7]:
# Non-null row count per column before any filtering.
new_table.count()

Quarter                             104507
Region                              104507
Local Authority                     104507
Total supported under Section 95    104507
Value                               104507
dtype: int64

In [8]:
# Discard any row that has a missing value in any column.
new_table = new_table.dropna()

In [9]:
# Keep only rows whose value is not the '.' placeholder.
# NOTE(review): the rows displayed earlier use ':' as a placeholder, which this
# filter does not remove; those rows are only eliminated later, when non-numeric
# values are coerced to 0 and zero rows are filtered out. Confirm '.' is the
# intended marker here.
not_dot = new_table['Value'].ne('.')
new_table = new_table[not_dot]

In [10]:
# Keep only rows with a non-zero value; genuine zero counts are removed as well.
non_zero = new_table['Value'].ne(0)
new_table = new_table[non_zero]

In [11]:
# Keep only rows whose value is not the 'z' placeholder.
not_z = new_table['Value'].ne('z')
new_table = new_table[not_z]

In [12]:
# Non-null row count per column after the missing/placeholder/zero filters.
new_table.count()

Quarter                             47186
Region                              47186
Local Authority                     47186
Total supported under Section 95    47186
Value                               47186
dtype: int64

In [13]:
# Attach the constant descriptor columns, in this order, to every observation.
for column_name, constant in (('Unit', 'People'), ('Measure Type', 'Count')):
    new_table[column_name] = constant

In [14]:
# Inspect column dtypes; everything, including Value, is still `object` here.
new_table.dtypes

Quarter                             object
Region                              object
Local Authority                     object
Total supported under Section 95    object
Value                               object
Unit                                object
Measure Type                        object
dtype: object

In [15]:
# Peek at the first rows; the first one is the sheet's header text echoed
# into the data rather than a real observation.
new_table.head()

Unnamed: 0,Quarter,Region,Local Authority,Total supported under Section 95,Value,Unit,Measure Type
1,Quarter,Region,Local Authority,Total supported under Section 95,Total supported under Section 95,People,Count
2,2003 Q4,*Total,*Total,Total supported under Section 95,80123,People,Count
3,2003 Q4,East Midlands,*Total East Midlands,Total supported under Section 95,4524,People,Count
4,2003 Q4,East of England,*Total East of England,Total supported under Section 95,1944,People,Count
5,2003 Q4,London,*Total London,Total supported under Section 95,24916,People,Count


In [16]:
# Drop the first remaining row, i.e. the header text that was melted into the
# data (visible as the first row of the preceding head() output). Only the
# first measure's copy is removed; the copies under the other measures carry
# non-numeric values and are removed later via coercion to 0 plus the final
# zero filter.
# Caution: label-based on the current first index, so re-running this cell
# would delete a real observation.
new_table = new_table.drop(new_table.index[0])

In [17]:
# List the distinct measure labels; one of them still contains an embedded
# newline, which a later cell cleans up.
new_table['Total supported under Section 95'].unique()

array(['Total supported under Section 95',
       'In receipt of subsistence \nonly', 'In dispersed accommodation',
       'Disbenefited'], dtype=object)

In [18]:
# Convert values to numbers. Anything non-numeric (placeholder markers,
# leftover header text) becomes NaN and is then replaced with 0, which the
# final zero filter removes.
numeric_values = pd.to_numeric(new_table['Value'], errors='coerce')
new_table['Value'] = numeric_values.fillna(0)

In [19]:
# Cast to a plain integer dtype; safe because NaNs were filled with 0 in the
# previous step. The final dtypes output shows this resolving to int32.
new_table['Value'] = new_table['Value'].astype(int)

In [20]:
# Start the Period column as a string copy of Quarter (e.g. '2003 Q4').
new_table['Period'] = new_table['Quarter'].astype(str)

In [22]:
# Check the numeric Value column and the new Period column.
new_table.head()

Unnamed: 0,Quarter,Region,Local Authority,Total supported under Section 95,Value,Unit,Measure Type,Period
2,2003 Q4,*Total,*Total,Total supported under Section 95,80123,People,Count,2003 Q4
3,2003 Q4,East Midlands,*Total East Midlands,Total supported under Section 95,4524,People,Count,2003 Q4
4,2003 Q4,East of England,*Total East of England,Total supported under Section 95,1944,People,Count,2003 Q4
5,2003 Q4,London,*Total London,Total supported under Section 95,24916,People,Count,2003 Q4
6,2003 Q4,North East,*Total North East,Total supported under Section 95,5172,People,Count,2003 Q4


In [23]:
# Remove leading asterisks from region names ('*Total' -> 'Total').
new_table['Region'] = new_table['Region'].str.lstrip('*')

In [24]:
# Remove leading asterisks from local authority names
# ('*Total East Midlands' -> 'Total East Midlands').
new_table['Local Authority'] = new_table['Local Authority'].str.lstrip('*')

In [25]:
# Replace spaces with hyphens in the period label ('2003 Q4' -> '2003-Q4').
new_table['Period'] = [quarter.replace(' ', '-') for quarter in new_table['Period']]

In [26]:
# Build the 'Support received' dimension from the measure labels, removing
# the line break embedded in the subsistence-only heading.
new_table['Support received'] = [
    label.replace('In receipt of subsistence \nonly', 'In receipt of subsistence only')
    for label in new_table['Total supported under Section 95']
]

In [27]:
# Keep only the output columns, in their final order; this discards the
# intermediate Quarter and original measure-label columns.
output_columns = [
    'Period',
    'Region',
    'Local Authority',
    'Support received',
    'Measure Type',
    'Value',
    'Unit',
]
new_table = new_table[output_columns]

In [28]:
# Preview the tidied table in its output column order.
new_table.head()

Unnamed: 0,Period,Region,Local Authority,Support received,Measure Type,Value,Unit
2,2003-Q4,Total,Total,Total supported under Section 95,Count,80123,People
3,2003-Q4,East Midlands,Total East Midlands,Total supported under Section 95,Count,4524,People
4,2003-Q4,East of England,Total East of England,Total supported under Section 95,Count,1944,People
5,2003-Q4,London,Total London,Total supported under Section 95,Count,24916,People
6,2003-Q4,North East,Total North East,Total supported under Section 95,Count,5172,People


In [29]:
# Final clean-up: drop zero-valued rows, which includes every entry that was
# coerced to 0 because its original value was not numeric.
has_value = new_table['Value'].ne(0)
new_table = new_table[has_value]

In [30]:
# Make sure the output directory exists (creating parents as needed).
destinationFolder = Path('out')
destinationFolder.mkdir(parents=True, exist_ok=True)

# Write the tidy observations without the DataFrame index.
csv_path = destinationFolder / 'as_16_q.csv'
new_table.to_csv(csv_path, index=False)

# Hand the scraped metadata, a dataset label, the source workbook title and
# a theme to the project helper that writes the metadata.
writeMetadata(
    metadata,
    'Asylum seekers receiving support',
    ods_title,
    'Migration',
)

In [31]:
# Non-null row count per column for the table as written out.
new_table.count()

Period              34010
Region              34010
Local Authority     34010
Support received    34010
Measure Type        34010
Value               34010
Unit                34010
dtype: int64

In [32]:
# Final dtypes: Value is an integer column, everything else is `object`.
new_table.dtypes

Period              object
Region              object
Local Authority     object
Support received    object
Measure Type        object
Value                int32
Unit                object
dtype: object