Table 4.4.1: Ethnicity of all clients in treatment

In [1]:
from gssutils import *

if is_interactive():
    # Only when gssutils reports an interactive session: locate the latest
    # adult substance-misuse release and load its "Data tables" spreadsheet.
    scraper = Scraper(
        'https://www.gov.uk/government/collections/alcohol-and-drug-misuse-and-treatment-statistics'
    )
    scraper.select_dataset(
        title=lambda dataset_title: dataset_title.startswith('Substance misuse treatment for adults'),
        latest=True,
    )
    # Index every worksheet by its name so a single table can be picked out later.
    tabs = {
        sheet.name: sheet
        for sheet in scraper.distribution(
            title=lambda dist_title: dist_title.startswith('Data tables')
        ).as_databaker()
    }

In [2]:
# Pick the worksheet for this notebook's table out of the name -> tab mapping.
# `tabs` is only built in this notebook when is_interactive() is true;
# presumably a parent notebook supplies it otherwise — TODO confirm.
tab = tabs['Table 4.4.1']

https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/752515/AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx

In [3]:
# Anchor on the 'Ethnicity' header cell; fail early if the sheet layout changed
# and the label is missing or appears more than once.
cell = tab.filter('Ethnicity')
cell.assert_one()
# Cells to exclude from the observations: the row one step up from the
# 'Inconsistent/missing' label, extended to the right.
# NOTE(review): the reason this particular row is excluded is not visible here — confirm.
obs = tab.filter('Inconsistent/missing').shift(0,-1).expand(RIGHT)
# Observations are the non-blank cells beneath each 'n' (count) column header,
# so the '%' columns are not picked up; the excluded row above is then removed.
observations = tab.filter('n').fill(DOWN).is_not_blank().is_not_whitespace() - obs
# Row labels: the 'Ethnicity' header cell and the non-blank cells below it.
ethnicity = cell.expand(DOWN).is_not_blank().is_not_whitespace() 
# Column-group labels: the non-blank cells on the header row, starting at 'Ethnicity'.
Clients = cell.expand(RIGHT).is_not_blank().is_not_whitespace()
Dimensions = [
            # Each observation takes the ethnicity label from its own row.
            HDim(ethnicity,'Treatment group',DIRECTLY,LEFT),
            # Group headers sit above only the 'n' column of each n/% pair, so the
            # lookup is CLOSEST rather than DIRECTLY.
            HDim(Clients,'Clients in treatment',CLOSEST,LEFT),
            HDimConst('Measure Type','Count'),
            HDimConst('Unit','clients-in-treatment'),
            # NOTE(review): the period is hard-coded to the year starting 2017-04-01,
            # while the scraper selects latest=True — confirm this is updated per release.
            HDimConst('Period', 'gregorian-interval/2017-04-01T00:00:00/P1Y')
            ]

In [4]:
# Tie the selected observation cells to the dimensions defined for them.
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# The HTML preview of the selection is only rendered in interactive sessions.
if is_interactive():
    savepreviewhtml(c1)

0,1,2
OBS,Treatment group,Clients in treatment

0,1,2,3,4,5,6,7,8,9,10
Table 4.4.1: Ethnicity of all clients in treatment,,,,,,,,,,
Link back to the index,,,,,,,,,,
Ethnicity,Opiate,,Non-opiate only,,Non-opiate and Alcohol,,Alcohol only,,Total,
,n,%,n,%,n,%,n,%,n,%
White British,117280.0,0.85157055517637,18762.0,0.794865277071683,22782.0,0.832675438596491,63951.0,0.851623986257041,222775.0,0.844551689103378
Other white,5579.0,0.040509141604101,851.0,0.0360532113201152,860.0,0.0314327485380117,3057.0,0.0407095201949583,10347.0,0.039226018750545
Not stated,1782.0,0.0129391092200229,645.0,0.0273258769700051,549.0,0.0200657894736842,1716.0,0.0228516639367185,4692.0,0.0177876176647876
White Irish,1420.0,0.0103106257533292,179.0,0.00758346043043552,355.0,0.0129751461988304,1174.0,0.0156339472387573,3128.0,0.011858411776525
Indian,1593.0,0.0115667794542629,191.0,0.0080918488391798,280.0,0.010233918128655,1260.0,0.0167791937996884,3324.0,0.012601458038737
Caribbean,1185.0,0.00860428980119371,571.0,0.024190815116082,490.0,0.0179093567251462,492.0,0.00655187567416404,2738.0,0.0103799013568176


In [5]:
# Convert the conversion segment into a pandas DataFrame for the tidy-up steps below.
new_table = c1.topandas()




In [6]:
import numpy as np
# Turn the raw 'OBS' column into an integer 'Value' column.
#
# The steps are chained on the frame itself instead of using
# `new_table['OBS'].replace(..., inplace=True)`: that form mutates a temporary
# Series and does not update the frame under pandas Copy-on-Write.
# The former trailing `.map(... x == '-' ...)` was removed because the column
# is already int by then, so the '-' branch could never be taken.
new_table = (
    new_table
    .assign(OBS=new_table['OBS'].replace('', np.nan))  # empty strings carry no observation
    .dropna(subset=['OBS'])                            # discard rows without a value
    .rename(columns={'OBS': 'Value'})
    .astype({'Value': int})                            # raises if a non-numeric marker remains
)

In [7]:
# Lookup from the ethnicity labels used in the sheet to 'ethnicity/...' codes.
# Built once here; previously the dict literal lived inside the lambda and was
# re-created for every row of the table.
# NOTE(review): 'ethnicity/inconsistent/missing' contains an extra '/' compared
# with the other codes — confirm it matches the target codelist.
ethnicity_codes = {
    'Ethnicity': 'ethnicity',
    'White British': 'ethnicity/white-british',
    'Other white': 'ethnicity/other-white',
    'Not stated': 'ethnicity/not-stated',
    'White Irish': 'ethnicity/white-irish',
    'Indian': 'ethnicity/indian',
    'Caribbean': 'ethnicity/caribbean',
    'White and black Caribbean': 'ethnicity/white-and-black-caribbean',
    'Pakistani': 'ethnicity/pakistani',
    'Other Asian': 'ethnicity/other-asian',
    'Other': 'ethnicity/other',
    'Other black': 'ethnicity/other-black',
    'African': 'ethnicity/african',
    'Other mixed': 'ethnicity/other-mixed',
    'Bangladeshi': 'ethnicity/bangladeshi',
    'White and Asian': 'ethnicity/white-and-asian',
    'White and black African': 'ethnicity/white-and-black-african',
    'Chinese': 'ethnicity/chinese',
    'Unknown': 'ethnicity/unknown',
    'Inconsistent/missing': 'ethnicity/inconsistent/missing',
    'Total': 'ethnicity/total',
}

# Labels without an entry in the lookup are passed through unchanged.
new_table['Basis of treatment'] = new_table['Treatment group'].map(
    lambda label: ethnicity_codes.get(label, label))

In [8]:
import urllib.request as request
import csv
import io
import requests
# Download the substance-type codelist and use it to translate the
# 'Clients in treatment' labels into their codelist notation.
vrl = "https://raw.githubusercontent.com/ONS-OpenData/ref_alcohol/master/codelists/substance-type.csv"
# Fail loudly on a hung connection or an HTTP error instead of parsing an
# error page as if it were the CSV.
response = requests.get(vrl, timeout=30)
response.raise_for_status()
t = response.content
g = pd.read_csv(io.StringIO(t.decode('utf-8')))
# Left join keeps every observation; a label absent from the codelist's
# 'Label' column ends up with a missing 'Substance type'.
new_table = pd.merge(new_table, g, how='left', left_on='Clients in treatment', right_on='Label')
new_table = new_table.rename(columns={'Notation': 'Substance type'})
# Keep only the output columns, in their final order.
new_table = new_table[['Period', 'Basis of treatment', 'Substance type', 'Measure Type', 'Value', 'Unit']]

In [9]:
# Show the finished table as this cell's output.
new_table

Unnamed: 0,Period,Basis of treatment,Substance type,Measure Type,Value,Unit
0,gregorian-interval/2017-04-01T00:00:00/P1Y,ethnicity/white-british,opiate,Count,117280,clients-in-treatment
1,gregorian-interval/2017-04-01T00:00:00/P1Y,ethnicity/white-british,non-opiate-only,Count,18762,clients-in-treatment
2,gregorian-interval/2017-04-01T00:00:00/P1Y,ethnicity/white-british,non-opiate-and-alcohol,Count,22782,clients-in-treatment
3,gregorian-interval/2017-04-01T00:00:00/P1Y,ethnicity/white-british,alcohol-only,Count,63951,clients-in-treatment
4,gregorian-interval/2017-04-01T00:00:00/P1Y,ethnicity/white-british,total,Count,222775,clients-in-treatment
5,gregorian-interval/2017-04-01T00:00:00/P1Y,ethnicity/other-white,opiate,Count,5579,clients-in-treatment
6,gregorian-interval/2017-04-01T00:00:00/P1Y,ethnicity/other-white,non-opiate-only,Count,851,clients-in-treatment
7,gregorian-interval/2017-04-01T00:00:00/P1Y,ethnicity/other-white,non-opiate-and-alcohol,Count,860,clients-in-treatment
8,gregorian-interval/2017-04-01T00:00:00/P1Y,ethnicity/other-white,alcohol-only,Count,3057,clients-in-treatment
9,gregorian-interval/2017-04-01T00:00:00/P1Y,ethnicity/other-white,total,Count,10347,clients-in-treatment
