Table 4.7.1: Sexual orientation, new presentations to treatment

In [1]:
from gssutils import *

# Only fetch the publication when this notebook is run on its own.
# NOTE(review): when is_interactive() is false, `scraper` and `tabs` are not
# defined here - presumably a driving notebook provides them; confirm.
if is_interactive():
    scraper = Scraper(
        'https://www.gov.uk/government/collections/alcohol-and-drug-misuse-and-treatment-statistics'
    )
    # Narrow the collection to the most recent adult treatment release.
    scraper.select_dataset(
        title=lambda title: title.startswith('Substance misuse treatment for adults'),
        latest=True,
    )
    # Index every sheet of the 'Data tables' distribution by its sheet name.
    tabs = dict(
        (sheet.name, sheet)
        for sheet in scraper.distribution(
            title=lambda title: title.startswith('Data tables')
        ).as_databaker()
    )

In [2]:
# Pick the sheet this notebook transforms out of the name -> sheet mapping.
tab = tabs['Table 4.7.1']

https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/752515/AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx

In [3]:
# Anchor on the single 'Sexual orientation' header cell; everything else is
# located relative to it.
cell = tab.filter('Sexual orientation')
cell.assert_one()

# Row labels: the non-empty cells in the anchor's column (anchor included).
Sexualorientation = (
    cell.expand(DOWN)
    .is_not_blank()
    .is_not_whitespace()
)

# Column group labels: the non-empty cells in the anchor's row (anchor included).
Clients = (
    cell.expand(RIGHT)
    .is_not_blank()
    .is_not_whitespace()
)

# Region to exclude: starts at offset (0, 1) from any 'Inconsistent/missing'
# label and extends RIGHT from there.
obs = (
    tab.filter('Inconsistent/missing')
    .shift(0, 1)
    .expand(RIGHT)
)

# Observations: non-empty cells filled DOWN from each 'n' sub-header,
# minus the excluded region above.
observations = (
    tab.filter('n')
    .fill(DOWN)
    .is_not_blank()
    .is_not_whitespace()
) - obs

# Header lookups plus the constant columns attached to every observation.
Dimensions = [
    HDim(Sexualorientation, 'Treatment group', DIRECTLY, LEFT),
    HDim(Clients, 'Clients in treatment', CLOSEST, LEFT),
    HDimConst('Measure Type', 'Count'),
    HDimConst('Unit', 'new-presentations-to-treatment'),
    HDimConst('Period', 'gregorian-interval/2017-04-01T00:00:00/P1Y'),
]

In [4]:
# Tie the observation cells to their dimension lookups.
c1 = ConversionSegment(
    observations,
    Dimensions,
    processTIMEUNIT=True,
)

# The HTML preview is only produced when the notebook is run interactively.
if is_interactive():
    savepreviewhtml(c1)

0,1,2
OBS,Treatment group,Clients in treatment

0,1,2,3,4,5,6,7,8,9,10
"Table 4.7.1: Sexual orientation, new presentations to treatment",,,,,,,,,,
Link back to the index,,,,,,,,,,
Sexual orientation,Opiate,,Non-opiate only,,Non-opiate and Alcohol,,Alcohol only,,Total,
,n,%,n,%,n,%,n,%,n,%
Heterosexual,36582.0,0.897233395467478,13761.0,0.874213836477987,15763.0,0.883972633467923,44734.0,0.896742507767866,110840.0,0.89221605087338
Gay/Lesbian,565.0,0.0138575492985382,520.0,0.0330347500158821,559.0,0.0313481381785554,1315.0,0.0263606294477298,2959.0,0.0238187233357482
Bi-Sexual,724.0,0.0177572844108702,314.0,0.0199479067403596,381.0,0.0213660834454913,511.0,0.0102435601884334,1930.0,0.0155356999114546
Client asked and does not know or is not sure,75.0,0.00183949769449622,41.0,0.00260466298202147,42.0,0.00235531628532974,100.0,0.0020046106043901,258.0,0.00207679304515817
Not Stated,2481.0,0.0608505837339351,921.0,0.0585096245473604,945.0,0.0529946164199192,2812.0,0.0563696501954495,7159.0,0.0576269822104162
Other,345.0,0.00846168939468262,184.0,0.011689219236389,142.0,0.00796321220278152,413.0,0.0082790417961311,1084.0,0.00872575062384287


In [5]:
# Flatten the conversion segment into a pandas DataFrame; later cells read
# its 'OBS', 'Treatment group' and 'Clients in treatment' columns.
new_table = c1.topandas()




In [6]:
import numpy as np
# Turn the raw 'OBS' column into an integer 'Value' column:
#   1. treat empty-string observations as missing,
#   2. drop rows with a missing observation,
#   3. rename 'OBS' to 'Value',
#   4. cast the remaining observations to int.
#
# The cleaned column is assigned back through .assign() rather than calling
# replace(..., inplace=True) on new_table['OBS']: an in-place call on a column
# selection operates on an intermediate object and does not update new_table
# under pandas copy-on-write, which would leave the blanks in place.
#
# astype(int) raises ValueError on any non-numeric marker (e.g. '-'), exactly
# as before; the former per-value check for '-' ran after that cast and so
# could never match, hence it is not repeated here.
new_table = (
    new_table
    .assign(OBS=new_table['OBS'].replace('', np.nan))
    .dropna(subset=['OBS'])
    .rename(columns={'OBS': 'Value'})
    .astype({'Value': int})
)

In [7]:
# Lookup from the spreadsheet's row labels to the notation written to the
# 'Basis of treatment' column.
basis_of_treatment_codes = {
    'Sexual orientation': 'sexual-orientation',
    'Heterosexual': 'sexual-orientation/heterosexual',
    'Gay/Lesbian': 'sexual-orientation/gay/lesbian',
    'Bi-Sexual': 'sexual-orientation/bi-sexual',
    'Client asked and does not know or is not sure': 'sexual-orientation/client-asked-and-does-not-know-or-is-not-sure',
    'Not Stated': 'sexual-orientation/not-stated',
    'Other': 'sexual-orientation/other',
    'Inconsistent/missing': 'sexual-orientation/inconsistent/missing',
    'Total': 'sexual-orientation/total',
}

# Labels without an entry in the lookup are passed through unchanged.
new_table['Basis of treatment'] = new_table['Treatment group'].map(
    lambda label: basis_of_treatment_codes.get(label, label)
)

In [8]:
import urllib.request as request
import csv
import io
import requests
# Translate the 'Clients in treatment' labels into codelist notations using
# the published substance-type codelist (joined on its 'Label' column; its
# 'Notation' column becomes 'Substance type'), then keep only the output
# columns in their final order.
vrl="https://raw.githubusercontent.com/ONS-OpenData/ref_alcohol/master/codelists/substance-type.csv"
# A timeout stops the notebook hanging indefinitely on a stalled connection.
response = requests.get(vrl, timeout=30)
# Fail loudly on an HTTP error: without this an error page would be parsed as
# CSV and the left merge below would silently leave 'Substance type' empty.
response.raise_for_status()
t = response.content
g = pd.read_csv(io.StringIO(t.decode('utf-8')))
# Left join: rows whose label is absent from the codelist are kept, with a
# missing 'Substance type'.
new_table = pd.merge(new_table, g, how='left', left_on='Clients in treatment', right_on='Label')
new_table = new_table.rename(columns={'Notation': 'Substance type'})
new_table = new_table[['Period', 'Basis of treatment', 'Substance type', 'Measure Type', 'Value', 'Unit']]

In [9]:
# Display the finished table as this cell's output.
new_table

Unnamed: 0,Period,Basis of treatment,Substance type,Measure Type,Value,Unit
0,gregorian-interval/2017-04-01T00:00:00/P1Y,sexual-orientation/heterosexual,opiate,Count,36582,clients-in-treatment
1,gregorian-interval/2017-04-01T00:00:00/P1Y,sexual-orientation/heterosexual,non-opiate-only,Count,13761,clients-in-treatment
2,gregorian-interval/2017-04-01T00:00:00/P1Y,sexual-orientation/heterosexual,non-opiate-and-alcohol,Count,15763,clients-in-treatment
3,gregorian-interval/2017-04-01T00:00:00/P1Y,sexual-orientation/heterosexual,alcohol-only,Count,44734,clients-in-treatment
4,gregorian-interval/2017-04-01T00:00:00/P1Y,sexual-orientation/heterosexual,total,Count,110840,clients-in-treatment
5,gregorian-interval/2017-04-01T00:00:00/P1Y,sexual-orientation/gay/lesbian,opiate,Count,565,clients-in-treatment
6,gregorian-interval/2017-04-01T00:00:00/P1Y,sexual-orientation/gay/lesbian,non-opiate-only,Count,520,clients-in-treatment
7,gregorian-interval/2017-04-01T00:00:00/P1Y,sexual-orientation/gay/lesbian,non-opiate-and-alcohol,Count,559,clients-in-treatment
8,gregorian-interval/2017-04-01T00:00:00/P1Y,sexual-orientation/gay/lesbian,alcohol-only,Count,1315,clients-in-treatment
9,gregorian-interval/2017-04-01T00:00:00/P1Y,sexual-orientation/gay/lesbian,total,Count,2959,clients-in-treatment
