Table 4.7.1: Sexual orientation, new presentations to treatment

In [1]:
from gssutils import *

if is_interactive():
    import requests
    from cachecontrol import CacheControl
    from cachecontrol.caches.file_cache import FileCache
    from cachecontrol.heuristics import LastModified
    from pathlib import Path

    # Wrap the HTTP session with a file-backed cache stored in ./.cache.
    session = CacheControl(requests.Session(),
                           cache=FileCache('.cache'),
                           heuristic=LastModified())

    # The downloaded spreadsheet is kept under ./in.
    sourceFolder = Path('in')
    sourceFolder.mkdir(exist_ok=True)

    inputURL = 'https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/752515/AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx'
    inputFile = sourceFolder / 'AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx'
    response = session.get(inputURL)
    # Fail loudly on an HTTP error rather than saving an error page as the spreadsheet.
    response.raise_for_status()
    with open(inputFile, 'wb') as f:
        f.write(response.content)

In [2]:
# Load only the worksheet for this table and take the first (index 0) tab returned.
tab = loadxlstabs(
    inputFile,
    sheetids='Table 4.7.1',
)[0]

Loading in\AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx which has size 272149 bytes
Table names: ['Table 4.7.1']


https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/752515/AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx

In [3]:
# Observation cells: every non-blank cell in the block that starts at B5
# and extends downwards and to the right.
observations = (
    tab.excel_ref('B5')
       .expand(DOWN)
       .expand(RIGHT)
       .is_not_blank()
)
observations

{<G11 1.0>, <D12 975.0>, <J5 110840.0>, <C6 0.0138575492985382>, <C11 1.0>, <D11 15741.0>, <E8 0.00260466298202147>, <B9 2481.0>, <I6 0.0263606294477298>, <J12 3077.0>, <F10 142.0>, <K7 0.0155356999114546>, <D8 41.0>, <C7 0.0177572844108702>, <F5 15763.0>, <D13 16716.0>, <D6 520.0>, <J13 127307.0>, <D9 921.0>, <E5 0.874213836477987>, <I5 0.896742507767866>, <B8 75.0>, <C8 0.00183949769449622>, <B6 565.0>, <D7 314.0>, <I11 1.0>, <H9 2812.0>, <C9 0.0608505837339351>, <I8 0.0020046106043901>, <B13 41178.0>, <K11 1.0>, <H5 44734.0>, <H7 511.0>, <E7 0.0199479067403596>, <H12 771.0>, <B12 406.0>, <H6 1315.0>, <F9 945.0>, <G9 0.0529946164199192>, <K8 0.00207679304515817>, <F11 17832.0>, <E11 1.0>, <I9 0.0563696501954495>, <G8 0.00235531628532974>, <H10 413.0>, <D10 184.0>, <G5 0.883972633467923>, <E6 0.0330347500158821>, <D5 13761.0>, <J6 2959.0>, <J9 7159.0>, <E10 0.011689219236389>, <J10 1084.0>, <K10 0.00872575062384287>, <B10 345.0>, <H8 100.0>, <K6 0.0238187233357482>, <K9 0.057626982210

In [4]:
# Row labels: non-blank cells in column A from A5 downwards, with A13 removed
# from the selection (so A13 is never used as a label for this dimension).
Sexualorientation = (
    tab.excel_ref('A5').expand(DOWN).is_not_blank()
    - tab.excel_ref('A13')
)
Sexualorientation

{<A11 'Total'>, <A5 'Heterosexual'>, <A6 'Gay/Lesbian'>, <A8 'Client asked and does not know or is not sure'>, <A10 'Other'>, <A9 'Not Stated'>, <A7 'Bi-Sexual'>, <A12 'Inconsistent/missing'>}

In [5]:
# Client-group headers: non-blank cells in row 3, from B3 rightwards.
Clients = (
    tab.excel_ref('B3')
       .expand(RIGHT)
       .is_not_blank()
)
Clients

{<F3 'Non-opiate and Alcohol'>, <H3 'Alcohol only'>, <B3 'Opiate'>, <J3 'Total'>, <D3 'Non-opiate only'>}

In [6]:
# Measure markers ('n' / '%'): non-blank cells in row 4, from B4 rightwards.
MeasureType = (
    tab.excel_ref('B4')
       .expand(RIGHT)
       .is_not_blank()
)
MeasureType

{<H4 'n'>, <B4 'n'>, <C4 '%'>, <D4 'n'>, <G4 '%'>, <K4 '%'>, <E4 '%'>, <I4 '%'>, <F4 'n'>, <J4 'n'>}

In [7]:
# Dimension lookups applied to every observation cell:
#   - 'Sexual orientation' comes from the column-A labels (DIRECTLY, LEFT),
#   - 'Clients' comes from the row-3 headers (CLOSEST, LEFT),
#   - 'Measure Type' comes from the row-4 markers (DIRECTLY, ABOVE),
#   - 'Unit' is the constant 'People' for all observations.
Dimensions = [
    HDim(Sexualorientation, 'Sexual orientation', DIRECTLY, LEFT),
    HDim(Clients, 'Clients', CLOSEST, LEFT),
    HDim(MeasureType, 'Measure Type', DIRECTLY, ABOVE),
    HDimConst('Unit', 'People'),
]

In [8]:
# Combine the observation cells with their dimension lookups into one segment.
c1 = ConversionSegment(
    observations,
    Dimensions,
    processTIMEUNIT=True,
)
# Optional HTML preview of the segment, left disabled:
# if is_interactive():
#     savepreviewhtml(c1)

In [9]:
# Convert the segment to a pandas DataFrame (an OBS column plus one column per dimension).
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,Sexual orientation,Clients,Measure Type,Unit
0,36582.000000,Heterosexual,Opiate,n,People
1,0.897233,Heterosexual,Opiate,%,People
2,13761.000000,Heterosexual,Non-opiate only,n,People
3,0.874214,Heterosexual,Non-opiate only,%,People
4,15763.000000,Heterosexual,Non-opiate and Alcohol,n,People
5,0.883973,Heterosexual,Non-opiate and Alcohol,%,People
6,44734.000000,Heterosexual,Alcohol only,n,People
7,0.896743,Heterosexual,Alcohol only,%,People
8,110840.000000,Heterosexual,Total,n,People
9,0.892216,Heterosexual,Total,%,People


In [10]:
# Drop observations whose value is exactly zero.
# NOTE(review): this also removes any genuine zero counts/percentages - confirm that is intended.
# .copy() detaches the filtered frame from the one it was sliced from, so the column
# assignments in later cells do not trigger pandas' SettingWithCopyWarning.
new_table = new_table[new_table['OBS'] != 0].copy()

In [11]:
# Rename the 'OBS' column to 'Value'; every other column name is kept as it is.
new_table.columns = [
    {'OBS': 'Value'}.get(column_name, column_name)
    for column_name in new_table.columns
]

In [12]:
# Spell out the spreadsheet's terse markers as measure-type names;
# any other value passes through unchanged.
new_table['Measure Type'] = new_table['Measure Type'].map(
    lambda marker: 'Count' if marker == 'n'
    else ('Percentage' if marker == '%' else marker)
)

In [13]:
# Preview the first rows to check the measure types were relabelled.
new_table.head()

Unnamed: 0,Value,Sexual orientation,Clients,Measure Type,Unit
0,36582.0,Heterosexual,Opiate,Count,People
1,0.897233,Heterosexual,Opiate,Percentage,People
2,13761.0,Heterosexual,Non-opiate only,Count,People
3,0.874214,Heterosexual,Non-opiate only,Percentage,People
4,15763.0,Heterosexual,Non-opiate and Alcohol,Count,People


In [14]:
# Inspect the column dtypes before the Value column is converted to strings.
new_table.dtypes

Value                 float64
Sexual orientation     object
Clients                object
Measure Type           object
Unit                   object
dtype: object

In [15]:
# Store every value as its string representation (the column is no longer numeric afterwards).
new_table['Value'] = new_table['Value'].astype(str)

In [16]:
# Preview the first three rows after the string conversion.
new_table.head(3)

Unnamed: 0,Value,Sexual orientation,Clients,Measure Type,Unit
0,36582.0,Heterosexual,Opiate,Count,People
1,0.897233395467478,Heterosexual,Opiate,Percentage,People
2,13761.0,Heterosexual,Non-opiate only,Count,People


In [17]:
# Relabel the 'Total' row as 'All'; every other label passes through unchanged.
new_table['Sexual orientation'] = new_table['Sexual orientation'].map(
    lambda label: 'All' if label == 'Total' else label
)

In [18]:
# List the distinct labels to confirm the relabelling and to spot rows that have no label.
new_table['Sexual orientation'].unique()

array(['Heterosexual', 'Gay/Lesbian', 'Bi-Sexual',
       'Client asked and does not know or is not sure', 'Not Stated',
       'Other', 'All', 'Inconsistent/missing', None], dtype=object)

In [19]:
def user_perc(x):
    """Return a label for a 'Sexual orientation' value, filling in missing ones.

    Args:
        x: A value from the 'Sexual orientation' column; None when the
            observation had no row label.

    Returns:
        'All inclusive Inconsistent/missing' when ``x`` is None, otherwise
        ``x`` unchanged.
    """
    # Identity check: only a real None counts as missing, regardless of how
    # the value implements ``==``.
    if x is None:
        return 'All inclusive Inconsistent/missing'
    return x
    
# Fill in the missing sexual-orientation labels element-wise on the column itself;
# a row-wise DataFrame.apply is unnecessary because user_perc only reads this one column.
new_table['Sexual orientation'] = new_table['Sexual orientation'].map(user_perc)

In [20]:
# Relabel the 'Total' client group as 'All Clients'; other groups pass through unchanged.
new_table['Clients'] = new_table['Clients'].map(
    lambda group: 'All Clients' if group == 'Total' else group
)

In [21]:
# Preview the first three rows after relabelling the client groups.
new_table.head(3)

Unnamed: 0,Value,Sexual orientation,Clients,Measure Type,Unit
0,36582.0,Heterosexual,Opiate,Count,People
1,0.897233395467478,Heterosexual,Opiate,Percentage,People
2,13761.0,Heterosexual,Non-opiate only,Count,People


In [22]:
# Keep only the output columns, in the order they are written out.
new_table = new_table[[
    'Sexual orientation',
    'Clients',
    'Measure Type',
    'Value',
    'Unit',
]]

In [23]:
if is_interactive():
    # Write the tidied table to out/table4.7.1.csv, creating the folder if needed.
    SubstancetinationFolder = Path('out')
    SubstancetinationFolder.mkdir(parents=True, exist_ok=True)
    new_table.to_csv(SubstancetinationFolder / 'table4.7.1.csv', index=False)