Table 6.2.2: Number and proportion of individuals who were smoking at the start of treatment and referred for smoking cessation interventions

In [1]:
from gssutils import *

if is_interactive():
    # Interactive run: download the source workbook into ./in.
    # The imports live inside this guard, so they are only needed when the
    # notebook is run interactively.
    # NOTE(review): when not interactive, inputFile is not defined in this
    # notebook -- presumably the caller supplies it; confirm.
    import requests
    from cachecontrol import CacheControl
    from cachecontrol.caches.file_cache import FileCache
    from cachecontrol.heuristics import LastModified
    from pathlib import Path

    # requests session wrapped with an on-disk cache kept in ./.cache.
    session = CacheControl(requests.Session(),
                           cache=FileCache('.cache'),
                           heuristic=LastModified())

    sourceFolder = Path('in')
    sourceFolder.mkdir(exist_ok=True)

    inputURL = 'https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/752515/AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx'
    inputFile = sourceFolder / 'AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx'
    response = session.get(inputURL)
    # Stop on an HTTP error instead of saving an error page as the workbook.
    response.raise_for_status()
    with open(inputFile, 'wb') as f:
        f.write(response.content)

In [2]:
# Load only the 'Table 6.2.2' sheet from the workbook; the result is indexed
# with [0] to take the single matching tab.
tab = loadxlstabs(inputFile, sheetids='Table 6.2.2')[0]

Loading in\AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx which has size 272149 bytes
Table names: ['Table 6.2.2']


https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/752515/AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx

In [3]:
# Observation cells: every non-blank cell from B4 downwards and to the right.
observations = (
    tab.excel_ref('B4')
    .expand(DOWN)
    .expand(RIGHT)
    .is_not_blank()
)
observations

{<C4 586.0>, <B4 19117.0>, <C6 277.0>, <B7 14613.0>, <B6 6896.0>, <D8 0.030994291667571>, <C7 467.0>, <D4 0.0306533451901449>, <C5 98.0>, <B8 46073.0>, <C8 1428.0>, <D6 0.0401682134570766>, <D7 0.0319578457537809>, <D5 0.0179915549843951>, <B5 5447.0>}

In [4]:
# Substance labels: the non-blank cells of column A from row 4 downwards.
substance = (
    tab.excel_ref('A4')
    .expand(DOWN)
    .is_not_blank()
)
substance

{<A7 'Alcohol only'>, <A4 'Opiate'>, <A6 'Non-opiate and alcohol'>, <A8 'Total'>, <A5 'Non-opiate'>}

In [5]:
# Column headers: the non-blank cells of row 3 from column B rightwards.
clients = (
    tab.excel_ref('B3')
    .expand(RIGHT)
    .is_not_blank()
)
clients

{<D3 '%'>, <B3 'Clients smoking at the start of treatment'>, <C3 'Clients that received smoking cessation interventions'>}

In [6]:
# Dimensions attached to every observation cell:
#   'Clients in treatment' - looked up from the row-3 headers (CLOSEST, LEFT)
#   'Substance'            - looked up from the column A labels (DIRECTLY, LEFT)
#   'Unit'                 - the constant 'People'
# NOTE(review): the '%' column also receives Unit 'People' -- confirm intended.
Dimensions = [
    HDim(clients, 'Clients in treatment', CLOSEST, LEFT),
    HDim(substance, 'Substance', DIRECTLY, LEFT),
    HDimConst('Unit', 'People'),
]

In [7]:
# Combine the observation cells with their dimension lookups into one segment.
# NOTE(review): processTIMEUNIT=True is passed although no time dimension is
# defined in Dimensions -- confirm it is needed.
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# Optional visual check of the segment (currently disabled):
# if is_interactive():
#     savepreviewhtml(c1)

In [8]:
# Convert the segment to a pandas DataFrame (one row per observation, with an
# OBS column plus one column per dimension) and display it.
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,Clients in treatment,Substance,Unit
0,19117.0,Clients smoking at the start of treatment,Opiate,People
1,586.0,Clients that received smoking cessation interv...,Opiate,People
2,0.030653,%,Opiate,People
3,5447.0,Clients smoking at the start of treatment,Non-opiate,People
4,98.0,Clients that received smoking cessation interv...,Non-opiate,People
5,0.017992,%,Non-opiate,People
6,6896.0,Clients smoking at the start of treatment,Non-opiate and alcohol,People
7,277.0,Clients that received smoking cessation interv...,Non-opiate and alcohol,People
8,0.040168,%,Non-opiate and alcohol,People
9,14613.0,Clients smoking at the start of treatment,Alcohol only,People


In [9]:
# Keep only observations whose value is not 0.  The explicit .copy() makes the
# result an independent frame, so the column assignments made in later cells
# do not trigger pandas' SettingWithCopyWarning.
new_table = new_table[new_table['OBS'] != 0].copy()

In [10]:
# Rename the OBS column to Value; every other column keeps its name.
new_table.columns = [
    {'OBS': 'Value'}.get(column_name, column_name)
    for column_name in new_table.columns
]

In [11]:
# Preview the first rows to confirm the OBS column is now called Value.
new_table.head()

Unnamed: 0,Value,Clients in treatment,Substance,Unit
0,19117.0,Clients smoking at the start of treatment,Opiate,People
1,586.0,Clients that received smoking cessation interv...,Opiate,People
2,0.030653,%,Opiate,People
3,5447.0,Clients smoking at the start of treatment,Non-opiate,People
4,98.0,Clients that received smoking cessation interv...,Non-opiate,People


In [12]:
def user_perc(x):
    """Return the measure type for a 'Clients in treatment' header label.

    Returns 'Percentage' when the string form of ``x`` is exactly '%',
    and 'Count' for any other value.
    """
    return 'Percentage' if str(x) == '%' else 'Count'
    
# Derive the measure type from the header label alone.  Only that one column
# is read, so map it directly instead of applying a function row by row.
new_table['Measure Type'] = new_table['Clients in treatment'].map(user_perc)


In [13]:
# Replace the bare '%' header with a descriptive label; all other labels are
# passed through unchanged.
new_table['Clients in treatment'] = new_table['Clients in treatment'].map(
    lambda label: 'Percentage of clients received smoking cessation'
    if label == '%' else label)

In [14]:
# Show the last rows to check the new Measure Type column and the relabelled
# percentage rows.
new_table.tail()

Unnamed: 0,Value,Clients in treatment,Substance,Unit,Measure Type
10,467.0,Clients that received smoking cessation interv...,Alcohol only,People,Count
11,0.031958,Percentage of clients received smoking cessation,Alcohol only,People,Percentage
12,46073.0,Clients smoking at the start of treatment,Total,People,Count
13,1428.0,Clients that received smoking cessation interv...,Total,People,Count
14,0.030994,Percentage of clients received smoking cessation,Total,People,Percentage


In [15]:
# Inspect the column dtypes before Value is converted to text.
new_table.dtypes

Value                   float64
Clients in treatment     object
Substance                object
Unit                     object
Measure Type             object
dtype: object

In [16]:
# Store Value as text rather than float64.
# NOTE(review): counts are rendered with a trailing '.0' (e.g. '19117.0') and
# percentages as unrounded fractions -- confirm this is the intended format.
new_table['Value'] = new_table['Value'].astype(str)

In [17]:
# Check the first three rows after the string conversion of Value.
new_table.head(3)

Unnamed: 0,Value,Clients in treatment,Substance,Unit,Measure Type
0,19117.0,Clients smoking at the start of treatment,Opiate,People,Count
1,586.0,Clients that received smoking cessation interv...,Opiate,People,Count
2,0.0306533451901449,Percentage of clients received smoking cessation,Opiate,People,Percentage


In [18]:
# Check the last rows after the string conversion of Value.
new_table.tail()

Unnamed: 0,Value,Clients in treatment,Substance,Unit,Measure Type
10,467.0,Clients that received smoking cessation interv...,Alcohol only,People,Count
11,0.0319578457537809,Percentage of clients received smoking cessation,Alcohol only,People,Percentage
12,46073.0,Clients smoking at the start of treatment,Total,People,Count
13,1428.0,Clients that received smoking cessation interv...,Total,People,Count
14,0.030994291667571,Percentage of clients received smoking cessation,Total,People,Percentage


In [19]:
# Put the columns into the order used for the output file.
new_table = new_table[[
    'Substance',
    'Clients in treatment',
    'Measure Type',
    'Value',
    'Unit',
]]

In [20]:
if is_interactive():
    # Interactive run: write the tidied table to out/table6.2.2.csv without
    # the index, creating the folder first if it does not exist.
    # NOTE(review): the folder variable's name looks like a search-and-replace
    # artefact of 'destinationFolder'; kept as-is in case it is referenced
    # elsewhere.
    SubstancetinationFolder = Path('out')
    SubstancetinationFolder.mkdir(parents=True, exist_ok=True)
    new_table.to_csv(SubstancetinationFolder / 'table6.2.2.csv', index=False)