Table 4.1.1: Substance breakdown of all clients in treatment

In [1]:
from gssutils import *

if is_interactive():
    import requests
    from cachecontrol import CacheControl
    from cachecontrol.caches.file_cache import FileCache
    from cachecontrol.heuristics import LastModified
    from pathlib import Path

    session = CacheControl(requests.Session(),
                           cache=FileCache('.cache'),
                           heuristic=LastModified())

    sourceFolder = Path('in')
    sourceFolder.mkdir(exist_ok=True)

    inputURL = 'https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/752515/AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx'
    inputFile = sourceFolder / 'AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx'
    response = session.get(inputURL)
    with open(inputFile, 'wb') as f:
      f.write(response.content)    

In [2]:
tab = loadxlstabs(inputFile, sheetids='Table 4.1.1')[0]

Loading in\AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx which has size 272149 bytes
Table names: ['Table 4.1.1']


https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/752515/AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx

In [3]:
observations = tab.excel_ref('B6').expand(DOWN).expand(RIGHT).is_not_blank()

In [4]:
observations

{<H19 0.0>, <B13 5617.0>, <H11 0.0>, <J6 77248.0>, <I8 0.0>, <J14 3394.0>, <C19 0.000949082435600507>, <C8 0.0>, <H13 0.0>, <J7 63941.0>, <D6 0.0>, <I18 0.0>, <G15 0.0140514376535183>, <E13 0.0994100294985251>, <K8 0.0227243936063192>, <F18 118.0>, <E15 0.0282343025705858>, <K19 0.000715376876932822>, <K14 0.0126457766682812>, <C20 0.000325804418191219>, <H22 75787.0>, <E14 0.0420986093552465>, <K13 0.036353813480383>, <G10 0.547428117324086>, <K6 0.287819963485972>, <I13 0.0>, <J22 131008.0>, <F17 46.0>, <B23 141189.0>, <J19 192.0>, <D11 8557.0>, <E23 1.0>, <J23 268390.0>, <F16 142.0>, <H8 0.0>, <D18 92.0>, <H18 0.0>, <H23 75787.0>, <F22 27684.0>, <I11 0.0>, <D12 1583.0>, <H10 0.0>, <C6 0.547124776009463>, <B14 1874.0>, <D17 27.0>, <I22 1.0>, <D16 168.0>, <C22 0.195036440515904>, <K18 0.00119974663735609>, <F14 521.0>, <D8 3099.0>, <D19 30.0>, <E11 0.360598398651496>, <G8 0.108365843086259>, <C7 0.452875223990537>, <G6 0.0>, <K22 0.488125489027162>, <G18 0.00426238982805953>, <I6 0.0>

In [5]:
Substance = tab.excel_ref('A6').expand(DOWN).is_not_blank()
Substance

{<A27 '**Percentages may equal 0% or not sum to 100% due to rounding'>, <A19 'Major tranquiliser'>, <A6 'Opiate (not crack cocaine)'>, <A16 'Other prescription drug'>, <A23 'Total number of individuals *'>, <A9 'Other drug use'>, <A22 'Alcohol'>, <A12 'Benzodiazepine'>, <A8 'Crack cocaine (not opiate)'>, <A17 'Anti-depressant'>, <A7 'Both opiate and crack cocaine\xa0'>, <A18 'Solvent'>, <A13 'Amphetamine (other than ecstasy)'>, <A11 'Cocaine'>, <A15 'Hallucinogen'>, <A20 'Barbiturate'>, <A21 'Alcohol'>, <A26 '*The number of individuals will be less than the total of the reported substances as an individual may present with more than one problematic substance '>, <A10 'Cannabis'>, <A14 'Other drug**'>}

In [6]:
Clients = tab.excel_ref('B3').expand(RIGHT).is_not_blank()
Clients

{<H3 'Alcohol only'>, <B3 'Opiate'>, <J3 'Total'>, <D3 'Non-opiate only'>, <F3 'Non-opiate and Alcohol'>}

In [7]:
MeasureType = tab.excel_ref('B4').expand(RIGHT).is_not_blank()
MeasureType

{<H4 'n'>, <F4 'n'>, <G4 '%'>, <C4 '%'>, <D4 'n'>, <E4 '%'>, <B4 'n'>, <K4 '%'>, <I4 '%'>, <J4 'n'>}

In [8]:
Dimensions = [
            HDim(Substance,'Substance',DIRECTLY,LEFT),
            HDim(Clients,'Clients',CLOSEST,LEFT),
            HDim(MeasureType,'Measure Type',DIRECTLY,ABOVE),
            HDimConst('Unit','People')            
            ]

In [9]:
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# if is_interactive():
#     savepreviewhtml(c1)

In [10]:
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,Substance,Clients,Measure Type,Unit
0,77248.000000,Opiate (not crack cocaine),Opiate,n,People
1,0.547125,Opiate (not crack cocaine),Opiate,%,People
2,0.000000,Opiate (not crack cocaine),Non-opiate only,n,People
3,0.000000,Opiate (not crack cocaine),Non-opiate only,%,People
4,0.000000,Opiate (not crack cocaine),Non-opiate and Alcohol,n,People
5,0.000000,Opiate (not crack cocaine),Non-opiate and Alcohol,%,People
6,0.000000,Opiate (not crack cocaine),Alcohol only,n,People
7,0.000000,Opiate (not crack cocaine),Alcohol only,%,People
8,77248.000000,Opiate (not crack cocaine),Total,n,People
9,0.287820,Opiate (not crack cocaine),Total,%,People


In [11]:
new_table = new_table[new_table['OBS'] != 0 ]

In [12]:
new_table.columns = ['Value' if x=='OBS' else x for x in new_table.columns]

In [13]:
new_table['Measure Type'] = new_table['Measure Type'].map(
    lambda x: {
        'n' : 'Count', 
        '%' : 'Percentage',
        }.get(x, x))


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [14]:
new_table.head()

Unnamed: 0,Value,Substance,Clients,Measure Type,Unit
0,77248.0,Opiate (not crack cocaine),Opiate,Count,People
1,0.547125,Opiate (not crack cocaine),Opiate,Percentage,People
8,77248.0,Opiate (not crack cocaine),Total,Count,People
9,0.28782,Opiate (not crack cocaine),Total,Percentage,People
10,63941.0,Both opiate and crack cocaine,Opiate,Count,People


In [15]:
new_table.dtypes

Value           float64
Substance        object
Clients          object
Measure Type     object
Unit             object
dtype: object

In [16]:
new_table['Value'] = new_table['Value'].astype(str)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [17]:
# def user_perc(x,y):
    
#     if x == 'Count':
#         return str(y)
#     else:
#         return y
    
# new_table['Value'] = new_table.apply(lambda row: user_perc(row['Measure Type'], row['Value']), axis = 1)

In [18]:
new_table.head(3)

Unnamed: 0,Value,Substance,Clients,Measure Type,Unit
0,77248.0,Opiate (not crack cocaine),Opiate,Count,People
1,0.547124776009,Opiate (not crack cocaine),Opiate,Percentage,People
8,77248.0,Opiate (not crack cocaine),Total,Count,People


In [19]:
new_table['Clients'] = new_table['Clients'].map(
    lambda x: {
        'Total' : 'All Clients' 
        }.get(x, x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [20]:
new_table['Substance'] = new_table['Substance'].str.rstrip('* ')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [21]:
new_table = new_table[['Substance','Clients','Measure Type','Value','Unit']]

In [22]:
if is_interactive():
    SubstancetinationFolder = Path('out')
    SubstancetinationFolder.mkdir(exist_ok=True, parents=True)
    new_table.to_csv(SubstancetinationFolder / ('table4.1.1.csv'), index = False)