Table 4.3.1: Gender of all clients in treatment

In [1]:
from gssutils import *

# Fetch the source workbook only when gssutils reports an interactive session;
# `inputFile` is not assigned by this cell otherwise.
if is_interactive():
    import requests
    from cachecontrol import CacheControl
    from cachecontrol.caches.file_cache import FileCache
    from cachecontrol.heuristics import LastModified
    from pathlib import Path

    # HTTP session backed by an on-disk cache in '.cache', using the
    # Last-Modified heuristic to decide how long a cached copy is reused.
    session = CacheControl(requests.Session(),
                           cache=FileCache('.cache'),
                           heuristic=LastModified())

    # Local folder that receives the downloaded workbook.
    sourceFolder = Path('in')
    sourceFolder.mkdir(exist_ok=True)

    inputURL = 'https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/752515/AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx'
    inputFile = sourceFolder / 'AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx'
    response = session.get(inputURL)
    # Fail loudly on an HTTP error rather than saving an error page to disk
    # as if it were the spreadsheet.
    response.raise_for_status()
    with open(inputFile, 'wb') as f:
        f.write(response.content)

In [2]:
# Load only the 'Table 4.3.1' sheet from the workbook and keep the single
# tab that comes back.
matching_tabs = loadxlstabs(inputFile, sheetids='Table 4.3.1')
tab = matching_tabs[0]

Loading in\AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx which has size 272149 bytes
Table names: ['Table 4.3.1']


https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/752515/AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx

In [3]:
# Everything from B5 downwards and rightwards is a candidate value cell;
# blank cells are discarded.
value_region = tab.excel_ref('B5').expand(DOWN).expand(RIGHT)
observations = value_region.is_not_blank()
observations

{<D6 6610.0>, <B9 185870.0>, <D9 82520.0>, <G6 0.0884161108834159>, <E7 0.0897358216190015>, <C6 0.0921073868833055>, <C5 0.553090869962877>, <D7 7405.0>, <C8 0.245698606552967>, <G9 1.0>, <G5 0.526059093110772>, <B7 20279.0>, <F5 141189.0>, <G8 0.282376392563061>, <G7 0.103148403442751>, <C9 1.0>, <B8 45668.0>, <F6 23730.0>, <E6 0.0801017935046049>, <E9 1.0>, <D5 38386.0>, <C7 0.10910313660085>, <B6 17120.0>, <E8 0.364990305380514>, <B5 102803.0>, <F9 268390.0>, <F7 27684.0>, <E5 0.46517207949588>, <F8 75787.0>, <D8 30119.0>}

In [4]:
# Row labels: the non-blank cells of column A from row 5 downwards
# (the substance groups plus the 'Total' row).
Substance = (
    tab.excel_ref('A5')
       .expand(DOWN)
       .is_not_blank()
)
Substance

{<A6 'Non-opiate only'>, <A9 'Total'>, <A5 'Opiate'>, <A8 'Alcohol only'>, <A7 'Non-opiate and Alcohol'>}

In [5]:
# Top header row: the non-blank cells of row 3 from column B rightwards.
# The labels only appear above every other column (B, D and F).
Sex = (
    tab.excel_ref('B3')
       .expand(RIGHT)
       .is_not_blank()
)
Sex

{<F3 'Total'>, <B3 'Male'>, <D3 'Female'>}

In [6]:
# Second header row: the 'n' / '%' marker sitting directly above each
# value column, taken from row 4 from column B rightwards.
MeasureType = (
    tab.excel_ref('B4')
       .expand(RIGHT)
       .is_not_blank()
)
MeasureType

{<D4 'n'>, <G4 '%'>, <F4 'n'>, <B4 'n'>, <C4 '%'>, <E4 '%'>}

In [7]:
# How each observation finds its labels:
#   * Substance    - the label in the same row, to the left.
#   * Sex          - the nearest header to the left, because the Sex labels
#                    sit only above the 'n' columns and the adjacent '%'
#                    column has to pick up the same label.
#   * Measure Type - the 'n' / '%' marker directly above the value.
#   * Unit         - the constant 'People' for every observation.
Dimensions = [
    HDim(Substance, 'Substance', DIRECTLY, LEFT),
    HDim(Sex, 'Sex', CLOSEST, LEFT),
    HDim(MeasureType, 'Measure Type', DIRECTLY, ABOVE),
    HDimConst('Unit', 'People'),
]

In [8]:
# Tie the observation cells to the dimension lookups defined for this table.
c1 = ConversionSegment(
    observations,
    Dimensions,
    processTIMEUNIT=True,
)
# HTML preview of the segment, currently disabled:
# if is_interactive():
#     savepreviewhtml(c1)

In [9]:
# Flatten the conversion segment into a DataFrame: one row per observation,
# with the value in 'OBS' and one column per dimension
# (Substance, Sex, Measure Type, Unit).
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,Substance,Sex,Measure Type,Unit
0,102803.0,Opiate,Male,n,People
1,0.553091,Opiate,Male,%,People
2,38386.0,Opiate,Female,n,People
3,0.465172,Opiate,Female,%,People
4,141189.0,Opiate,Total,n,People
5,0.526059,Opiate,Total,%,People
6,17120.0,Non-opiate only,Male,n,People
7,0.092107,Non-opiate only,Male,%,People
8,6610.0,Non-opiate only,Female,n,People
9,0.080102,Non-opiate only,Female,%,People


In [10]:
# Discard observations whose value is exactly zero.
# .copy() detaches the filtered rows from the original frame, so the
# column assignments in later cells modify an independent DataFrame and do
# not raise pandas' SettingWithCopyWarning.
new_table = new_table[new_table['OBS'] != 0].copy()

In [11]:
# Rename the observation column from 'OBS' to 'Value'; every other column
# keeps its name.
new_table.columns = [{'OBS': 'Value'}.get(name, name) for name in new_table.columns]

In [12]:
# Swap the spreadsheet's column markers for readable measure names; any
# value not listed here passes through unchanged.
measure_names = {'n': 'Count', '%': 'Percentage'}
new_table['Measure Type'] = new_table['Measure Type'].map(
    lambda code: measure_names.get(code, code))

In [13]:
# Preview the first rows to check the 'Value' rename and the
# Count / Percentage relabelling.
new_table.head()

Unnamed: 0,Value,Substance,Sex,Measure Type,Unit
0,102803.0,Opiate,Male,Count,People
1,0.553091,Opiate,Male,Percentage,People
2,38386.0,Opiate,Female,Count,People
3,0.465172,Opiate,Female,Percentage,People
4,141189.0,Opiate,Total,Count,People


In [14]:
# Inspect column dtypes before 'Value' is converted to text.
new_table.dtypes

Value           float64
Substance        object
Sex              object
Measure Type     object
Unit             object
dtype: object

In [15]:
# Store the observation values as text.
# NOTE(review): the values are floats at this point, so counts are rendered
# with a trailing '.0' (e.g. '102803.0'). The recorded output also shows
# percentages with fewer digits than the source cells
# (0.553090869963 vs 0.553090869962877). Confirm this formatting is what
# downstream consumers expect.
new_table['Value'] = new_table['Value'].astype(str)

In [16]:
# Spot-check how the values look after conversion to text.
new_table.head(3)

Unnamed: 0,Value,Substance,Sex,Measure Type,Unit
0,102803.0,Opiate,Male,Count,People
1,0.553090869963,Opiate,Male,Percentage,People
2,38386.0,Opiate,Female,Count,People


In [17]:
# Relabel the 'Total' row as 'All'; every other substance label is kept
# as it is.
new_table['Substance'] = new_table['Substance'].map(
    lambda label: 'All' if label == 'Total' else label)

In [18]:
# Replace the sex labels with single-letter codes; any value not listed
# here passes through unchanged.
sex_codes = {'Total': 'T', 'Female': 'F', 'Male': 'M'}
new_table['Sex'] = new_table['Sex'].map(
    lambda label: sex_codes.get(label, label))

In [19]:
# Put the dimension columns first, then the value and its unit.
output_columns = ['Substance', 'Sex', 'Measure Type', 'Value', 'Unit']
new_table = new_table[output_columns]

In [20]:
# When gssutils reports an interactive session, write the tidy table to
# out/table4.3.1.csv, creating the folder if needed and omitting the index.
if is_interactive():
    destinationFolder = Path('out')
    destinationFolder.mkdir(exist_ok=True, parents=True)
    new_table.to_csv(destinationFolder / 'table4.3.1.csv', index=False)