Census of Drug and Alcohol Treatment Services in Northern Ireland:Breakdown by Service Type

In [85]:
from gssutils import *
if is_interactive():
    import requests
    from cachecontrol import CacheControl
    from cachecontrol.caches.file_cache import FileCache
    from cachecontrol.heuristics import LastModified
    from pathlib import Path

    session = CacheControl(requests.Session(),
                           cache=FileCache('.cache'),
                           heuristic=LastModified())

    sourceFolder = Path('in')
    sourceFolder.mkdir(exist_ok=True)

    inputURL = 'https://www.health-ni.gov.uk/sites/default/files/publications/dhssps/data-census-drug-alcohol-treatment-services.xlsx'
    inputFile = sourceFolder / 'data-census-drug-alcohol-treatment-services.xlsx'
    response = session.get(inputURL)
    with open(inputFile, 'wb') as f:
      f.write(response.content)
    tab = loadxlstabs(inputFile, sheetids='Table 2')[0]

Loading in/data-census-drug-alcohol-treatment-services.xlsx which has size 46265 bytes
Table names: ['Table 2']


In [86]:
observations = tab.excel_ref('B6').expand(DOWN).expand(RIGHT).is_not_blank() - tab.excel_ref('B12').expand(DOWN).expand(RIGHT)  


In [87]:
observations

{<E9 18.2>, <C9 61.8>, <K6 35.0>, <B8 5.0>, <G8 172.0>, <L6 1143.0>, <K9 18.9>, <M6 1178.0>, <D7 437.0>, <C6 925.0>, <K10 80.0>, <M11 0.6>, <K7 148.0>, <L7 537.0>, <H9 64.4>, <C7 571.0>, <C11 0.0>, <J6 141.0>, <H10 35.6>, <D9 41.5>, <E8 0.0>, <F8 172.0>, <F10 30.0>, <B7 493.0>, <L10 31.8>, <I11 0.0>, <I9 71.7>, <J9 43.5>, <G11 4.2>, <B11 0.3>, <D10 42.3>, <G7 1501.0>, <F11 4.8>, <I8 0.0>, <J10 53.1>, <G9 59.1>, <I7 153.0>, <E11 0.0>, <D8 167.0>, <E7 432.0>, <M8 11.0>, <J11 3.4>, <L9 67.7>, <J8 11.0>, <B10 31.5>, <M9 62.9>, <E10 81.8>, <F7 1069.0>, <E6 96.0>, <M7 685.0>, <H8 0.0>, <B6 1069.0>, <K8 11.0>, <C8 0.0>, <I10 28.3>, <J7 172.0>, <H11 0.0>, <D11 16.2>, <H7 360.0>, <C10 38.2>, <M10 36.6>, <B9 68.2>, <F6 2326.0>, <G6 2422.0>, <I6 387.0>, <K11 0.6>, <G10 36.7>, <H6 650.0>, <D6 428.0>, <F9 65.2>}

In [88]:
Service = tab.excel_ref('A5').expand(DOWN).is_not_blank()
Service

{<A9 'Statutory (%)'>, <A7 'Non-statutory'>, <A5 'Total'>, <A19 'Statutory (%)'>, <A21 'Prison (%)'>, <A18 'Prison'>, <A20 'Non-statutory (%)'>, <A10 'Non-statutory (%)'>, <A17 'Non-statutory'>, <A8 'Prison'>, <A15 'Total'>, <A11 'Prison (%)'>, <A13 'Service Type'>, <A6 'Statutory'>, <A16 'Statutory'>}

In [89]:
Treatment = tab.excel_ref('B4').expand(RIGHT).is_not_blank()
Treatment

{<D4 'Drugs & Alcohol'>, <B4 'Alcohol Only'>, <E4 'Under 18s'>, <G4 'Total'>, <I4 'Drugs Only'>, <C4 'Drugs Only'>, <K4 'Under 18s'>, <J4 'Drugs & Alcohol'>, <F4 '18 and over'>, <M4 'Total'>, <H4 'Alcohol Only'>, <L4 '18 and over'>}

In [90]:
sex = tab.excel_ref('B3').expand(RIGHT).is_not_blank()
sex

{<B3 'Male'>, <H3 'Female  '>}

In [91]:
Dimensions = [
            HDim(Treatment,'Treatment Type',DIRECTLY,ABOVE),
            HDim(Service,'Service Type',DIRECTLY,LEFT),
            HDim(sex,'Sex',CLOSEST,LEFT),
            HDimConst('Measure Type', 'Count'),
            HDimConst('Unit','People'),
            HDimConst('Period','1 March 2017'),
            HDimConst('Age','All')
            ]

In [92]:
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# savepreviewhtml(c1)

In [93]:
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,Treatment Type,Service Type,Sex,Measure Type,Unit,Period,Age
0,1069.0,Alcohol Only,Statutory,Male,Count,People,1 March 2017,All
1,925.0,Drugs Only,Statutory,Male,Count,People,1 March 2017,All
2,428.0,Drugs & Alcohol,Statutory,Male,Count,People,1 March 2017,All
3,96.0,Under 18s,Statutory,Male,Count,People,1 March 2017,All
4,2326.0,18 and over,Statutory,Male,Count,People,1 March 2017,All
5,2422.0,Total,Statutory,Male,Count,People,1 March 2017,All
6,650.0,Alcohol Only,Statutory,Female,Count,People,1 March 2017,All
7,387.0,Drugs Only,Statutory,Female,Count,People,1 March 2017,All
8,141.0,Drugs & Alcohol,Statutory,Female,Count,People,1 March 2017,All
9,35.0,Under 18s,Statutory,Female,Count,People,1 March 2017,All


In [94]:
new_table.columns = ['Value' if x=='OBS' else x for x in new_table.columns]

In [95]:
new_table.dtypes

Value             float64
Treatment Type     object
Service Type       object
Sex                object
Measure Type       object
Unit               object
Period             object
Age                object
dtype: object

In [96]:
new_table.tail(5)

Unnamed: 0,Value,Treatment Type,Service Type,Sex,Measure Type,Unit,Period,Age
65,0.0,Alcohol Only,Prison (%),Female,Count,People,1 March 2017,All
66,0.0,Drugs Only,Prison (%),Female,Count,People,1 March 2017,All
67,3.4,Drugs & Alcohol,Prison (%),Female,Count,People,1 March 2017,All
68,0.6,Under 18s,Prison (%),Female,Count,People,1 March 2017,All
69,0.6,Total,Prison (%),Female,Count,People,1 March 2017,All


In [97]:
new_table.count()

Value             70
Treatment Type    70
Service Type      70
Sex               70
Measure Type      70
Unit              70
Period            70
Age               70
dtype: int64

In [98]:
new_table = new_table[new_table['Value'] !=  0 ]

In [99]:
new_table.count()

Value             62
Treatment Type    62
Service Type      62
Sex               62
Measure Type      62
Unit              62
Period            62
Age               62
dtype: int64

In [100]:
new_table['Treatment Type'].fillna('All', inplace = True)
#new_table['Service Type'] = 'All'
new_table['Residential Status'] = 'All'
new_table['Health and Social Care Trust']  = 'All'
new_table['Service Type'].unique()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.


array(['Statutory', 'Non-statutory', 'Prison', 'Statutory (%)',
       'Non-statutory (%)', 'Prison (%)'], dtype=object)

In [81]:
new_table = new_table[['Period', 'Sex', 'Age', 'Service Type', 'Residential Status', 'Treatment Type', 'Health and Social Care Trust', 'Measure Type', 'Unit', 'Value']]

In [82]:
new_table

Unnamed: 0,Period,Sex,Age,Service Type,Residential Status,Treatment Type,Health and Social Care Trust,Measure Type,Unit,Value
0,1 March 2017,Male,All,Statutory,All,Alcohol Only,All,Count,People,1069.0
1,1 March 2017,Male,All,Statutory,All,Drugs Only,All,Count,People,925.0
2,1 March 2017,Male,All,Statutory,All,Drugs & Alcohol,All,Count,People,428.0
3,1 March 2017,Male,All,Statutory,All,Under 18s,All,Count,People,96.0
4,1 March 2017,Male,All,Statutory,All,18 and over,All,Count,People,2326.0
5,1 March 2017,Male,All,Statutory,All,Total,All,Count,People,2422.0
6,1 March 2017,Female,All,Statutory,All,Alcohol Only,All,Count,People,650.0
7,1 March 2017,Female,All,Statutory,All,Drugs Only,All,Count,People,387.0
8,1 March 2017,Female,All,Statutory,All,Drugs & Alcohol,All,Count,People,141.0
9,1 March 2017,Female,All,Statutory,All,Under 18s,All,Count,People,35.0


In [83]:
new_table.tail()

Unnamed: 0,Period,Sex,Age,Service Type,Residential Status,Treatment Type,Health and Social Care Trust,Measure Type,Unit,Value
63,1 March 2017,Male,All,Prison (%),All,18 and over,All,Count,People,4.8
64,1 March 2017,Male,All,Prison (%),All,Total,All,Count,People,4.2
67,1 March 2017,Female,All,Prison (%),All,Drugs & Alcohol,All,Count,People,3.4
68,1 March 2017,Female,All,Prison (%),All,Under 18s,All,Count,People,0.6
69,1 March 2017,Female,All,Prison (%),All,Total,All,Count,People,0.6


In [84]:
new_table.to_csv('testCompare.csv', index = False)