Census of Drug and Alcohol Treatment Services in Northern Ireland:Table 3 – Breakdown by Residential Status

In [1]:
from gssutils import *
if is_interactive():
    import requests
    from cachecontrol import CacheControl
    from cachecontrol.caches.file_cache import FileCache
    from cachecontrol.heuristics import LastModified
    from pathlib import Path

    session = CacheControl(requests.Session(),
                           cache=FileCache('.cache'),
                           heuristic=LastModified())

    sourceFolder = Path('in')
    sourceFolder.mkdir(exist_ok=True)

    inputURL = 'https://www.health-ni.gov.uk/sites/default/files/publications/dhssps/data-census-drug-alcohol-treatment-services.xlsx'
    inputFile = sourceFolder / 'data-census-drug-alcohol-treatment-services.xlsx'
    response = session.get(inputURL)
    with open(inputFile, 'wb') as f:
      f.write(response.content)
    tab = loadxlstabs(inputFile, sheetids='Table 3')[0]

Loading in\data-census-drug-alcohol-treatment-services.xlsx which has size 46265 bytes
Table names: ['Table 3']


In [2]:
observations = tab.excel_ref('B6').expand(DOWN).expand(RIGHT).is_not_blank() - tab.excel_ref('B12').expand(DOWN).expand(RIGHT)  


In [3]:
observations

{<M11 3.4>, <E11 4.5>, <D7 784.0>, <G11 3.6>, <D6 204.0>, <J8 7.0>, <G8 148.0>, <K10 97.8>, <J6 21.0>, <F11 3.5>, <M8 63.0>, <F5 3567.0>, <D9 19.8>, <C10 96.3>, <C5 1496.0>, <I10 96.9>, <L5 1689.0>, <G7 3613.0>, <K8 63.0>, <L10 91.6>, <L7 1547.0>, <I6 5.0>, <H5 1010.0>, <B6 108.0>, <I8 12.0>, <D10 76.0>, <B9 6.9>, <E10 95.5>, <F6 334.0>, <E7 504.0>, <F9 9.4>, <H6 57.0>, <E8 24.0>, <M10 92.2>, <E6 0.0>, <G10 88.2>, <F8 124.0>, <K6 83.0>, <D11 4.3>, <H8 44.0>, <M7 1728.0>, <I9 0.9>, <B7 1388.0>, <M6 83.0>, <G6 334.0>, <J9 6.5>, <K5 185.0>, <C11 2.2>, <I11 2.2>, <E5 528.0>, <H9 5.6>, <F7 3109.0>, <D5 1032.0>, <I7 523.0>, <G5 4095.0>, <I5 540.0>, <K9 4.4>, <J5 324.0>, <H11 4.4>, <J7 296.0>, <B11 4.5>, <M5 1874.0>, <H10 90.0>, <B8 71.0>, <G9 8.2>, <C9 1.5>, <F10 87.2>, <E9 0.0>, <H7 909.0>, <J11 2.2>, <C6 22.0>, <B10 88.6>, <M9 4.4>, <J10 91.4>, <B5 1567.0>, <C7 1441.0>, <D8 44.0>, <K7 181.0>, <C8 33.0>, <K11 3.4>}

In [4]:
Service = tab.excel_ref('A5').expand(DOWN).is_not_blank()
Service

{<A5 'Total'>, <A13 'Residential Status'>, <A18 'Mixed'>, <A15 'Total'>, <A17 'Non-residential '>, <A16 'Residential'>, <A7 'Non-residential '>, <A6 'Residential'>, <A21 'Mixed (%)'>, <A11 'Mixed (%)'>, <A20 'Non-residential (%)'>, <A19 'Residential (%)'>, <A9 'Residential (%)'>, <A8 'Mixed'>, <A10 'Non-residential (%)'>}

In [5]:
Treatment = tab.excel_ref('B4').expand(RIGHT).is_not_blank()
Treatment

{<F4 '18 and over'>, <B4 'Alcohol Only'>, <G4 'Total'>, <M4 'Total'>, <L4 '18 and over'>, <K4 'Under 18s'>, <E4 'Under 18s'>, <J4 'Drugs & Alcohol'>, <D4 'Drugs & Alcohol'>, <C4 'Drugs Only'>, <H4 'Alcohol Only'>, <I4 'Drugs Only'>}

In [6]:
sex = tab.excel_ref('B3').expand(RIGHT).is_not_blank()
sex

{<B3 'Male'>, <H3 'Female  '>}

In [7]:
Dimensions = [
            HDim(Treatment,'Treatment Type',DIRECTLY,ABOVE),
            HDim(Service,'Residential Status',DIRECTLY,LEFT),
            HDim(sex,'Sex',CLOSEST,LEFT),
            HDimConst('Measure Type', 'Count'),
            HDimConst('Unit','People'),
            HDimConst('Age','All'),
            HDimConst('Period', '1 March 2017')
            ]

In [8]:
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# savepreviewhtml(c1)

In [9]:
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,Treatment Type,Residential Status,Sex,Measure Type,Unit,Age,Period
0,1567.0,Alcohol Only,Total,Male,Count,People,All,1 March 2017
1,1496.0,Drugs Only,Total,Male,Count,People,All,1 March 2017
2,1032.0,Drugs & Alcohol,Total,Male,Count,People,All,1 March 2017
3,528.0,Under 18s,Total,Male,Count,People,All,1 March 2017
4,3567.0,18 and over,Total,Male,Count,People,All,1 March 2017
5,4095.0,Total,Total,Male,Count,People,All,1 March 2017
6,1010.0,Alcohol Only,Total,Female,Count,People,All,1 March 2017
7,540.0,Drugs Only,Total,Female,Count,People,All,1 March 2017
8,324.0,Drugs & Alcohol,Total,Female,Count,People,All,1 March 2017
9,185.0,Under 18s,Total,Female,Count,People,All,1 March 2017


In [10]:
new_table.columns = ['Value' if x=='OBS' else x for x in new_table.columns]

In [11]:
new_table.dtypes

Value                 float64
Treatment Type         object
Residential Status     object
Sex                    object
Measure Type           object
Unit                   object
Age                    object
Period                 object
dtype: object

In [12]:
new_table.tail(5)

Unnamed: 0,Value,Treatment Type,Residential Status,Sex,Measure Type,Unit,Age,Period
75,4.4,Alcohol Only,Mixed (%),Female,Count,People,All,1 March 2017
76,2.2,Drugs Only,Mixed (%),Female,Count,People,All,1 March 2017
77,2.2,Drugs & Alcohol,Mixed (%),Female,Count,People,All,1 March 2017
78,3.4,Under 18s,Mixed (%),Female,Count,People,All,1 March 2017
79,3.4,Total,Mixed (%),Female,Count,People,All,1 March 2017


In [13]:
new_table.count()

Value                 80
Treatment Type        80
Residential Status    80
Sex                   80
Measure Type          80
Unit                  80
Age                   80
Period                80
dtype: int64

In [14]:
new_table = new_table[new_table['Value'] !=  0 ]

In [15]:
new_table.count()

Value                 78
Treatment Type        78
Residential Status    78
Sex                   78
Measure Type          78
Unit                  78
Age                   78
Period                78
dtype: int64

In [16]:
new_table['Service Type'] = 'All'
# new_table['Residential Status'] = 'All'
new_table['Health and Social Care Trust']  = 'All'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [17]:
new_table = new_table[['Period', 'Sex', 'Age', 'Service Type', 'Residential Status', 'Treatment Type', 'Health and Social Care Trust', 'Measure Type', 'Unit', 'Value']]

In [18]:
new_table.head(5)

Unnamed: 0,Period,Sex,Age,Service Type,Residential Status,Treatment Type,Health and Social Care Trust,Measure Type,Unit,Value
0,1 March 2017,Male,All,All,Total,Alcohol Only,All,Count,People,1567.0
1,1 March 2017,Male,All,All,Total,Drugs Only,All,Count,People,1496.0
2,1 March 2017,Male,All,All,Total,Drugs & Alcohol,All,Count,People,1032.0
3,1 March 2017,Male,All,All,Total,Under 18s,All,Count,People,528.0
4,1 March 2017,Male,All,All,Total,18 and over,All,Count,People,3567.0
