Census of Drug and Alcohol Treatment Services in Northern Ireland:Table 3 – Breakdown by Residential Status

In [1]:
from gssutils import *
if is_interactive():
    import requests
    from cachecontrol import CacheControl
    from cachecontrol.caches.file_cache import FileCache
    from cachecontrol.heuristics import LastModified
    from pathlib import Path

    session = CacheControl(requests.Session(),
                           cache=FileCache('.cache'),
                           heuristic=LastModified())

    sourceFolder = Path('in')
    sourceFolder.mkdir(exist_ok=True)

    inputURL = 'https://www.health-ni.gov.uk/sites/default/files/publications/dhssps/data-census-drug-alcohol-treatment-services.xlsx'
    inputFile = sourceFolder / 'data-census-drug-alcohol-treatment-services.xlsx'
    response = session.get(inputURL)
    with open(inputFile, 'wb') as f:
      f.write(response.content)
    tab = loadxlstabs(inputFile, sheetids='Table 3')[0]

Loading in/data-census-drug-alcohol-treatment-services.xlsx which has size 46265 bytes
Table names: ['Table 3']


In [2]:
observations = tab.excel_ref('B6').expand(DOWN).expand(RIGHT).is_not_blank() - tab.excel_ref('B12').expand(DOWN).expand(RIGHT)  


In [3]:
observations

{<D11 4.3>, <H8 44.0>, <M7 1728.0>, <M6 83.0>, <B7 1388.0>, <I9 0.9>, <G6 334.0>, <J9 6.5>, <C11 2.2>, <I11 2.2>, <H9 5.6>, <F7 3109.0>, <I7 523.0>, <K9 4.4>, <H11 4.4>, <J7 296.0>, <B11 4.5>, <B10 88.6>, <H10 90.0>, <H7 909.0>, <C9 1.5>, <E9 0.0>, <F10 87.2>, <G9 8.2>, <C6 22.0>, <J11 2.2>, <M9 4.4>, <B8 71.0>, <J10 91.4>, <C7 1441.0>, <E10 95.5>, <K7 181.0>, <C8 33.0>, <K11 3.4>, <D8 44.0>, <J6 21.0>, <E11 4.5>, <G11 3.6>, <D7 784.0>, <D6 204.0>, <J8 7.0>, <M11 3.4>, <G8 148.0>, <K10 97.8>, <M8 63.0>, <F11 3.5>, <D9 19.8>, <C10 96.3>, <B6 108.0>, <I10 96.9>, <G7 3613.0>, <K8 63.0>, <L10 91.6>, <L7 1547.0>, <I6 5.0>, <B9 6.9>, <D10 76.0>, <I8 12.0>, <E7 504.0>, <F6 334.0>, <F9 9.4>, <H6 57.0>, <K6 83.0>, <E8 24.0>, <M10 92.2>, <E6 0.0>, <G10 88.2>, <F8 124.0>}

In [4]:
Service = tab.excel_ref('A5').expand(DOWN).is_not_blank()
Service

{<A9 'Residential (%)'>, <A13 'Residential Status'>, <A21 'Mixed (%)'>, <A10 'Non-residential (%)'>, <A7 'Non-residential '>, <A11 'Mixed (%)'>, <A15 'Total'>, <A17 'Non-residential '>, <A5 'Total'>, <A19 'Residential (%)'>, <A16 'Residential'>, <A6 'Residential'>, <A20 'Non-residential (%)'>, <A8 'Mixed'>, <A18 'Mixed'>}

In [5]:
Treatment = tab.excel_ref('B4').expand(RIGHT).is_not_blank()
Treatment

{<D4 'Drugs & Alcohol'>, <J4 'Drugs & Alcohol'>, <L4 '18 and over'>, <E4 'Under 18s'>, <H4 'Alcohol Only'>, <M4 'Total'>, <B4 'Alcohol Only'>, <K4 'Under 18s'>, <I4 'Drugs Only'>, <G4 'Total'>, <C4 'Drugs Only'>, <F4 '18 and over'>}

In [6]:
sex = tab.excel_ref('B3').expand(RIGHT).is_not_blank()
sex

{<H3 'Female  '>, <B3 'Male'>}

In [7]:
Dimensions = [
            HDim(Treatment,'Treatment Type',DIRECTLY,ABOVE),
            HDim(Service,'Residential Status',DIRECTLY,LEFT),
            HDim(sex,'Sex',CLOSEST,LEFT),
            HDimConst('Measure Type', 'Count'),
            HDimConst('Unit','People'),
            HDimConst('Age','All'),
            HDimConst('Period', '1 March 2017')
            ]

In [8]:
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# savepreviewhtml(c1)

In [9]:
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,Treatment Type,Residential Status,Sex,Measure Type,Unit,Age,Period
0,108.0,Alcohol Only,Residential,Male,Count,People,All,1 March 2017
1,22.0,Drugs Only,Residential,Male,Count,People,All,1 March 2017
2,204.0,Drugs & Alcohol,Residential,Male,Count,People,All,1 March 2017
3,0.0,Under 18s,Residential,Male,Count,People,All,1 March 2017
4,334.0,18 and over,Residential,Male,Count,People,All,1 March 2017
5,334.0,Total,Residential,Male,Count,People,All,1 March 2017
6,57.0,Alcohol Only,Residential,Female,Count,People,All,1 March 2017
7,5.0,Drugs Only,Residential,Female,Count,People,All,1 March 2017
8,21.0,Drugs & Alcohol,Residential,Female,Count,People,All,1 March 2017
9,83.0,Under 18s,Residential,Female,Count,People,All,1 March 2017


In [10]:
new_table.columns = ['Value' if x=='OBS' else x for x in new_table.columns]

In [11]:
new_table.dtypes

Value                 float64
Treatment Type         object
Residential Status     object
Sex                    object
Measure Type           object
Unit                   object
Age                    object
Period                 object
dtype: object

In [12]:
new_table.tail(5)

Unnamed: 0,Value,Treatment Type,Residential Status,Sex,Measure Type,Unit,Age,Period
63,4.4,Alcohol Only,Mixed (%),Female,Count,People,All,1 March 2017
64,2.2,Drugs Only,Mixed (%),Female,Count,People,All,1 March 2017
65,2.2,Drugs & Alcohol,Mixed (%),Female,Count,People,All,1 March 2017
66,3.4,Under 18s,Mixed (%),Female,Count,People,All,1 March 2017
67,3.4,Total,Mixed (%),Female,Count,People,All,1 March 2017


In [13]:
new_table.count()

Value                 68
Treatment Type        68
Residential Status    68
Sex                   68
Measure Type          68
Unit                  68
Age                   68
Period                68
dtype: int64

In [14]:
new_table = new_table[new_table['Value'] !=  0 ]

In [15]:
new_table.count()

Value                 66
Treatment Type        66
Residential Status    66
Sex                   66
Measure Type          66
Unit                  66
Age                   66
Period                66
dtype: int64

In [17]:
new_table['Service Type'] = 'all'
# new_table['Residential Status'] = 'All'
new_table['Health and Social Care Trust']  = 'all'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [18]:
new_table = new_table[['Period', 'Sex', 'Age', 'Service Type', 'Residential Status', 'Treatment Type', 'Health and Social Care Trust', 'Measure Type', 'Unit', 'Value']]

In [19]:
new_table.head(5)

Unnamed: 0,Period,Sex,Age,Service Type,Residential Status,Treatment Type,Health and Social Care Trust,Measure Type,Unit,Value
0,1 March 2017,Male,All,all,Residential,Alcohol Only,all,Count,People,108.0
1,1 March 2017,Male,All,all,Residential,Drugs Only,all,Count,People,22.0
2,1 March 2017,Male,All,all,Residential,Drugs & Alcohol,all,Count,People,204.0
4,1 March 2017,Male,All,all,Residential,18 and over,all,Count,People,334.0
5,1 March 2017,Male,All,all,Residential,Total,all,Count,People,334.0


In [20]:
#new_table.to_csv('testCompare.csv', index = False)