Table 4.4.1: Ethnicity of all clients in treatment

In [1]:
from gssutils import *

if is_interactive():
    import requests
    from cachecontrol import CacheControl
    from cachecontrol.caches.file_cache import FileCache
    from cachecontrol.heuristics import LastModified
    from pathlib import Path

    session = CacheControl(requests.Session(),
                           cache=FileCache('.cache'),
                           heuristic=LastModified())

    sourceFolder = Path('in')
    sourceFolder.mkdir(exist_ok=True)

    inputURL = 'https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/752515/AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx'
    inputFile = sourceFolder / 'AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx'
    response = session.get(inputURL)
    with open(inputFile, 'wb') as f:
      f.write(response.content)    

In [2]:
tab = loadxlstabs(inputFile, sheetids='Table 4.4.1')[0]

Loading in\AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx which has size 272149 bytes
Table names: ['Table 4.4.1']


https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/752515/AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx

In [3]:
observations = tab.excel_ref('B5').expand(DOWN).expand(RIGHT).is_not_blank()
observations

{<D19 90.0>, <B6 5579.0>, <J9 3324.0>, <I9 0.0167791937996884>, <E6 0.0360532113201152>, <B18 928.0>, <J25 268390.0>, <J7 4692.0>, <B23 137722.0>, <J5 222775.0>, <H6 3057.0>, <C7 0.0129391092200229>, <H18 105.0>, <D25 23730.0>, <F23 27360.0>, <K21 0.000326030502807274>, <E19 0.0038129130655821>, <F25 27684.0>, <E16 0.0124978817149636>, <H10 492.0>, <J14 2028.0>, <G10 0.0179093567251462>, <H23 75093.0>, <F15 293.0>, <K12 0.00874595779042304>, <J10 2738.0>, <G8 0.0129751461988304>, <D12 347.0>, <I8 0.0156339472387573>, <C18 0.00673821175992216>, <F21 8.0>, <C20 0.00201129812230435>, <D14 215.0>, <I17 0.00422143209087398>, <I16 0.00850944828412768>, <J23 263779.0>, <D5 18762.0>, <H24 694.0>, <D11 393.0>, <C8 0.0103106257533292>, <E23 1.0>, <E7 0.0273258769700051>, <I10 0.00655187567416404>, <D20 102.0>, <H25 75787.0>, <F22 0.0>, <D23 23604.0>, <K8 0.011858411776525>, <H13 549.0>, <G20 0.00292397660818713>, <H7 1716.0>, <E12 0.0147008981528554>, <C11 0.0089455569916208>, <D13 219.0>, <E17 

In [4]:
Ethnicity = tab.excel_ref('A5').expand(DOWN).is_not_blank()
Ethnicity

{<A12 'Pakistani'>, <A19 'White and Asian'>, <A23 'Total'>, <A6 'Other white'>, <A21 'Chinese'>, <A25 'Total'>, <A5 'White British'>, <A7 'Not stated'>, <A14 'Other'>, <A8 'White Irish'>, <A9 'Indian'>, <A16 'African'>, <A15 'Other black'>, <A18 'Bangladeshi'>, <A11 'White and black Caribbean'>, <A13 'Other Asian'>, <A17 'Other mixed'>, <A22 'Unknown'>, <A28 '*Percentages may equal 0% or not sum to 100% due to rounding'>, <A24 'Inconsistent/missing'>, <A10 'Caribbean'>, <A20 'White and black African'>}

In [5]:
Clients = tab.excel_ref('B3').expand(RIGHT).is_not_blank()
Clients

{<D3 'Non-opiate only'>, <J3 'Total'>, <H3 'Alcohol only'>, <B3 'Opiate'>, <F3 'Non-opiate and Alcohol'>}

In [6]:
MeasureType = tab.excel_ref('B4').expand(RIGHT).is_not_blank()
MeasureType

{<D4 'n'>, <B4 'n'>, <K4 '%'>, <H4 'n'>, <J4 'n'>, <G4 '%'>, <I4 '%'>, <F4 'n'>, <C4 '%'>, <E4 '%'>}

In [7]:
Dimensions = [
            HDim(Ethnicity,'Ethnicity',DIRECTLY,LEFT),
            HDim(Clients,'Clients',CLOSEST,LEFT),
            HDim(MeasureType,'Measure Type',DIRECTLY,ABOVE),
            HDimConst('Unit','People')            
            ]

In [8]:
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# if is_interactive():
#     savepreviewhtml(c1)

In [9]:
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,Ethnicity,Clients,Measure Type,Unit
0,117280.000000,White British,Opiate,n,People
1,0.851571,White British,Opiate,%,People
2,18762.000000,White British,Non-opiate only,n,People
3,0.794865,White British,Non-opiate only,%,People
4,22782.000000,White British,Non-opiate and Alcohol,n,People
5,0.832675,White British,Non-opiate and Alcohol,%,People
6,63951.000000,White British,Alcohol only,n,People
7,0.851624,White British,Alcohol only,%,People
8,222775.000000,White British,Total,n,People
9,0.844552,White British,Total,%,People


In [10]:
new_table = new_table[new_table['OBS'] != 0 ]

In [11]:
new_table.columns = ['Value' if x=='OBS' else x for x in new_table.columns]

In [12]:
new_table['Measure Type'] = new_table['Measure Type'].map(
    lambda x: {
        'n' : 'Count', 
        '%' : 'Percentage',
        }.get(x, x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [13]:
new_table.head()

Unnamed: 0,Value,Ethnicity,Clients,Measure Type,Unit
0,117280.0,White British,Opiate,Count,People
1,0.851571,White British,Opiate,Percentage,People
2,18762.0,White British,Non-opiate only,Count,People
3,0.794865,White British,Non-opiate only,Percentage,People
4,22782.0,White British,Non-opiate and Alcohol,Count,People


In [14]:
new_table.dtypes

Value           float64
Ethnicity        object
Clients          object
Measure Type     object
Unit             object
dtype: object

In [15]:
new_table['Value'] = new_table['Value'].astype(str)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [16]:
new_table.head(3)

Unnamed: 0,Value,Ethnicity,Clients,Measure Type,Unit
0,117280.0,White British,Opiate,Count,People
1,0.851570555176,White British,Opiate,Percentage,People
2,18762.0,White British,Non-opiate only,Count,People


In [17]:
new_table['Ethnicity'] = new_table['Ethnicity'].map(
    lambda x: {
        'Total' : 'All' 
        }.get(x, x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [18]:
new_table['Clients'] = new_table['Clients'].map(
    lambda x: {
        'Total' : 'All Clients',
        }.get(x, x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [20]:
new_table.head(3)

Unnamed: 0,Value,Ethnicity,Clients,Measure Type,Unit
0,117280.0,White British,Opiate,Count,People
1,0.851570555176,White British,Opiate,Percentage,People
2,18762.0,White British,Non-opiate only,Count,People


In [21]:
new_table = new_table[['Ethnicity','Clients','Measure Type','Value','Unit']]

In [22]:
if is_interactive():
    SubstancetinationFolder = Path('out')
    SubstancetinationFolder.mkdir(exist_ok=True, parents=True)
    new_table.to_csv(SubstancetinationFolder / ('table4.4.1.csv'), index = False)