Table 4.6.1: Religion, new presentations to treatment

In [1]:
from gssutils import *

if is_interactive():
    import requests
    from cachecontrol import CacheControl
    from cachecontrol.caches.file_cache import FileCache
    from cachecontrol.heuristics import LastModified
    from pathlib import Path

    session = CacheControl(requests.Session(),
                           cache=FileCache('.cache'),
                           heuristic=LastModified())

    sourceFolder = Path('in')
    sourceFolder.mkdir(exist_ok=True)

    inputURL = 'https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/752515/AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx'
    inputFile = sourceFolder / 'AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx'
    response = session.get(inputURL)
    with open(inputFile, 'wb') as f:
      f.write(response.content)    

In [2]:
tab = loadxlstabs(inputFile, sheetids='Table 4.6.1')[0]

Loading in\AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx which has size 272149 bytes
Table names: ['Table 4.6.1']


https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/752515/AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx

In [3]:
observations = tab.excel_ref('B5').expand(DOWN).expand(RIGHT).is_not_blank()
observations

{<D7 3126.0>, <B12 104.0>, <G12 0.00187404168323017>, <H8 329.0>, <K19 1.0>, <K18 0.0907406500481578>, <F11 377.0>, <I17 0.0471596563559924>, <K7 0.25751342703691>, <J14 9.0>, <F5 1.0>, <E7 0.202042399172699>, <F18 1420.0>, <E5 0.000129265770423992>, <G16 0.579135669260037>, <H14 1.0>, <F14 0.0>, <I7 0.303491276885269>, <E9 6.46328852119959e-05>, <F12 33.0>, <H10 45.0>, <H6 169.0>, <E13 0.00336091003102379>, <H17 2322.0>, <D17 669.0>, <I15 0.0288400999248533>, <F10 23.0>, <E8 0.00258531540847983>, <C9 0.000124390486615584>, <B18 3941.0>, <H20 1419.0>, <H16 24514.0>, <B6 243.0>, <G15 0.0308365040604236>, <G13 0.00459992049520132>, <I19 1.0>, <D20 1244.0>, <C19 1.0>, <H11 508.0>, <H7 14943.0>, <E10 0.00200361944157187>, <J7 31549.0>, <F13 81.0>, <J21 127307.0>, <J8 566.0>, <B13 341.0>, <C11 0.0322917703254055>, <G6 0.00442955306945312>, <F17 836.0>, <E15 0.0274689762150982>, <B21 41178.0>, <D16 9142.0>, <G18 0.0806405815208132>, <H19 49237.0>, <C16 0.537615683152552>, <G9 0.0001135782838

In [4]:
Religion = tab.excel_ref('A5').expand(DOWN).is_not_blank() - tab.excel_ref('A21')
Religion

{<A16 'None'>, <A12 'Pagan'>, <A8 'Hindu'>, <A7 'Christian'>, <A10 'Jewish'>, <A17 'Decline'>, <A9 'Jain'>, <A6 'Buddhist'>, <A5 "Baha'i">, <A19 'Total'>, <A18 'Unknown'>, <A13 'Sikh'>, <A20 'Inconsistent/missing'>, <A15 'Other'>, <A14 'Zoroastrian'>, <A11 'Muslim'>}

In [5]:
Clients = tab.excel_ref('B3').expand(RIGHT).is_not_blank()
Clients

{<D3 'Non-opiate only'>, <B3 'Opiate'>, <F3 'Non-opiate and Alcohol'>, <H3 'Alcohol only'>, <J3 'Total'>}

In [6]:
MeasureType = tab.excel_ref('B4').expand(RIGHT).is_not_blank()
MeasureType

{<F4 'n'>, <H4 'n'>, <I4 '%'>, <C4 '%'>, <E4 '%'>, <B4 'n'>, <K4 '%'>, <G4 '%'>, <J4 'n'>, <D4 'n'>}

In [7]:
Dimensions = [
            HDim(Religion,'Religion',DIRECTLY,LEFT),
            HDim(Clients,'Clients',CLOSEST,LEFT),
            HDim(MeasureType,'Measure Type',DIRECTLY,ABOVE),
            HDimConst('Unit','People')            
            ]

In [8]:
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# if is_interactive():
#     savepreviewhtml(c1)

In [9]:
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,Religion,Clients,Measure Type,Unit
0,8.000000,Baha'i,Opiate,n,People
1,0.000199,Baha'i,Opiate,%,People
2,2.000000,Baha'i,Non-opiate only,n,People
3,0.000129,Baha'i,Non-opiate only,%,People
4,1.000000,Baha'i,Non-opiate and Alcohol,n,People
5,0.000057,Baha'i,Non-opiate and Alcohol,%,People
6,5.000000,Baha'i,Alcohol only,n,People
7,0.000102,Baha'i,Alcohol only,%,People
8,16.000000,Baha'i,Total,n,People
9,0.000131,Baha'i,Total,%,People


In [10]:
new_table = new_table[new_table['OBS'] != 0 ]

In [11]:
new_table.columns = ['Value' if x=='OBS' else x for x in new_table.columns]

In [12]:
new_table['Measure Type'] = new_table['Measure Type'].map(
    lambda x: {
        'n' : 'Count', 
        '%' : 'Percentage',
        }.get(x, x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [13]:
new_table.head()

Unnamed: 0,Value,Religion,Clients,Measure Type,Unit
0,8.0,Baha'i,Opiate,Count,People
1,0.000199,Baha'i,Opiate,Percentage,People
2,2.0,Baha'i,Non-opiate only,Count,People
3,0.000129,Baha'i,Non-opiate only,Percentage,People
4,1.0,Baha'i,Non-opiate and Alcohol,Count,People


In [14]:
new_table.dtypes

Value           float64
Religion         object
Clients          object
Measure Type     object
Unit             object
dtype: object

In [15]:
new_table['Value'] = new_table['Value'].astype(str)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [16]:
new_table.head(3)

Unnamed: 0,Value,Religion,Clients,Measure Type,Unit
0,8.0,Baha'i,Opiate,Count,People
1,0.0001990247785849,Baha'i,Opiate,Percentage,People
2,2.0,Baha'i,Non-opiate only,Count,People


In [17]:
new_table['Religion'] = new_table['Religion'].map(
    lambda x: {
        'Total' : 'All' 
        }.get(x, x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [18]:
new_table['Religion'].unique()

array(["Baha'i", 'Buddhist', 'Christian', 'Hindu', 'Jain', 'Jewish',
       'Muslim', 'Pagan', 'Sikh', 'Zoroastrian', 'Other', 'None',
       'Decline', 'Unknown', 'All', 'Inconsistent/missing', None],
      dtype=object)

In [19]:
def user_perc(x):
    
    if x == None :
        return 'All inclusice Inconsistent/missing'
    else:
        return x
    
new_table['Religion'] = new_table.apply(lambda row: user_perc(row['Religion']), axis = 1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [20]:
new_table['Clients'] = new_table['Clients'].map(
    lambda x: {
        'Total' : 'All Clients',
        }.get(x, x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [21]:
new_table.head(3)

Unnamed: 0,Value,Religion,Clients,Measure Type,Unit
0,8.0,Baha'i,Opiate,Count,People
1,0.0001990247785849,Baha'i,Opiate,Percentage,People
2,2.0,Baha'i,Non-opiate only,Count,People


In [22]:
new_table = new_table[['Religion','Clients','Measure Type','Value','Unit']]

In [23]:
if is_interactive():
    SubstancetinationFolder = Path('out')
    SubstancetinationFolder.mkdir(exist_ok=True, parents=True)
    new_table.to_csv(SubstancetinationFolder / ('table4.6.1.csv'), index = False)