In [2]:
import pandas as pd
import numpy as np
import json
import altair as alt

let's compare the most recent data we have for Wales, England and Scotland

In [3]:
eng_df = pd.read_excel('England/August-2023-AE-by-provider-96Jke.xls', skiprows=15)

eng_df = eng_df[["Code", "System", "Percentage in 4 hours or less (all)"]]
eng_df = eng_df.rename(columns={"Percentage in 4 hours or less (all)": "value", 'Code': 'id', 'System': 'name'})
eng_df = eng_df.dropna()
eng_df['nation'] = 'England'

Wales

In [4]:
with open("Wales/4hr_ae_2023.json") as f:
    data = json.load(f)
    data = data['value']
    wls_df = pd.DataFrame(data)
wls_df = wls_df.query("Date_Code == '2023m08'")
wls_df = wls_df[wls_df.Hospital_ItemName_ENG.str.contains("Board")]
wls_df = wls_df[["Hospital_Code" ,"Hospital_ItemName_ENG", "Data"]]
wls_df = wls_df.rename(columns={"Hospital_Code": "id", "Hospital_ItemName_ENG": "name", "Data": "value"})
wls_df['value'] = wls_df['value'].astype(float)*0.01
wls_df['nation'] = 'Wales'

Scotland

In [5]:
sct_df = pd.read_csv("Scotland/ae.csv")
sct_df = sct_df.query("Month == 202308")
#sct_df = sct_df.query("DepartmentType == 'Emergency Department'")

names_df = pd.read_csv("Scotland/Health_Boards_(May_2016)_Names_and_Codes_in_Scotland.csv")
# build into a "HB16CD" to "HB16NM" mapping
names_df = names_df[["HB16CD", "HB16NM"]]
names_df = names_df.rename(columns={"HB16CD": "HBT", "HB16NM": "name"})

sct_df = sct_df.merge(names_df, on="HBT")

sct_df = sct_df[["name", "HBT", "NumberOfAttendancesAll", "NumberWithin4HoursAll"]]
sct_df = sct_df.groupby(['HBT', 'name']).sum()

sct_df = sct_df.reset_index()
sct_df['value'] = sct_df['NumberWithin4HoursAll'] / sct_df['NumberOfAttendancesAll']

sct_df = sct_df[["HBT", "name", "value"]]
sct_df = sct_df.rename(columns={"HBT": "id"})
sct_df['nation'] = 'Scotland'

sct_df



Unnamed: 0,id,name,value,nation
0,S08000015,Ayrshire and Arran,0.703477,Scotland
1,S08000016,Borders,0.623162,Scotland
2,S08000017,Dumfries and Galloway,0.809988,Scotland
3,S08000019,Forth Valley,0.598728,Scotland
4,S08000020,Grampian,0.698593,Scotland
5,S08000022,Highland,0.852598,Scotland
6,S08000024,Lothian,0.64389,Scotland
7,S08000025,Orkney,0.8774,Scotland
8,S08000026,Shetland,0.832241,Scotland
9,S08000028,Western Isles,0.984743,Scotland


UK

In [6]:
df = pd.concat([eng_df, wls_df, sct_df])
df = df.dropna(subset=["name", "value"])

df = df.rename(columns={"value": "0_on_target"})
df["1_off_target"] = 1 - df["0_on_target"]
df = df.melt(id_vars=["id", "name", "nation"], value_vars=["0_on_target", "1_off_target"], var_name="series", value_name="value")

df['nameShort'] = df['name'].str.replace("NHS ", "")
df['nameShort'] = df['nameShort'].str.replace("Health Board", "")
df['nameShort'] = df['nameShort'].str.replace("Integrated Care Board", "")
# add a flag to nameShort by Nation
flags = {
    "England" : "🏴󠁧󠁢󠁥󠁮󠁧󠁿",
    "Wales" : "🏴󠁧󠁢󠁷󠁬󠁳󠁿",
    "Scotland" : "🏴󠁧󠁢󠁳󠁣󠁴󠁿",
    "Northern Ireland" : "NI"
}
df["nameShort"] = df["nation"].map(flags)+ df["nameShort"]
df = df.query("id != '-'")
df = df.sort_values(by=['value'], ascending=False)

In [36]:
df.query("nation == 'Scotland'")

Unnamed: 0,id,name,nation,series,value,nameShort
57,S08000028,Western Isles,Scotland,0_on_target,0.984743,🏴󠁧󠁢󠁳󠁣󠁴󠁿Western Isles
55,S08000025,Orkney,Scotland,0_on_target,0.8774,🏴󠁧󠁢󠁳󠁣󠁴󠁿Orkney
53,S08000022,Highland,Scotland,0_on_target,0.852598,🏴󠁧󠁢󠁳󠁣󠁴󠁿Highland
56,S08000026,Shetland,Scotland,0_on_target,0.832241,🏴󠁧󠁢󠁳󠁣󠁴󠁿Shetland
50,S08000017,Dumfries and Galloway,Scotland,0_on_target,0.809988,🏴󠁧󠁢󠁳󠁣󠁴󠁿Dumfries and Galloway
48,S08000015,Ayrshire and Arran,Scotland,0_on_target,0.703477,🏴󠁧󠁢󠁳󠁣󠁴󠁿Ayrshire and Arran
52,S08000020,Grampian,Scotland,0_on_target,0.698593,🏴󠁧󠁢󠁳󠁣󠁴󠁿Grampian
54,S08000024,Lothian,Scotland,0_on_target,0.64389,🏴󠁧󠁢󠁳󠁣󠁴󠁿Lothian
49,S08000016,Borders,Scotland,0_on_target,0.623162,🏴󠁧󠁢󠁳󠁣󠁴󠁿Borders
51,S08000019,Forth Valley,Scotland,0_on_target,0.598728,🏴󠁧󠁢󠁳󠁣󠁴󠁿Forth Valley


In [9]:
targets = df.query("series == '0_on_target'").sort_values(by=['value']).head(20).name.unique().tolist()
temp_df = df.query("name in @targets")
temp_df['order'] = -temp_df.name.apply(lambda x: targets.index(x))


chart = alt.Chart(temp_df).mark_bar().encode(
    y=alt.Y('nameShort:N', sort=alt.EncodingSortField(field="order", op="sum", order='descending'),
    axis=alt.Axis(
        labelAngle=0,
        labelFont = "Circular Std",
        labelColor='white',), 
    title=None),
    x=alt.X('value:Q', axis=alt.Axis(format='%',
        gridOpacity=0.1,
        gridColor='white',
        labelFont = "Circular Std",
        labelColor='white',
        title=None)),
    color=alt.Color('series:N', legend=None, scale=alt.Scale(range=['rgb(136, 210, 214)', 'rgba(255, 255, 255, 0.2)'])),
    order = alt.Order('order:N'),
    tooltip=['name', 'series', 'value']
).properties(
    background="rgb(30, 42, 55)",
    width=400,
    height=400,
    title=alt.TitleParams(
        text="Longest A&E Waits",
        subtitle=["% seen within 4 hours, Great Britain only, August 2023",""],
        subtitleColor="white",
        color="white",
        font="Circular Std",
        fontSize=14,
        anchor="start",
    )
).configure_view(
    stroke=None
)
chart.display()

chart.save("png/longest_ae_waits.png", scale_factor=3)
chart.save("svg/longest_ae_waits.svg")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp_df['order'] = -temp_df.name.apply(lambda x: targets.index(x))


WARN Channel order is inappropriate for nominal field, which has no inherent order.
WARN Channel order is inappropriate for nominal field, which has no inherent order.


### Chart: Forth Valley Timeseries

In [10]:
sct_df = pd.read_csv("Scotland/ae.csv")
#sct_df = sct_df.query("DepartmentType == 'Emergency Department'")

names_df = pd.read_csv("Scotland/Health_Boards_(May_2016)_Names_and_Codes_in_Scotland.csv")
# build into a "HB16CD" to "HB16NM" mapping
names_df = names_df[["HB16CD", "HB16NM"]]
names_df = names_df.rename(columns={"HB16CD": "HBT", "HB16NM": "name"})

sct_df = sct_df.merge(names_df, on="HBT")

sct_df = sct_df[["Month", "name", "HBT", "NumberOfAttendancesAll", "NumberWithin4HoursAll"]]
sct_df = sct_df.groupby(["Month", 'HBT', 'name']).sum()

sct_df = sct_df.reset_index()
sct_df['date'] = pd.to_datetime(sct_df['Month'], format='%Y%m')
sct_df['value'] = sct_df['NumberWithin4HoursAll'] / sct_df['NumberOfAttendancesAll']
sct_df = sct_df[["date", "HBT", "name", "value"]]
sct_df = sct_df.rename(columns={"HBT": "id"})
sct_df['nation'] = 'Scotland'


base = alt.Chart(sct_df).encode(
    x=alt.X('date:T', axis=alt.Axis(format='%Y',
        gridOpacity=0,
        gridColor='white',
        labelFont = "Circular Std",
        labelColor='white',
        title=None)),
    y=alt.Y('value:Q', 
        scale=alt.Scale(zero=False),
        axis=alt.Axis(
            gridOpacity = 0.3,
        gridColor='white',
        title=None,
        labelFont = "Circular Std",
        labelColor='white',
            format='%',)),
    color=alt.condition(
        alt.datum.name == 'Forth Valley', alt.value("rgb(136, 210, 214)"), alt.value("rgba(255, 255, 255, 0.1)")
    ),
    detail='name:N',
)

line = base.mark_line()

label = base.transform_filter(
    "datum.name == 'Forth Valley' & year(datum.date) == 2023 & month(datum.date) == 7"
    ).mark_text(
        font="Circular Std",
        size=12,
        dx=5,
        align="left"
).encode(
    text=alt.Text("name:N")
)

chart = line + label

chart = chart.properties(
    background="rgb(30, 42, 55)",
    width=500,
    height=350,
    title=alt.TitleParams(
        text="Scotland A&E Waits",
        subtitle=["% seen within 4 hours, Scotland only",""],
        subtitleColor="white",
        color="white",
        font="Circular Std",
        fontSize=14,
        anchor="start",
    )
).configure_view(
    stroke=None
)

chart.display()
chart.save("png/forth_valley_scotland.png", scale_factor=3)


### A teaser version:

In [31]:
base = alt.Chart(sct_df).encode(
    x=alt.X('date:T', axis=alt.Axis(format='%Y',
        gridOpacity=0,
        gridColor='white',
        labelFont = "Circular Std",
        labelColor='white',
        tickOpacity=0,
        labelFontSize=20,
        title=None)),
    y=alt.Y('value:Q', 
        scale=alt.Scale(zero=True),
        axis=alt.Axis(
            domain=False,
            gridOpacity = 0,
        gridColor='white',
        values=[0.5, 0.75, 1],
        labels=False,
        labelFontSize=16,
        tickOpacity=0,
        title=None,
        labelFont = "Circular Std",
        labelColor='white',
            format='%',)),
    color=alt.condition(
        alt.datum.name == 'Forth Valley', alt.value("rgb(136, 210, 214)"), alt.value("rgba(255, 255, 255, 0.1)")
    ),
    detail='name:N',
)

line = base.mark_line()

label = base.transform_filter(
    "datum.name == 'Forth Valley' & year(datum.date) == 2023 & month(datum.date) == 7"
    ).mark_text(
        font="Circular Std",
        size=24,
        dx=5,
        align="left"
).encode(
    text=alt.Text("name:N")
)

chart = line + label

chart = chart.properties(
    background="rgba(30, 42, 55,0)",
    width=500,
    height=350,
).configure_view(
    stroke=None
)

chart.display()
chart.save("png/forth_valley_teaser.png", scale_factor=1)


### Shropshire Timeseries

In [113]:
sct_df.query("date == '2023-08-01'").sort_values(by='value')

Unnamed: 0,date,id,name,value,nation
1933,2023-08-01,S08000019,Forth Valley,0.598728,Scotland
1931,2023-08-01,S08000016,Borders,0.623162,Scotland
1936,2023-08-01,S08000024,Lothian,0.64389,Scotland
1934,2023-08-01,S08000020,Grampian,0.698593,Scotland
1930,2023-08-01,S08000015,Ayrshire and Arran,0.703477,Scotland
1932,2023-08-01,S08000017,Dumfries and Galloway,0.809988,Scotland
1938,2023-08-01,S08000026,Shetland,0.832241,Scotland
1935,2023-08-01,S08000022,Highland,0.852598,Scotland
1937,2023-08-01,S08000025,Orkney,0.8774,Scotland
1939,2023-08-01,S08000028,Western Isles,0.984743,Scotland


In [None]:



sct_df = sct_df[["name", "HBT", "NumberOfAttendancesAll", "NumberWithin4HoursAll"]]
sct_df = sct_df.groupby(['HBT', 'name']).sum()

sct_df = sct_df.reset_index()
sct_df['value'] = sct_df['NumberWithin4HoursAll'] / sct_df['NumberOfAttendancesAll']

sct_df = sct_df[["HBT", "name", "value"]]
sct_df = sct_df.rename(columns={"HBT": "id"})
sct_df['nation'] = 'Scotland'

sct_df



In [115]:
df.tail(20)

Unnamed: 0,id,name,value,nation,nameShort
27,QH8,NHS Mid And South Essex Integrated Care Board,0.681306,England,Mid And South Essex
280,7A2W11000025,Hywel Dda University Health Board,0.678879,Wales,Hywel Dda University
28,QMM,NHS Norfolk And Waveney Integrated Care Board,0.665442,England,Norfolk And Waveney
6,S08000024,Lothian,0.659932,Scotland,Lothian
12,QJK,NHS Devon Integrated Care Board,0.656094,England,Devon
1,S08000016,Borders,0.654734,Scotland,Borders
21,QM7,NHS Hertfordshire And West Essex Integrated Ca...,0.653668,England,Hertfordshire And West Essex
236,7A4W11000029,Cardiff and Vale University Health Board,0.639309,Wales,Cardiff and Vale University
20,QGH,NHS Herefordshire And Worcestershire Integrate...,0.638711,England,Herefordshire And Worcestershire
17,QOP,NHS Greater Manchester Integrated Care Board,0.630361,England,Greater Manchester


In [None]:
alt.Chart

In [107]:
df.groupby('nation').mean()

  df.groupby('nation').mean()


Unnamed: 0_level_0,value
nation,Unnamed: 1_level_1
England,0.723788
Northern Ireland,0.517608
Scotland,0.787312
Wales,0.731899


In [102]:
df.sort_values(by=['value'], ascending=False)
df

Unnamed: 0,id,name,value,nation
2,QOX,"NHS Bath And North East Somerset, Swindon And ...",0.728448,England
3,QHG,"NHS Bedfordshire, Luton And Milton Keynes Inte...",0.853728,England
4,QHL,NHS Birmingham And Solihull Integrated Care Board,0.696995,England
5,QUY,"NHS Bristol, North Somerset And South Gloucest...",0.692137,England
6,QU9,"NHS Buckinghamshire, Oxfordshire And Berkshire...",0.719976,England
...,...,...,...,...
5,S08000022,Highland,0.849459,Scotland
6,S08000024,Lothian,0.659932,Scotland
7,S08000025,Orkney,0.926205,Scotland
8,S08000026,Shetland,0.947368,Scotland


In [76]:
sct_df.to_excel("temp.xlsx")

In [75]:
sct_df.name.value_counts()

Highland                 4
Grampian                 3
Lothian                  3
Ayrshire and Arran       2
Dumfries and Galloway    2
Borders                  1
Forth Valley             1
Orkney                   1
Shetland                 1
Western Isles            1
Name: name, dtype: int64

In [68]:
sct_df.query("HBT == 'S08000028'")

Unnamed: 0,Month,Country,HBT,TreatmentLocation,DepartmentType,NumberOfAttendancesAll,NumberWithin4HoursAll,NumberOver4HoursAll,PercentageWithin4HoursAll,NumberOfAttendancesEpisode,...,PercentageWithin4HoursEpisode,PercentageWithin4HoursEpisodeQF,NumberOver8HoursEpisode,NumberOver8HoursEpisodeQF,PercentageOver8HoursEpisode,PercentageOver8HoursEpisodeQF,NumberOver12HoursEpisode,NumberOver12HoursEpisodeQF,PercentageOver12HoursEpisode,PercentageOver12HoursEpisodeQF
14335,202006,S92000003,S08000028,W107H,Emergency Department,374,368,6,98.4,374.0,...,98.4,,0.0,,0.0,,0.0,,0.0,


In [67]:
sct_df[sct_df.HBT.str.contains("S08000021")]

Unnamed: 0,Month,Country,HBT,TreatmentLocation,DepartmentType,NumberOfAttendancesAll,NumberWithin4HoursAll,NumberOver4HoursAll,PercentageWithin4HoursAll,NumberOfAttendancesEpisode,...,PercentageWithin4HoursEpisode,PercentageWithin4HoursEpisodeQF,NumberOver8HoursEpisode,NumberOver8HoursEpisodeQF,PercentageOver8HoursEpisode,PercentageOver8HoursEpisodeQF,NumberOver12HoursEpisode,NumberOver12HoursEpisodeQF,PercentageOver12HoursEpisode,PercentageOver12HoursEpisodeQF


In [63]:
sct_df.columns

Index(['Month', 'Country', 'HBT', 'TreatmentLocation', 'DepartmentType',
       'NumberOfAttendancesAll', 'NumberWithin4HoursAll',
       'NumberOver4HoursAll', 'PercentageWithin4HoursAll',
       'NumberOfAttendancesEpisode', 'NumberOfAttendancesEpisodeQF',
       'NumberWithin4HoursEpisode', 'NumberWithin4HoursEpisodeQF',
       'NumberOver4HoursEpisode', 'NumberOver4HoursEpisodeQF',
       'PercentageWithin4HoursEpisode', 'PercentageWithin4HoursEpisodeQF',
       'NumberOver8HoursEpisode', 'NumberOver8HoursEpisodeQF',
       'PercentageOver8HoursEpisode', 'PercentageOver8HoursEpisodeQF',
       'NumberOver12HoursEpisode', 'NumberOver12HoursEpisodeQF',
       'PercentageOver12HoursEpisode', 'PercentageOver12HoursEpisodeQF'],
      dtype='object')

Unnamed: 0,HBT,name
0,S08000015,Ayrshire and Arran
1,S08000016,Borders
2,S08000017,Dumfries and Galloway
3,S08000018,Fife
4,S08000019,Forth Valley
5,S08000020,Grampian
6,S08000021,Greater Glasgow and Clyde
7,S08000022,Highland
8,S08000023,Lanarkshire
9,S08000024,Lothian


In [71]:
names_df

Unnamed: 0,HB16CD,HB16NM,OBJECTID
0,S08000015,Ayrshire and Arran,0
1,S08000016,Borders,1
2,S08000017,Dumfries and Galloway,2
3,S08000018,Fife,3
4,S08000019,Forth Valley,4
5,S08000020,Grampian,5
6,S08000021,Greater Glasgow and Clyde,6
7,S08000022,Highland,7
8,S08000023,Lanarkshire,8
9,S08000024,Lothian,9


In [70]:
names_df.to_dict('index')

{0: {'HB16CD': 'S08000015', 'HB16NM': 'Ayrshire and Arran', 'OBJECTID': 0},
 1: {'HB16CD': 'S08000016', 'HB16NM': 'Borders', 'OBJECTID': 1},
 2: {'HB16CD': 'S08000017', 'HB16NM': 'Dumfries and Galloway', 'OBJECTID': 2},
 3: {'HB16CD': 'S08000018', 'HB16NM': 'Fife', 'OBJECTID': 3},
 4: {'HB16CD': 'S08000019', 'HB16NM': 'Forth Valley', 'OBJECTID': 4},
 5: {'HB16CD': 'S08000020', 'HB16NM': 'Grampian', 'OBJECTID': 5},
 6: {'HB16CD': 'S08000021',
  'HB16NM': 'Greater Glasgow and Clyde',
  'OBJECTID': 6},
 7: {'HB16CD': 'S08000022', 'HB16NM': 'Highland', 'OBJECTID': 7},
 8: {'HB16CD': 'S08000023', 'HB16NM': 'Lanarkshire', 'OBJECTID': 8},
 9: {'HB16CD': 'S08000024', 'HB16NM': 'Lothian', 'OBJECTID': 9},
 10: {'HB16CD': 'S08000025', 'HB16NM': 'Orkney', 'OBJECTID': 10},
 11: {'HB16CD': 'S08000026', 'HB16NM': 'Shetland', 'OBJECTID': 11},
 12: {'HB16CD': 'S08000027', 'HB16NM': 'Tayside', 'OBJECTID': 12},
 13: {'HB16CD': 'S08000028', 'HB16NM': 'Western Isles', 'OBJECTID': 13}}

In [48]:
nir_df.reset_index()

Unnamed: 0,Trust,Under 4 Hours,Total,value
0,Belfast,6550.0,15016.0,0.436201
1,Northern,6364.0,12332.0,0.516056
2,South Eastern,6675.0,11794.0,0.565966
3,Southern,7630.0,13604.0,0.560864
4,Western,5486.0,10779.0,0.508953


  nir_df.groupby('Trust').sum()


Unnamed: 0_level_0,Under 4 Hours,Total
Trust,Unnamed: 1_level_1,Unnamed: 2_level_1
Belfast,6550.0,15016.0
Northern,6364.0,12332.0
South Eastern,6675.0,11794.0
Southern,7630.0,13604.0
Western,5486.0,10779.0


In [33]:
wls_df

Unnamed: 0,id,name,value,nation
236,7A4W11000029,Cardiff and Vale University Health Board,63.9309,Wales
237,7A6W11000028,Aneurin Bevan University Health Board,71.5414,Wales
244,7A1W11000023,Betsi Cadwaladr University Health Board,62.7427,Wales
255,7A7W11000024,Powys Teaching Health Board,99.8466,Wales
280,7A2W11000025,Hywel Dda University Health Board,67.8879,Wales
