# Prison population of England and Wales

## Updating data visualizations using new Gov.uk data source

In [2]:
##Imports 

#Libraries
import os
import plotly.graph_objs as go  # Offline plotting
import chart_studio.plotly as py  # Online plotting
import chart_studio
import plotly.io as pio
import pandas as pd
import datetime
import textwrap
from dotenv import load_dotenv, find_dotenv

#Local scripts
import src.data.utilities as utils
import src.visualization.prt_theme as prt_theme

In [3]:
## Environment variables and project config
dotenv_path = find_dotenv()  # path of the .env file found by python-dotenv
load_dotenv(dotenv_path)
config = utils.read_config()

## Chart Studio credentials, taken from the environment rather than hard-coded
chart_studio.tools.set_credentials_file(
    username=os.getenv("PLOTLY_USERNAME"),
    api_key=os.getenv("PLOTLY_API_KEY"),
)

## Figure defaults: display config passed to fig.show() and the default template
## (presumably "prt_template" is registered by importing prt_theme - confirm)
plotly_config = config["plotly"]["config"]
pio.templates.default = "prt_template"

In [4]:
##Reading in data
# The configured clean-data path is used as a plain prefix of the file name,
# and the "date" column is parsed to datetimes on load.
processed_csv = f"{config['data']['clnFilePath']}processed_data.csv"
df = pd.read_csv(processed_csv, parse_dates=["date"])

df

Unnamed: 0,date,group,type,value
0,2020-01-03,female,prison,3688.0
1,2020-01-03,male,prison,78871.0
2,2020-01-03,total,hdc,2759.0
3,2020-01-03,total,operational_capacity,84796.0
4,2020-01-03,total,prison,82559.0
...,...,...,...,...
1456,2025-02-03,total,prison,86802.0
1457,2025-02-03,youth,hdc,
1458,2025-02-03,youth,headroom,105.0
1459,2025-02-03,youth,operational_capacity,452.0


In [5]:
##Filtering data
# Keep only rows for the "total" group of type "prison", dated 2021 or later
is_total_group = df["group"].eq("total")
is_prison_type = df["type"].eq("prison")
is_2021_onwards = df["date"].dt.year.ge(2021)

df_include = df.loc[is_total_group & is_prison_type & is_2021_onwards]

In [6]:
# Inspect the filtered rows (group == "total", type == "prison", year >= 2021)
df_include

Unnamed: 0,date,group,type,value
249,2021-01-15,total,prison,77942.0
254,2021-01-22,total,prison,77955.0
259,2021-01-29,total,prison,77976.0
264,2021-02-05,total,prison,78024.0
269,2021-02-12,total,prison,78038.0
...,...,...,...,...
1392,2025-01-06,total,prison,85689.0
1408,2025-01-13,total,prison,85853.0
1424,2025-01-20,total,prison,86068.0
1440,2025-01-27,total,prison,86463.0


In [11]:
## Chart title
# Wrapped to at most 65 characters per line; the pieces are re-joined with <br> below
title = textwrap.wrap("<b>Prison population in England and Wales</b>", width=65)

##Plotting

fig = go.Figure()

# Build one line trace per calendar year so that years overlay on a shared
# week-of-year x axis.
trace_list = []
for year in df_include["date"].dt.year.unique():
    df_year = df_include[df_include["date"].dt.year == year]

    trace = go.Scatter(
        # NOTE: x is the ISO week number. The cells following this one show that
        # the last 2024 date is given week 1 by this numbering.
        x=df_year["date"].dt.isocalendar().week,
        y=df_year["value"],
        mode="lines",
        connectgaps=True,
        # Hover shows the actual date (day + abbreviated month) and the value
        hovertext=df_year["date"].dt.strftime("%d %b"),
        hovertemplate="<b>%{hovertext}</b><br>" + "%{y:,.0f}",
        name=str(year),
    )

    trace_list.append(trace)

fig.add_traces(trace_list)

##Edit the layout

fig.update_layout(
    margin=dict(l=64, b=75, r=64, pad=10),
    title="<br>".join(title),
    yaxis_dtick=2000,
    # Month tick labels are not wired up; the two names referenced below are
    # not defined at this point in the notebook.
    # xaxis_tickvals=month_weeks,
    # xaxis_ticktext=xtick_vals[filt].strftime("%b"),
    hovermode='x'
)

## Chart annotations
annotations = []

# Vertical offsets added to each trace label's y position, indexed by trace
# (all zero here, i.e. no adjustment). Has exactly five entries.
y_list = [0, 0, 0, 0, 0]

# Adding trace annotations
# NOTE(review): the `i < 4` test and the five-entry y_list hard-code five
# traces, with the fifth treated as the current year - confirm before the data
# gains another year.
for i in range(0, len(trace_list)):
    if i < 4:
        # For the first four traces, use a fixed x position
        x_position = 52
    else:
        # For the current year's trace, use the last x value position
        x_position = trace_list[i].x[-1]

    annotations.append(
        dict(
            xref="x",
            yref="y",
            x=x_position,
            # Label sits at the trace's final y value plus its offset
            y=trace_list[i].y[-1] + y_list[i],
            text=str(trace_list[i].name),
            xanchor="left",
            align="left",
            showarrow=False,
            # Colour taken from the template colourway at the trace's index
            font_color=fig.layout.template.layout.colorway[i],
            font_size=10,
        )
    )

# Adding source label
# Positioned in paper coordinates, below and to the left of the plot area
annotations.append(
    dict(
        xref="paper",
        yref="paper",
        x=-0.08,
        y=-0.19,
        align="left",
        showarrow=False,
        text="<b>Source: Ministry of Justice Prison Population Bulletin</b>",
        font_size=12,
    )
)

# Adding y-axis label
# Anchored at x = 1 in data coordinates and just above the plot area in paper coordinates
annotations.append(
    dict(
        xref="x",
        yref="paper",
        x=1,
        y=1.04,
        align="left",
        xanchor="left",
        showarrow=False,
        text="People in prison",
        font_size=12,
    )
)

# Adding annotations to layout
fig.update_layout(annotations=annotations)

# Fixed axis ranges: y in people, x in week numbers 1-52
fig.update_yaxes(range=[75900, 90100], nticks=6)
fig.update_xaxes(range=[1, 52])

##Plot file offline
fig.show(config=plotly_config)

Right, so this is plotting okay with the new data source, but there's an issue with the 2024 data which needs further investigation. I suspect that the wrong week number is being assigned to the last date of the year during plotting.

In [None]:
# ISO week number of the last 2024 observation
dates_2024 = df_include.loc[df_include["date"].dt.year == 2024, "date"]
dates_2024.iloc[-1].isocalendar().week

1

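Yep, as suspected it's assigning week 1 rather than the end of the year: under ISO week numbering, dates in the final days of December can belong to week 1 of the following year. Let's force it to the end by computing the week number relative to 1 January of the date's own year instead of relying entirely on `isocalendar`.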

In [23]:
# Worked example: a late-December date
date = pd.Timestamp("2024-12-30")

# Week of the year counted in 7-day blocks from 1 January of the same year
days_since_new_year = date.dayofyear - 1
week_num = days_since_new_year // 7 + 1
print(f"Adjusted Week Number: {week_num}")

Adjusted Week Number: 53


Right, that works, so let's now integrate this into the plotting code.

In [None]:
## Chart title
# Wrapped to at most 65 characters per line; the pieces are re-joined with <br> below
title = textwrap.wrap("<b>Prison population in England and Wales</b>", width=65)

##Plotting

fig = go.Figure()

# Build one line trace per calendar year so that years overlay on a shared
# week-of-year x axis.
trace_list = []
for year in df_include["date"].dt.year.unique():
    df_year = df_include[df_include["date"].dt.year == year]

    # Week number counted in 7-day blocks from 1 January of the date's own year
    # (day 1-7 -> week 1, ..., day 365/366 -> week 53). Unlike the ISO week,
    # this never sends a late-December date back to week 1.
    week_of_year = (df_year["date"].dt.dayofyear - 1) // 7 + 1

    trace = go.Scatter(
        x=week_of_year,
        y=df_year["value"],
        mode="lines",
        connectgaps=True,
        # Hover shows the actual date (day + abbreviated month) and the value
        hovertext=df_year["date"].dt.strftime("%d %b"),
        hovertemplate="<b>%{hovertext}</b><br>" + "%{y:,.0f}",
        name=str(year),
    )

    trace_list.append(trace)

fig.add_traces(trace_list)

##Edit the layout

fig.update_layout(
    margin=dict(l=64, b=75, r=64, pad=10),
    title="<br>".join(title),
    yaxis_dtick=2000,
    # TODO: month tick labels are not wired up yet; the names referenced below
    # are not defined at this point in the notebook.
    # xaxis_tickvals=month_weeks,
    # xaxis_ticktext=xtick_vals[filt].strftime("%b"),
    hovermode='x'
)

## Chart annotations
annotations = []

# Vertical offsets (in y-axis units) added to each trace label, one per trace.
# Sized from the traces so that adding another year of data cannot raise an
# IndexError; edit individual entries to nudge overlapping labels apart.
y_list = [0] * len(trace_list)

# Label colours come from the template colourway, wrapping around if there are
# more traces than colours.
colorway = fig.layout.template.layout.colorway

# Index of the most recent year, whose line may stop part-way through the year
latest_index = len(trace_list) - 1

# Adding trace annotations
for i, trace in enumerate(trace_list):
    if i < latest_index:
        # Earlier years: label at the fixed right-hand edge of the x axis
        x_position = 53
    else:
        # Most recent year: label where the line currently ends
        x_position = trace.x[-1]

    annotations.append(
        dict(
            xref="x",
            yref="y",
            x=x_position,
            # Label sits at the trace's final y value plus its offset
            y=trace.y[-1] + y_list[i],
            text=str(trace.name),
            xanchor="left",
            align="left",
            showarrow=False,
            font_color=colorway[i % len(colorway)],
            font_size=10,
        )
    )

# Adding source label
# Positioned in paper coordinates, below and to the left of the plot area
annotations.append(
    dict(
        xref="paper",
        yref="paper",
        x=-0.08,
        y=-0.19,
        align="left",
        showarrow=False,
        text="<b>Source: Ministry of Justice Prison Population Bulletin</b>",
        font_size=12,
    )
)

# Adding y-axis label
# Anchored at x = 1 in data coordinates and just above the plot area in paper coordinates
annotations.append(
    dict(
        xref="x",
        yref="paper",
        x=1,
        y=1.04,
        align="left",
        xanchor="left",
        showarrow=False,
        text="People in prison",
        font_size=12,
    )
)

# Adding annotations to layout
fig.update_layout(annotations=annotations)

# Fixed axis ranges: y in people, x in week numbers 1-53
fig.update_yaxes(range=[75900, 90100], nticks=6)
fig.update_xaxes(range=[1, 53])

##Plot file offline
fig.show(config=plotly_config)

In [35]:
# Generate a daily calendar for a sample year. It is kept in its own variable
# so that the dataset loaded into `df` earlier in the notebook is not overwritten.
start = datetime.datetime(2018, 1, 1)
end = datetime.datetime(2018, 12, 31)
df_calendar = pd.DataFrame({"date": pd.date_range(start, end)})

# Month number and week of the year (7-day blocks counted from 1 January)
df_calendar["month"] = df_calendar["date"].dt.month
df_calendar["week"] = (df_calendar["date"].dt.dayofyear - 1) // 7 + 1

# Week in which each month starts: rows are in date order, so first() picks
# the week of the 1st of each month
month_weeks = df_calendar.groupby("month")["week"].first()

# Abbreviated month names in calendar order
month_labels = df_calendar["date"].dt.strftime("%b").unique()

# Tick positions and labels as plain lists for Plotly
xaxis_tickvals = month_weeks.tolist()
xaxis_ticktext = month_labels.tolist()

print(xaxis_tickvals, xaxis_ticktext)


[1, 5, 9, 13, 18, 22, 26, 31, 35, 40, 44, 48] ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']


In [37]:
# Sample dates spanning several years, standing in for the real data. Kept in
# its own variable so that the dataset loaded into `df` earlier in the notebook
# is not overwritten.
df_sample = pd.DataFrame({"date": pd.date_range("2016-01-01", "2024-12-31")})  # Example range

# Get the oldest year dynamically
oldest_year = df_sample["date"].dt.year.min()

# Build a full daily calendar for that year only. Every month is then present
# and week numbers stay within 1-53, however many years the data spans and
# whichever month it starts in.
df_calendar = pd.DataFrame(
    {"date": pd.date_range(f"{oldest_year}-01-01", f"{oldest_year}-12-31")}
)

# Week of the year (7-day blocks counted from 1 January) and month number
df_calendar["week"] = (df_calendar["date"].dt.dayofyear - 1) // 7 + 1
df_calendar["month"] = df_calendar["date"].dt.month

# Week in which each month starts, and the abbreviated month names
month_weeks = df_calendar.groupby("month")["week"].first()
month_labels = df_calendar["date"].dt.strftime("%b").unique()  # Get unique month names

# Convert to lists for Plotly
xaxis_tickvals = month_weeks.tolist()
xaxis_ticktext = month_labels.tolist()

print(f"Oldest Year: {oldest_year}")
print("Week Numbers:", xaxis_tickvals)
print("Month Labels:", xaxis_ticktext)


Oldest Year: 2016
Week Numbers: [1, 5, 9, 14, 18, 22, 27, 31, 35, 40, 44, 48]
Month Labels: ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']


## Incorporating into script and testing

In [38]:
%load_ext autoreload
%autoreload 2

In [46]:
from src.visualization import prison_population