In [1]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

##Imports 

#Libraries
import os
import plotly.graph_objs as go  # Offline plotting
import chart_studio.plotly as py  # Online plotting
import chart_studio
import plotly.io as pio
import pandas as pd
import datetime
import textwrap
from dotenv import load_dotenv, find_dotenv

#Local scripts
import src.data.utilities as utils
import src.visualization.prt_theme as prt_theme

## Environment variables and project configuration
# Find a .env file and load it so the os.environ lookups below can see its values.
dotenv_path = find_dotenv()
load_dotenv(dotenv_path)
config = utils.read_config()

## Chart Studio credentials (taken from the environment, never hard-coded)
chart_studio.tools.set_credentials_file(
    username=os.environ.get("PLOTLY_USERNAME"),
    api_key=os.environ.get("PLOTLY_API_KEY"),
)

## Default figure template and the config dict later passed to fig.show()
pio.templates.default = "prt_template"
plotly_config = config["plotly"]["config"]

In [None]:
##Reading in data
# Only the two columns the chart needs are loaded; "date" is parsed to datetimes.
df = pd.read_csv(
    "{}prison_population.csv".format(config["data"]["clnFilePath"]),
    parse_dates=["date"],
    usecols=["date", "population"],
)

##Filtering year range
# Keep observations dated 2021-2024 inclusive.
df_include = df.query("date.dt.year >= 2021 and date.dt.year <= 2024")

In [None]:
# Display the year-filtered frame to check the filter result.
df_include

In [None]:
# Display only the rows dated in 2021.
df_include.query('date.dt.year == 2021')

In [None]:
##Calculating xaxis_tickvals
# One timestamp per day of 2021; the month-start days give one tick per month.
start = datetime.datetime(year=2021, month=1, day=1)
end = datetime.datetime(year=2021, month=12, day=31)

xtick_vals = pd.date_range(start=start, end=end, freq="D")
filt = xtick_vals.is_month_start

# ISO week number of the first day of each month.
month_weeks = xtick_vals[filt].isocalendar()["week"]
# 1 Jan 2021 falls in the last ISO week of the previous year, so pin the
# January tick to week 1 instead.
month_weeks.iloc[0] = 1

In [None]:
# Display the week number used for each month tick.
month_weeks

In [None]:
##Calculating xaxis_tickvals
# Rebuild the daily range for 2021 (one timestamp per calendar day).
start = datetime.datetime(year=2021, month=1, day=1)
end = datetime.datetime(year=2021, month=12, day=31)

xtick_vals = pd.date_range(start=start, end=end, freq="D")

In [None]:
# Display the ISO week number of every day in the range.
xtick_vals.isocalendar().week

In [None]:
## Chart title
title = textwrap.wrap("<b>Prison population in England and Wales</b>", width=65)

##Plotting

fig = go.Figure()

# One line per calendar year. x is a "Week NN" string (%U: week of the year with
# Sunday as the first day of the week), so the years are drawn over a shared axis.
trace_list = []
for year in df_include["date"].dt.year.unique():
    df_year = df_include[df_include["date"].dt.year == year]

    trace = go.Scatter(
        x=df_year["date"].dt.strftime("Week %U"),
        y=df_year["population"],
        mode="lines",
        connectgaps=True,
        # Hover text is taken from df_year (the rows actually plotted) rather than the
        # full df, so it has exactly one entry per point of this trace.
        # The " " format gives every point a blank hover heading.
        hovertext=df_year["date"].dt.strftime(" "),
        hovertemplate="<b>%{hovertext}</b><br>" + "%{y:,.0f}",
        name=str(year),
    )

    trace_list.append(trace)

fig.add_traces(trace_list)


##Edit the layout

# month_weeks / xtick_vals / filt come from the tick-calculation cell above:
# one tick per month, labelled with the abbreviated month name.
fig.update_layout(
    title="<br>".join(title),
    yaxis_dtick=2000,
    xaxis_tickvals=month_weeks,
    xaxis_ticktext=xtick_vals[filt].strftime("%b"),
    hovermode='x'
)

## Chart annotations
annotations = []

# Vertical nudge (in y-axis units) for each year label, in trace order.
y_list = [0, 0, 0, 0, 0]

# Adding trace annotations
# Each line is labelled with its year at its last point, in the line's colour.
for i in range(0, len(trace_list)):
    annotations.append(
        dict(
            xref="x",
            yref="y",
            x=trace_list[i].x[-1],
            y=trace_list[i].y[-1] + y_list[i],
            text=str(trace_list[i].name),
            xanchor="left",
            align="left",
            showarrow=False,
            font_color=fig.layout.template.layout.colorway[i],
            font_size=10,
        )
    )

# Adding source label
annotations.append(
    dict(
        xref="paper",
        yref="paper",
        x=-0.08,
        y=-0.19,
        align="left",
        showarrow=False,
        text="<b>Source: Ministry of Justice Prison Population Bulletin</b>",
        font_size=12,
    )
)

# Adding y-axis label
# Anchored to the "Week 00" category on x and just above the plot area on y.
annotations.append(
    dict(
        xref="x",
        yref="paper",
        x="Week 00",
        y=1.04,
        align="left",
        xanchor="left",
        showarrow=False,
        text="People in prison",
        font_size=12,
    )
)

# Adding annotations to layout
fig.update_layout(annotations=annotations)

fig.update_yaxes(range=[75900, 90100], nticks=6)
fig.update_xaxes(range=[-1, 52])

##Plot file offline
fig.show(config=plotly_config, renderer='browser')


In [None]:
##Calculating xaxis_tickvals
# Daily range for 2021; month-start days mark where each month tick goes.
start = datetime.datetime(year=2021, month=1, day=1)
end = datetime.datetime(year=2021, month=12, day=31)

xtick_vals = pd.date_range(start=start, end=end, freq="D")
filt = xtick_vals.is_month_start

# ISO week number of the first day of each month (January is left unadjusted here).
month_weeks = xtick_vals[filt].isocalendar()["week"]

In [None]:
# Display the daily date range the ticks are derived from.
xtick_vals

In [None]:
# Display the boolean month-start mask over xtick_vals.
filt

In [None]:
# Display the ISO week number of each month start.
month_weeks

In [None]:
# Display the full, unfiltered frame.
df

In [None]:
# Add a "Week NN" label per row (%U: week of the year, Sunday as the first day).
# Note: this adds the column to df in place.
df['week'] = df["date"].dt.strftime("Week %U")

In [None]:
# Display df again, now with the derived "week" column.
df

In [None]:
# Count the non-null values in each column per calendar year.
df.groupby(by=df['date'].dt.year).count()

It appears that there are only 50 weekly observations in 2020, which could explain why some of the other years are displayed incorrectly.

In [None]:
# Print each trace's name next to its x values to compare the week labels across years.
for plotted_trace in trace_list:
    print(plotted_trace.name, plotted_trace.x)

Looking at these returned values, Week 14 is missing from 2020 but present in 2021 and 2022, which appears to be confusing the placement.
If I adjust the start year to 2021, the data is plotted correctly. I may need to add a value for each week of the year in a separate column and use that to set the placement of the values.

There is also some weirdness with Week 00 in the 2024 data, which will need adjusting.

In [None]:
# Display the last trace that was built.
trace_list[-1]

In [None]:
# Display only the rows dated in 2024.
df_include.query('date.dt.year == 2024')

In [None]:
# "Week NN" labels for the 2024 rows using %U (Sunday as the first day of the week).
mask = df_include['date'].dt.year == 2024
df_include.loc[mask, 'date'].dt.strftime("Week %U")

Having looked up the `strftime` codes, it appears that `%U` uses Sunday as the first day of the week. Let's see whether switching to `%W` (Monday as the first day of the week) changes the outcome for 2024, and then for the earlier years in the time series.

In [None]:
# Same check for the 2024 rows, but with %W (Monday as the first day of the week).
mask = df_include['date'].dt.year == 2024
df_include.loc[mask, 'date'].dt.strftime("Week %W")

In [None]:
# Count how many observations share each "Week %W" label.
# NOTE(review): df_include appears to be filtered to a later start year already, so the
# >= 2019 mask may select every row; confirm this is intended.
mask = df_include['date'].dt.year >= 2019
df_include.loc[mask, 'date'].dt.strftime("Week %W").value_counts()

In [None]:
## Chart title
title = textwrap.wrap("<b>Prison population in England and Wales</b>", width=65)

##Plotting

fig = go.Figure()

# One line per calendar year. x is a "Week NN" string (%W: week of the year with
# Monday as the first day of the week), so the years are drawn over a shared axis.
trace_list = []
for year in df_include["date"].dt.year.unique():
    df_year = df_include[df_include["date"].dt.year == year]

    trace = go.Scatter(
        x=df_year["date"].dt.strftime("Week %W"),
        y=df_year["population"],
        mode="lines",
        connectgaps=True,
        # Hover text is taken from df_year (the rows actually plotted) rather than the
        # full df, so it has exactly one entry per point of this trace.
        # The " " format gives every point a blank hover heading.
        hovertext=df_year["date"].dt.strftime(" "),
        hovertemplate="<b>%{hovertext}</b><br>" + "%{y:,.0f}",
        name=str(year),
    )

    trace_list.append(trace)

fig.add_traces(trace_list)


##Edit the layout

# month_weeks / xtick_vals / filt come from the tick-calculation cell above:
# one tick per month, labelled with the abbreviated month name.
fig.update_layout(
    title="<br>".join(title),
    yaxis_dtick=2000,
    xaxis_tickvals=month_weeks,
    xaxis_ticktext=xtick_vals[filt].strftime("%b"),
    hovermode='x'
)

## Chart annotations
annotations = []

# Vertical nudge (in y-axis units) for each year label, in trace order.
y_list = [0, 0, 0, 0, 0]

# Adding trace annotations
# Each line is labelled with its year at its last point, in the line's colour.
for i in range(0, len(trace_list)):
    annotations.append(
        dict(
            xref="x",
            yref="y",
            x=trace_list[i].x[-1],
            y=trace_list[i].y[-1] + y_list[i],
            text=str(trace_list[i].name),
            xanchor="left",
            align="left",
            showarrow=False,
            font_color=fig.layout.template.layout.colorway[i],
            font_size=10,
        )
    )

# Adding source label
annotations.append(
    dict(
        xref="paper",
        yref="paper",
        x=-0.08,
        y=-0.19,
        align="left",
        showarrow=False,
        text="<b>Source: Ministry of Justice Prison Population Bulletin</b>",
        font_size=12,
    )
)

# Adding y-axis label
# Anchored to the "Week 01" category on x and just above the plot area on y.
annotations.append(
    dict(
        xref="x",
        yref="paper",
        x="Week 01",
        y=1.04,
        align="left",
        xanchor="left",
        showarrow=False,
        text="People in prison",
        font_size=12,
    )
)

# Adding annotations to layout
fig.update_layout(annotations=annotations)

fig.update_yaxes(range=[75900, 90100], nticks=6)
fig.update_xaxes(range=[-1, 52])

##Plot file offline
fig.show(config=plotly_config, renderer='browser')


That seems to have fixed the issue for 2024 at least. Let's reset the filter and run the code again.

In [None]:
##Filtering year range
# Widen the window again so that 2020 is included (2020-2024 inclusive).
df_include = df.query("date.dt.year >= 2020 and date.dt.year <= 2024")

Still the same issue for the other years, so that's the next issue to resolve.

In [None]:
# Count the non-null values in each column per calendar year of the filtered frame.
df_include.groupby(by=df_include['date'].dt.year).count()

In [None]:
# Display only the rows dated in 2021.
df_include.query('date.dt.year == 2021')

In [None]:
##Calculating xaxis_tickvals
# Daily range for 2021; month-start days mark where each month tick goes.
start = datetime.datetime(year=2021, month=1, day=1)
end = datetime.datetime(year=2021, month=12, day=31)

xtick_vals = pd.date_range(start=start, end=end, freq="D")
filt = xtick_vals.is_month_start

# ISO week number of the first day of each month.
month_weeks = xtick_vals[filt].isocalendar()["week"]

In [None]:
# Display the ISO week number of each month start.
month_weeks

In [None]:
##Calculating xaxis_tickvals
# Switch the reference calendar to 2018 (one timestamp per calendar day).
start = datetime.datetime(year=2018, month=1, day=1)
end = datetime.datetime(year=2018, month=12, day=31)

xtick_vals = pd.date_range(start=start, end=end, freq="D")

In [None]:
# Display the ISO week number of every day in the range.
xtick_vals.isocalendar().week

In [None]:
# Month-start mask over the current daily range ...
filt = xtick_vals.is_month_start

# ... and the ISO week number of each of those month starts.
month_weeks = xtick_vals[filt].isocalendar()["week"]

I'm starting to think that the issue is that I'm using a string to set the x value, rather than a numeric week number. Let's try replacing my x values with the integer `.isocalendar().week` values instead.

In [None]:
## Chart title
title = textwrap.wrap("<b>Prison population in England and Wales</b>", width=65)

##Plotting

fig = go.Figure()

# Build one line per calendar year. x is the ISO week number, so every year is
# drawn over the same numeric week axis.
trace_list = []
for year in df_include["date"].dt.year.unique():
    df_year = df_include.loc[df_include["date"].dt.year == year]

    trace = go.Scatter(
        name=str(year),
        mode="lines",
        connectgaps=True,
        x=df_year["date"].dt.isocalendar()["week"],
        y=df_year["population"],
        # Hover heading shows day and abbreviated month, e.g. "05 Mar".
        hovertext=df_year["date"].dt.strftime("%d %b"),
        hovertemplate="<b>%{hovertext}</b><br>%{y:,.0f}",
    )
    trace_list.append(trace)

fig.add_traces(trace_list)


##Edit the layout

# One tick per month: positions are the week numbers in month_weeks, labels are
# the abbreviated month names of the month-start days.
fig.update_layout(
    title="<br>".join(title),
    margin={"pad": 10},
    hovermode="x",
    yaxis_dtick=2000,
    xaxis_tickvals=month_weeks,
    xaxis_ticktext=xtick_vals[filt].strftime("%b"),
)

## Chart annotations

# Vertical nudge (in y-axis units) for each year label, in trace order.
y_list = [0, 0, 0, 0, 0]

annotations = []

# Year label at the last point of each line, coloured like the line
for i in range(len(trace_list)):
    annotations.append(
        {
            "xref": "x",
            "yref": "y",
            "x": trace_list[i].x[-1],
            "y": trace_list[i].y[-1] + y_list[i],
            "text": str(trace_list[i].name),
            "xanchor": "left",
            "align": "left",
            "showarrow": False,
            "font_color": fig.layout.template.layout.colorway[i],
            "font_size": 10,
        }
    )

# Source label, positioned in paper coordinates below the plot area
annotations.append(
    {
        "xref": "paper",
        "yref": "paper",
        "x": -0.08,
        "y": -0.19,
        "align": "left",
        "showarrow": False,
        "text": "<b>Source: Ministry of Justice Prison Population Bulletin</b>",
        "font_size": 12,
    }
)

# y-axis label, anchored at week 1 on x and just above the plot area on y
annotations.append(
    {
        "xref": "x",
        "yref": "paper",
        "x": 1,
        "y": 1.04,
        "align": "left",
        "xanchor": "left",
        "showarrow": False,
        "text": "People in prison",
        "font_size": 12,
    }
)

# Attach every annotation to the figure
fig.update_layout(annotations=annotations)

fig.update_yaxes(range=[75900, 90100], nticks=6)
fig.update_xaxes(range=[1, 52])

##Plot file offline
fig.show(config=plotly_config, renderer="browser")


Okay, that seems to have been the issue. I've also made some small adjustments to the hovertext to include the day and month.

# CODE CONSOLIDATION

In [2]:
##Reading in data
# Only the two columns the chart needs are loaded; "date" is parsed to datetimes.
df = pd.read_csv(
    "{}prison_population.csv".format(config["data"]["clnFilePath"]),
    parse_dates=["date"],
    usecols=["date", "population"],
)

##Filtering year range
# Keep observations dated 2020-2024 inclusive.
df_include = df.query("date.dt.year >= 2020 and date.dt.year <= 2024")

In [7]:
##Calculating xaxis_tickvals
# 2018 is the reference calendar for the month ticks. 1 Jan 2018 is a Monday, so
# ISO week 1 starts on 1 January and no manual fix-up of the January tick is needed.
start = datetime.datetime(year=2018, month=1, day=1)
end = datetime.datetime(year=2018, month=12, day=31)

xtick_vals = pd.date_range(start=start, end=end, freq="D")
filt = xtick_vals.is_month_start

# ISO week number of the first day of each month -> x position of each month tick.
month_weeks = xtick_vals[filt].isocalendar()["week"]

In [8]:
## Chart title
title = textwrap.wrap("<b>Prison population in England and Wales</b>", width=65)

##Plotting

fig = go.Figure()

# One line per calendar year, positioned by ISO week number so that the years
# overlay on a shared week-of-year axis.
trace_list = []
for year in df_include["date"].dt.year.unique():
    df_year = df_include[df_include["date"].dt.year == year]

    # ISO weeks belong to ISO years, which do not line up exactly with calendar years:
    # the first days of January can report week 52/53 (of the previous ISO year) and the
    # last days of December can report week 1 (of the next). Plotted as-is inside a
    # calendar-year trace, such a point makes the line double back across the chart.
    # Move those points just outside the plotted 1-52 range instead
    # (0 = before week 1, 53 = after week 52); every other point keeps its ISO week.
    iso = df_year["date"].dt.isocalendar()
    week = iso["week"].mask(iso["year"] < year, 0).mask(iso["year"] > year, 53)

    trace = go.Scatter(
        x=week,
        y=df_year["population"],
        mode="lines",
        connectgaps=True,
        # Hover heading shows day and abbreviated month, e.g. "05 Mar".
        hovertext=df_year["date"].dt.strftime("%d %b"),
        hovertemplate="<b>%{hovertext}</b><br>" + "%{y:,.0f}",
        name=str(year),
    )

    trace_list.append(trace)

fig.add_traces(trace_list)


##Edit the layout

# One tick per month: positions are the week numbers in month_weeks, labels are
# the abbreviated month names of the month-start days.
fig.update_layout(
    margin={'pad': 10},
    title="<br>".join(title),
    yaxis_dtick=2000,
    xaxis_tickvals=month_weeks,
    xaxis_ticktext=xtick_vals[filt].strftime("%b"),
    hovermode='x'
)

## Chart annotations
annotations = []

# Vertical nudge (in y-axis units) for each year label, in trace order. Sized from
# the number of traces so that widening the year filter cannot raise an IndexError;
# edit individual entries to separate overlapping labels.
y_list = [0] * len(trace_list)

# Template colour cycle; indexed modulo its length so that more traces than
# colours reuse colours instead of failing.
colorway = fig.layout.template.layout.colorway

# Adding trace annotations
# Each line is labelled with its year at its last point, in the line's colour.
for i in range(0, len(trace_list)):
    annotations.append(
        dict(
            xref="x",
            yref="y",
            x=trace_list[i].x[-1],
            y=trace_list[i].y[-1] + y_list[i],
            text=str(trace_list[i].name),
            xanchor="left",
            align="left",
            showarrow=False,
            font_color=colorway[i % len(colorway)],
            font_size=10,
        )
    )

# Adding source label
annotations.append(
    dict(
        xref="paper",
        yref="paper",
        x=-0.08,
        y=-0.19,
        align="left",
        showarrow=False,
        text="<b>Source: Ministry of Justice Prison Population Bulletin</b>",
        font_size=12,
    )
)

# Adding y-axis label
# Anchored at week 1 on x and just above the plot area on y.
annotations.append(
    dict(
        xref="x",
        yref="paper",
        x=1,
        y=1.04,
        align="left",
        xanchor="left",
        showarrow=False,
        text="People in prison",
        font_size=12,
    )
)

# Adding annotations to layout
fig.update_layout(annotations=annotations)

fig.update_yaxes(range=[75900, 90100], nticks=6)
fig.update_xaxes(range=[1, 52])

##Plot file offline
fig.show(config=plotly_config, renderer='browser')
