In [1]:
import pandas as pd
import textwrap
import plotly.graph_objs as go
import plotly.io as pio
from pathlib import Path #To create unique filenames for each PFA chart
import itertools

import src.data.utilities as utils
import src.visualization.prt_theme as prt_theme

# Project configuration; other cells read file locations from config['data'].
config = utils.read_config()
# Use "prt_template" as the default plotly template for every figure created below
# (presumably registered when src.visualization.prt_theme is imported — TODO confirm).
pio.templates.default = "prt_template"

In [2]:
def loadData(status='processed', filename='sentencesByPFA.csv') -> pd.DataFrame:
    """Load a CSV file into a Pandas DataFrame and pass it through `utils.categoryColumns()`.

    Parameters
    ----------
    status : {'raw', 'interim', 'processed'}, default 'processed'
        Status of the data processing, used to pick the source folder from
        the "data" section of the config file.
        * If 'raw', file is located in "rawFilePath"
        * If 'interim', file is located in "intFilePath"
        * If 'processed', file is located in "clnFilePath"
    filename : str, default 'sentencesByPFA.csv'
        Name of CSV file to be loaded.

    Returns
    -------
    DataFrame
        CSV data as returned by `utils.categoryColumns()`, i.e. with any
        eligible object columns converted into category columns to limit
        memory requirements.

    Raises
    ------
    KeyError
        If `status` is not one of 'raw', 'interim' or 'processed'.
    """
    paths = {
        "raw": 'rawFilePath',
        "interim": 'intFilePath',
        "processed": 'clnFilePath'
    }

    # Fail early with an actionable message instead of a bare KeyError('<status>').
    if status not in paths:
        raise KeyError(
            f"Unknown status {status!r}; expected one of {sorted(paths)}"
        )

    # The folder and filename are concatenated as plain strings, so the
    # configured folder path must already end with a path separator.
    dfPath = f"{config['data'][paths[status]]}{filename}"
    df = pd.read_csv(dfPath)
    print('Data loaded')
    return utils.categoryColumns(df)

In [3]:
# Load the dataset with loadData()'s defaults (status='processed', filename='sentencesByPFA.csv').
df = loadData()

Data loaded


In [4]:
# Check which columns are available before plotting.
df.columns

Index(['pfa', 'year', 'outcome', 'freq'], dtype='object')

In [18]:
# Candidate y-axis maxima: each sits slightly above a round number so the top
# gridline/tick label is not clipped.
Y_AXIS_STEPS = [52, 103, 204, 305, 405, 606, 909, 1210, 2020, 3040]


def _y_axis_ceiling(max_value, steps=Y_AXIS_STEPS):
    """Return the smallest step strictly greater than `max_value`.

    When `max_value` is at or beyond the largest step, fall back to 5% headroom
    above the data instead of indexing past the end of `steps`.
    """
    return next((step for step in steps if step > max_value), max_value * 1.05)


## Export location
# Charts are saved to <cwd>/<config outPath>/cust_comm_sus_sent; the folder is
# created once up front because it does not depend on the PFA being plotted.
export_path = Path.cwd() / f"{config['data']['outPath']}" / "cust_comm_sus_sent"
export_path.mkdir(parents=True, exist_ok=True)  # generate if does not exist

# One grouped bar chart per PFA, one bar series per sentencing outcome.
# Missing PFA values are skipped: they match no rows and cannot name a file.
for pfa in df['pfa'].dropna().unique():
    pfa_df = df[df["pfa"] == pfa]

    # Fresh trace list and figure on every pass so charts are plotted separately.
    trace_list = []
    fig = go.Figure()

    for outcome in pfa_df["outcome"].dropna().unique():
        pfa_df_outcome = pfa_df[pfa_df["outcome"] == outcome]

        trace_list.append(
            go.Bar(
                x=pfa_df_outcome["year"],
                y=pfa_df_outcome["freq"],
                name=str(outcome),
                # Lower-cased outcome is shown after the value in the hover label.
                customdata=pfa_df_outcome["outcome"].str.lower(),
                hovertemplate="%{y} %{customdata}<extra></extra>",
            )
        )

    fig.add_traces(trace_list)

    ## Chart title
    # Wrapped at 60 characters and re-joined with <br> so long PFA names break cleanly.
    title = textwrap.wrap(f'<b>Sentencing of women in {pfa} 2010–2022</b>', width=60)

    fig.update_layout(
        margin=dict(l=64, b=75, r=20),
        barmode="group",
        title="<br>".join(title),
        title_y=0.94,
        title_yanchor="bottom",
        yaxis_title="",
        yaxis_tick0=0,
        yaxis_tickformat=",.0f",
        xaxis_showgrid=False,
        xaxis_tickcolor="#54565B",
        xaxis_dtick=2,
        xaxis_tick0=2010,
        showlegend=True,
        hovermode="x",
        modebar_activecolor="#A01D28",
        width=655,
        height=500,
        legend=dict(
            yanchor="top",
            y=0.99,
            xanchor="right",
            x=1,
        ),
    )

    ## Chart annotations
    annotations = []

    # Adding source label
    # NOTE(review): presumably sourceAnnotation appends the label to `annotations` — confirm in prt_theme.
    source = prt_theme.sourceAnnotation("Ministry of Justice, Criminal justice statistics", annotations)

    # Adding y-axis label, anchored at the earliest year plotted for this PFA
    # (taken from the whole PFA frame rather than from whichever outcome was iterated last).
    annotations.append(
        dict(
            xref="x",
            yref="paper",
            x=pfa_df["year"].min(),
            y=1.04,
            align="left",
            xanchor="left",
            showarrow=False,
            text="Women sentenced",
            font_size=12,
        )
    )

    # Adding annotations to layout
    fig.update_layout(annotations=annotations)

    ## Setting chart axis ranges
    max_y_val = pfa_df["freq"].max()
    if pd.isna(max_y_val):
        max_y_val = 0  # no usable values: fall back to the smallest axis step
    y_max = _y_axis_ceiling(max_y_val)

    fig.update_yaxes(range=[0, y_max])
    fig.update_xaxes(range=[2009.3, 2022.8])

    ## Exporting to static image
    # The PFA name itself is the file stem, giving one SVG per PFA.
    filename = str(pfa)
    export_svg_path = export_path / f'{filename}.svg'

    fig.write_image(export_svg_path)