In [1]:
import pandas as pd
import textwrap
import plotly.graph_objs as go
import plotly.io as pio
from pathlib import Path #To create unique filenames for each PFA chart
import itertools

import src.data.utilities as utils
import src.visualization.prt_theme as prt_theme

# Project configuration; its ['data'] section supplies the input and output folders used below.
config = utils.read_config()
# Use the project theme for every figure created in this notebook.
# NOTE(review): "prt_template" is presumably registered as a side effect of importing prt_theme — confirm.
pio.templates.default = "prt_template"

In [3]:
def loadData(status='processed', filename='PFA_2022_offences.csv') -> pd.DataFrame:
    """Load a CSV file into a DataFrame and pass it through `utils.categoryColumns()`.

    Parameters
    ----------
    status : {'raw', 'interim', 'processed'}, default 'processed'
        Processing stage of the data; it selects which folder from the config file is used.
        * If 'raw', the file is located in "rawFilePath"
        * If 'interim', the file is located in "intFilePath"
        * If 'processed', the file is located in "clnFilePath"
    filename : str, default 'PFA_2022_offences.csv'
        Name of the CSV file to be loaded. It is appended directly to the configured folder.

    Returns
    -------
    DataFrame
        The CSV data after `utils.categoryColumns()`, which converts eligible object
        columns into category columns to limit memory requirements.

    Raises
    ------
    KeyError
        If `status` is not one of the supported values.
    """
    paths = {
        "raw": 'rawFilePath',
        "interim": 'intFilePath',
        "processed": 'clnFilePath'
    }

    # Fail early with a readable message instead of a bare KeyError raised from inside the f-string.
    if status not in paths:
        raise KeyError(f"Unknown status {status!r}; expected one of {sorted(paths)}")

    # The folder and filename are concatenated as plain strings, so the configured
    # folder has to end with a path separator.
    dfPath = f"{config['data'][paths[status]]}{filename}"
    df = pd.read_csv(dfPath)
    print('Data loaded')
    return utils.categoryColumns(df)

In [7]:
# Load the processed 2022 offences file using loadData()'s defaults.
df = loadData()

Data loaded


In [8]:
# Inspect the column names: 'pfa' followed by one column per offence group.
df.columns

Index(['pfa', 'Criminal damage and arson', 'Drug offences', 'Fraud Offences',
       'Miscellaneous crimes against society', 'Possession of weapons',
       'Public order offences', 'Robbery', 'Sexual offences',
       'Summary motoring', 'Summary non-motoring', 'Theft offences',
       'Violence against the person'],
      dtype='object')

The 'Fraud Offences' column has a capital "O", unlike the other offence group names, so let's rename it to 'Fraud offences' for consistency. I'll circle back and fix this in the processing code later on.

In [15]:
# Bring the one inconsistently capitalised column name in line with the other offence groups.
df = df.rename(columns={'Fraud Offences': 'Fraud offences'})
df

Unnamed: 0,pfa,Criminal damage and arson,Drug offences,Fraud offences,Miscellaneous crimes against society,Possession of weapons,Public order offences,Robbery,Sexual offences,Summary motoring,Summary non-motoring,Theft offences,Violence against the person
0,Avon and Somerset,0.017,0.103,0.017,0.026,0.009,0.069,0.009,0.009,0.017,0.155,0.224,0.345
1,Bedfordshire,0.0,0.079,0.0,0.053,0.132,0.053,0.026,0.026,0.0,0.079,0.316,0.237
2,Cambridgeshire,0.0,0.118,0.044,0.015,0.059,0.074,0.015,0.0,0.0,0.162,0.324,0.191
3,Cheshire,0.014,0.122,0.108,0.014,0.041,0.054,0.027,0.0,0.014,0.095,0.297,0.216
4,Cleveland,0.01,0.08,0.03,0.06,0.06,0.02,0.05,0.02,0.03,0.07,0.41,0.16
5,Cumbria,0.0,0.069,0.0,0.0,0.0,0.172,0.0,0.0,0.034,0.069,0.345,0.31
6,Derbyshire,0.0,0.057,0.057,0.025,0.025,0.041,0.025,0.0,0.033,0.082,0.443,0.213
7,Devon and Cornwall,0.0,0.032,0.032,0.032,0.111,0.159,0.0,0.048,0.063,0.095,0.222,0.206
8,Dorset,0.043,0.174,0.13,0.043,0.043,0.0,0.0,0.0,0.0,0.087,0.174,0.304
9,Durham,0.0,0.0,0.0,0.105,0.0,0.07,0.018,0.035,0.053,0.07,0.421,0.228


In [19]:
# Melting df from wide to long: one row per (pfa, offence) pair.
# Every column other than 'pfa' is an offence group, so no explicit value_vars are needed.
df2 = df.melt(id_vars='pfa', var_name='offence', value_name='proportion')

# Adding line breaks to longer offence categories so the labels fit inside the chart sectors.
# The keys are complete labels, so they are matched exactly rather than as regex patterns.
offence_label_breaks = {
    "Violence against the person": "Violence against<br>the person",
    "Theft offences": "Theft<br>offences",
    "Drug offences": "Drug<br>offences",
    "Possession of weapons": "Possession<br>of weapons",
    "Summary motoring": "Summary<br>motoring",
    "Summary non-motoring": "Summary<br>non-motoring",
    "Miscellaneous crimes against society": "Miscellaneous crimes<br>against society",
}
df2['offence'] = df2['offence'].replace(offence_label_breaks)

# Selecting the offences that I want to continue to display at the root of the sunburst diagram
highlighted_offence_groups = ['Theft<br>offences', 'Drug<br>offences', 'Violence against<br>the person']
filt = df2['offence'].isin(highlighted_offence_groups)

# Highlighted groups hang directly off the root; every other group is nested under
# the "All other<br>offences" sector.
df2['parent'] = filt.map({True: "All offences", False: "All other<br>offences"})

In [20]:
# Preview the long-format table, including the 'parent' column.
df2

Unnamed: 0,pfa,offence,proportion,parent
0,Avon and Somerset,Criminal damage and arson,0.017,All other<br>offences
1,Bedfordshire,Criminal damage and arson,0.000,All other<br>offences
2,Cambridgeshire,Criminal damage and arson,0.000,All other<br>offences
3,Cheshire,Criminal damage and arson,0.014,All other<br>offences
4,Cleveland,Criminal damage and arson,0.010,All other<br>offences
...,...,...,...,...
499,Warwickshire,Violence against<br>the person,0.220,All offences
500,West Mercia,Violence against<br>the person,0.255,All offences
501,West Midlands,Violence against<br>the person,0.231,All offences
502,West Yorkshire,Violence against<br>the person,0.195,All offences


In [88]:
# Everything that is identical for every police force area is prepared once, before the loop.

# Output folder for the charts; generated if it does not exist.
export_path = Path.cwd() / str(config['data']['outPath']) / "custody_offences_2022"
export_path.mkdir(parents=True, exist_ok=True)

## Chart annotations
# Source label shown below every chart.
annotations = [
    dict(
        xref="paper",
        yref="paper",
        x=0.04,
        y=-0.1,
        align="left",
        showarrow=False,
        text="Source: Ministry of Justice, Criminal justice statistics",
        font_size=12,
    )
]

for pfa in df2['pfa'].unique():
    pfa_df = df2[df2["pfa"] == pfa]

    # "All other<br>offences" is a sector in its own right: it sits under the root and
    # contains every offence group that is NOT in highlighted_offence_groups.
    # Its value is computed from this area's own rows rather than from a mask built on
    # the full table, so the cell does not rely on implicit index alignment.
    other_offences_total = pfa_df.loc[pfa_df['parent'] == "All other<br>offences", 'proportion'].sum()

    pfa_df = pd.concat(
        [
            pfa_df,
            pd.DataFrame.from_records([{'pfa': pfa,
                                        'offence': "All other<br>offences",
                                        'proportion': other_offences_total,
                                        'parent': "All offences"}]),
        ],
        ignore_index=True,
    ).sort_values(by=['offence'], ascending=False)

    ## Chart title
    title = textwrap.wrap(f'<b>Imprisonment of women in {pfa}<br>by offence group in 2022</b>', width=100)

    fig = go.Figure(
        go.Sunburst(
            labels=pfa_df['offence'],
            parents=pfa_df['parent'],
            values=pfa_df['proportion'],
            branchvalues='total',
            texttemplate="%{label}<br>%{percentRoot: .0%}",
            hovertemplate="<b>%{label}</b><br>%{percentParent: .0%} of %{parent}<extra></extra>",
            hoverinfo='label+percent parent',
            insidetextorientation='horizontal',
            rotation=90,
            sort=False,  # keep the sector order produced by sort_values above
        )
    )

    fig.update_layout(
        margin=dict(t=80, l=0, r=0, b=50),
        title="<br>".join(title),
        title_y=0.94,
        title_yanchor="bottom",
        width=655,
        height=500,
        annotations=annotations,
    )

    # One EPS file per police force area, named after the area.
    filename = str(pfa)
    export_eps_path = export_path / f"{filename}.eps"

    fig.write_image(export_eps_path)