In [1]:
import pandas as pd
import textwrap
import plotly.graph_objs as go
import plotly.io as pio
from pathlib import Path #To create unique filenames for each PFA chart
import itertools

import src.data.utilities as utils
import src.visualization.prt_theme as prt_theme

# Project configuration; its 'data' section supplies the folder paths used for loading data and exporting charts
config = utils.read_config()
# Make "prt_template" the default Plotly template for every figure in this notebook.
# NOTE(review): presumably the template is registered by importing src.visualization.prt_theme — confirm.
pio.templates.default = "prt_template"

In [2]:
def loadData(status='processed', filename='PFA_2022_offences.csv') -> pd.DataFrame:
    """Load a CSV file into a DataFrame and pass it through `utils.categoryColumns()`.

    Parameters
    ----------
    status : {'raw', 'interim', 'processed'}, default 'processed'
        Status of the data processing. It selects which folder entry is read
        from the ``data`` section of the project config:
        * If 'raw', the file is located in "rawFilePath"
        * If 'interim', the file is located in "intFilePath"
        * If 'processed', the file is located in "clnFilePath"
    filename : str, default 'PFA_2022_offences.csv'
        Name of the CSV file to be loaded. It is appended directly to the
        configured folder string, so that string must end with a path separator.

    Returns
    -------
    DataFrame
        The CSV data after `utils.categoryColumns()` has been applied, which is
        intended to convert eligible object columns into category columns to
        limit memory requirements.

    Raises
    ------
    KeyError
        If `status` is not one of 'raw', 'interim' or 'processed'.
    """
    paths = {
        "raw": 'rawFilePath',
        "interim": 'intFilePath',
        "processed": 'clnFilePath'
    }

    # Fail early with a readable message instead of a bare KeyError raised
    # from inside the f-string below.
    if status not in paths:
        raise KeyError(f"Unknown status {status!r}; expected one of {list(paths)}")

    dfPath = f"{config['data'][paths[status]]}{filename}"
    df = pd.read_csv(dfPath)
    print('Data loaded')
    return utils.categoryColumns(df)

In [20]:
# Load the processed 2022 offences file (loadData's defaults) in its wide layout
df = loadData()

Data loaded


In [21]:
# Normalise the capitalisation of the fraud column so it matches the other offence group names
df = df.rename(columns={'Fraud Offences': 'Fraud offences'})

In [12]:
# Melting the wide table (one column per offence group) into long form.
# The result is stored under a new name so that `df` keeps its wide layout:
# pfaOffencesChart.prepareData() melts `df` itself, and overwriting `df` here
# makes a top-to-bottom run of the notebook melt the data twice and fail.
# Leaving out `value_vars` melts every column except 'pfa', whatever its position.
df_long = pd.melt(df, id_vars='pfa', var_name='offence', value_name='proportion')

#Selecting the offences that I want to continue to display at the root of the sunburst diagram
highlighted_offence_groups = ['Theft offences', 'Drug offences', 'Violence against the person']
filt = df_long['offence'].isin(highlighted_offence_groups)

df_long['parent'] = "" # Creating a new object column to prevent FutureWarning being triggered in next line
df_long.loc[filt, 'parent'] = "All offences" # This method prevents that annoying copy/view warning
df_long.loc[~filt, 'parent'] = "All other<br>offences"

#Setting discrete plotting order; any offence not listed falls back to 0 via fillna
plot_dict = {
    'All other offences': 0,
    'Theft offences': 1,
    'Drug offences': 2,
    'Violence against the person': 3
}

df_long['plot_order'] = df_long["offence"].map(plot_dict).fillna(0)

#Adding text wrapping for longer offence categories
def customwrap(s, width=19):
    """Wrap `s` to at most `width` characters per line, joining the lines with "<br>"."""
    return "<br>".join(textwrap.wrap(s, width=width))

df_long['offence'] = df_long['offence'].map(customwrap)


In [90]:
class pfaOffencesChart:
    """Sunburst chart of offence groups for a single police force area (PFA).

    Typical use is ``pfaOffencesChart("Kent").outputChart()`` followed by
    ``saveChart()``. Build a fresh instance for every chart: `createTraces`
    adds a trace and a summary row each time it runs.
    """

    # Labels of the sunburst hierarchy. They are runtime strings and contain
    # the "<br>" line break exactly as it is displayed in the chart.
    _ROOT_LABEL = "All offences"
    _OTHER_LABEL = "All other<br>offences"
    # Offence groups shown directly under the root; every other group is
    # nested under _OTHER_LABEL.
    _HIGHLIGHTED_OFFENCE_GROUPS = ['Theft offences', 'Drug offences', 'Violence against the person']
    # Columns that identify a frame which is already in long, prepared form.
    _LONG_COLUMNS = ('pfa', 'offence', 'proportion', 'parent', 'plot_order')

    def __init__(self, pfa, df=df):
        """Store the PFA name and source data, and start with an empty figure.

        Parameters
        ----------
        pfa : str
            Value of the 'pfa' column to chart.
        df : DataFrame, default is the notebook-level `df`
            Offence data with a 'pfa' column. The default is bound once, when
            this class is defined, so re-run this cell after reloading `df`.
        """
        self.pfa = pfa
        self.df = df
        self.trace_list = [] # Need to empty my trace_list with every loop through each PFA so that charts are plotted separately
        self.annotations = []
        self.fig = go.Figure() # Need to also instantiate the figure with every loop in order to clear fig.data values

    def customwrap(self, s, width=19):
        """Wrap `s` to at most `width` characters per line, joining the lines with "<br>"."""
        return "<br>".join(textwrap.wrap(s, width=width))

    def prepareData(self):
        """Reshape `self.df` to long form and add the sunburst helper columns.

        Adds 'parent' (the hierarchy parent label) and 'plot_order' (highlighted
        groups get 1..3, everything else 0), then wraps long offence names.
        A frame that already has all the long-form columns is returned
        untouched, so the method is safe to call on data that was reshaped
        earlier in the notebook.

        Returns
        -------
        DataFrame
            The prepared long-form frame, also stored in `self.df`.
        """
        if set(self._LONG_COLUMNS).issubset(self.df.columns):
            return self.df

        #Melting df from wide to long; without `value_vars` every column except 'pfa' is melted
        self.df = pd.melt(self.df, id_vars='pfa', var_name='offence', value_name='proportion')

        #Selecting the offences that I want to continue to display at the root of the sunburst diagram
        filt = self.df['offence'].isin(self._HIGHLIGHTED_OFFENCE_GROUPS)

        self.df['parent'] = "" # Creating a new object column to prevent FutureWarning being triggered
        self.df.loc[filt, 'parent'] = self._ROOT_LABEL # This method prevents that annoying copy/view warning
        self.df.loc[~filt, 'parent'] = self._OTHER_LABEL

        #Setting discrete plotting order; offences that are not highlighted fall back to 0
        plot_dict = {name: position for position, name in enumerate(self._HIGHLIGHTED_OFFENCE_GROUPS, start=1)}
        self.df['plot_order'] = self.df["offence"].map(plot_dict).fillna(0)

        #Wrapping longer offence text with the class's own helper rather than a notebook-level function
        self.df['offence'] = self.df['offence'].map(self.customwrap)

        return self.df

    def createTraces(self):
        """Filter the prepared data to `self.pfa` and add the sunburst trace.

        A summary row labelled "All other<br>offences" is appended whose value
        is the sum of the non-highlighted groups; with ``branchvalues='total'``
        that row is the parent sector of those groups.

        Raises
        ------
        ValueError
            If the data holds no rows for `self.pfa`.
        """
        pfa_df = self.df[self.df["pfa"] == self.pfa]
        if pfa_df.empty:
            raise ValueError(f"No rows found for PFA {self.pfa!r}")

        # Rows nested under the "other" sector. Derived from this frame's own
        # 'parent' column instead of a mask left behind by another cell.
        is_other = pfa_df['parent'] == self._OTHER_LABEL
        other_total = pd.DataFrame.from_records([{'pfa': self.pfa,
                                                  'offence': self._OTHER_LABEL,
                                                  'proportion': pfa_df.loc[is_other, 'proportion'].sum(),
                                                  'parent': self._ROOT_LABEL,
                                                  'plot_order': 0}])

        self.df = pd.concat(
            [pfa_df, other_total],
            ignore_index=True
        ).sort_values(by=['plot_order', 'proportion'], ascending=True)

        self.fig.add_trace(go.Sunburst(
            labels=self.df['offence'],
            parents=self.df['parent'],
            values=self.df['proportion'],
            sort=False, # keep the row order set by sort_values above
            branchvalues='total',
            texttemplate="%{label} <b>%{percentRoot: .0%}</b>",
            hovertemplate="<b>%{label}</b><br>%{percentParent: .0%} of %{parent}<extra></extra>",
            hoverinfo='label+percent parent', # NOTE(review): believed to be overridden by hovertemplate — confirm against the Plotly reference
            insidetextorientation='radial',
            rotation=300,
        ))

    def chartParams(self):
        """Set the title, margins, minimum text size and figure dimensions."""
        ## Chart title
        title = textwrap.wrap(f'<b>Imprisonment of women in {self.pfa} by offence group in 2022</b>', width=100)

        self.fig.update_layout(
            margin=dict(t=75, l=0, r=0, b=0),
            title="<br>".join(title),
            title_y=0.94,
            title_yanchor="bottom",
            uniformtext_minsize=8,
            uniformtext_mode='hide',
            width=630,
            height=630,
        )

    def chartAnnotations(self):
        """Add the data source note to the figure."""
        # Adding source label
        self.annotations.append(
            dict(
                xref="paper",
                yref="paper",
                xanchor="left",
                yanchor="top",
                x=0.08,
                y=0.19,
                showarrow=False,
                text="Source: Ministry of Justice, Criminal justice statistics",
                font_size=12,
            )
        )

        # Adding annotations to layout
        self.fig.update_layout(annotations=self.annotations)

    def saveChart(self, filetype='eps'):
        """Write the figure to ``<outPath>/custody_offences_2022/<filetype>/<pfa>.<filetype>``.

        `outPath` is read from the 'data' section of the project config and is
        resolved against the current working directory. The folder is created
        when missing.

        Parameters
        ----------
        filetype : str, default 'eps'
            File extension passed on to Plotly's image export.
        """
        self.filetype = filetype

        export_dir = Path.cwd() / f"{config['data']['outPath']}" / f"custody_offences_2022/{self.filetype}"
        export_dir.mkdir(parents=True, exist_ok=True) #generate if does not exist

        # The file is named after the requested PFA, so the name is right even
        # when the data has not been filtered yet
        export_path = export_dir / f'{self.pfa}.{self.filetype}'

        self.fig.write_image(export_path)

    def outputChart(self):
        """Prepare the data, build the chart and display it."""
        self.prepareData()
        self.createTraces()
        self.chartParams()
        self.chartAnnotations()
        self.fig.show()

In [91]:
# Single-area example: chart for Kent, using the class's default data frame
kent = pfaOffencesChart("Kent")

In [92]:
# Prepare the data, build the sunburst and display it
kent.outputChart()

In [75]:
# Export the Kent chart with the default file type ('eps')
kent.saveChart()

In [80]:
# Build, display and export one chart per police force area present in the data.
# A new pfaOffencesChart is created on every pass so each chart starts from an empty figure.
for pfa in df['pfa'].unique():
    chart = pfaOffencesChart(pfa)
    chart.outputChart()
    chart.saveChart()