In [3]:
import pandas as pd
import textwrap
import plotly.graph_objs as go
import plotly.io as pio
from pathlib import Path #To create unique filenames for each PFA chart

import src.data.utilities as utils
import src.visualization.prt_theme

# Project configuration, read through the local utilities module. Later cells
# look up directory settings under config['data'].
config = utils.read_config()
# Use "prt_template" as the default plotly template for every figure below.
# NOTE(review): presumably registered as a side effect of importing
# `src.visualization.prt_theme` above — confirm.
pio.templates.default = "prt_template"

In [4]:
def loadData(status='processed', filename='PFA_2022_offences.csv') -> pd.DataFrame:
    """Load a CSV file into a DataFrame and pass it through `utils.categoryColumns()`.

    Parameters
    ----------
    status : {'raw', 'interim', 'processed'}, default 'processed'
        Status of the data processing. It selects which directory key is read
        from the ``data`` section of the config file:
        * If 'raw', file is located in "rawFilePath"
        * If 'interim', file is located in "intFilePath"
        * If 'processed', file is located in "clnFilePath"
    filename : str, default 'PFA_2022_offences.csv'
        Name of CSV file to be loaded. It is appended directly to the
        configured directory string, so that string is expected to end with a
        path separator.

    Returns
    -------
    DataFrame
        The loaded data after `utils.categoryColumns()`, which is intended to
        convert eligible object columns into category columns to limit memory
        requirements.

    Raises
    ------
    KeyError
        If `status` is not one of the supported values.
    """
    paths = {
        "raw": 'rawFilePath',
        "interim": 'intFilePath',
        "processed": 'clnFilePath'
    }

    # Fail early with an actionable message instead of a bare KeyError raised
    # from the middle of the path f-string.
    if status not in paths:
        raise KeyError(f"Unknown status {status!r}; expected one of {list(paths)}")

    dfPath = f"{config['data'][paths[status]]}{filename}"
    df = pd.read_csv(dfPath)
    print('Data loaded')
    return utils.categoryColumns(df)

In [5]:
# Load the default processed file ('PFA_2022_offences.csv') into the notebook-level frame.
df = loadData()

Data loaded


In [6]:
# Lower-case the second word of the 'Fraud Offences' column name.
df = df.rename(columns={'Fraud Offences': 'Fraud offences'})

In [None]:
# Preview the first rows only rather than rendering the whole frame.
df.head()

In [7]:
class pfaOffencesChart:
    """Sunburst chart of the offence-group breakdown for a single PFA.

    The chart title describes the data as the imprisonment of women by offence
    group in 2022. Three offence groups are drawn as their own segments under
    "All offences"; every other group is nested under an "All other offences"
    segment.

    Typical use is ``pfaOffencesChart("Kent").outputChart()``, optionally
    followed by ``addMissingOffenceLabels(n)`` and/or ``saveChart()``. The
    build pipeline runs at most once per instance, so these methods can be
    called in any order and repeatedly.
    """

    # Labels shared between data preparation and trace creation.
    ROOT_LABEL = "All offences"
    OTHER_LABEL = "All other<br>offences"
    # Offence groups that keep their own segment directly under the root.
    HIGHLIGHTED_OFFENCE_GROUPS = ('Theft offences', 'Drug offences', 'Violence against the person')

    def __init__(self, pfa, df=df):
        """
        Parameters
        ----------
        pfa : str
            Value matched against the 'pfa' column to select the rows to plot.
        df : DataFrame, default is the notebook-level `df`
            Wide data: a 'pfa' column first, then one column per offence group
            holding proportions. The default is bound when the class is
            defined, so rebinding the notebook-level `df` later does not
            change it.
        """
        self.pfa = pfa
        self.df = df
        self.trace_list = [] # Traces added to self.fig; starts empty for every instance so charts are plotted separately
        self.annotations = []
        self.fig = go.Figure() # Fresh figure per instance so fig.data starts empty
        self._prepared = False # True once prepareData() has melted self.df
        self._built = False # True once the whole build pipeline has run

    def customWrap(self, s, width=19):
        """Wrap `s` to `width` characters, joining the lines with "<br>"."""
        return "<br>".join(textwrap.wrap(s, width=width))

    def prepareData(self):
        """Melt the wide frame to long form and add 'parent' and 'plot_order' columns.

        Safe to call more than once: after the first call the already-prepared
        frame is returned unchanged. Re-melting the long frame would fail or
        produce garbage.

        Returns
        -------
        DataFrame
            Long frame with columns 'pfa', 'offence', 'proportion', 'parent'
            and 'plot_order'. Also stored on ``self.df``.
        """
        if self._prepared:
            return self.df

        #Melting df from wide to long (every column after the first is treated as an offence group)
        long_df = pd.melt(self.df, id_vars='pfa', value_vars=self.df.columns[1:], var_name='offence', value_name='proportion')

        #Selecting the offences that I want to continue to display at the root of the sunburst diagram
        self.filt = long_df['offence'].isin(list(self.HIGHLIGHTED_OFFENCE_GROUPS))

        long_df['parent'] = "" # Creating a new object column to prevent FutureWarning being triggered
        long_df.loc[self.filt, 'parent'] = self.ROOT_LABEL
        long_df.loc[~self.filt, 'parent'] = self.OTHER_LABEL

        #Setting discrete plotting order; anything not listed falls back to 0
        plot_dict = {
            'All other offences': 0,
            'Theft offences': 1,
            'Drug offences': 2,
            'Violence against the person': 3
        }
        long_df['plot_order'] = long_df["offence"].map(plot_dict).fillna(0)

        #Wrapping longer offence text
        long_df.loc[self.filt, 'offence'] = long_df.loc[self.filt, 'offence'].map(lambda x: self.customWrap(x, width=12)) # Highlighted groups get a narrower wrap width
        long_df.loc[~self.filt, 'offence'] = long_df.loc[~self.filt, 'offence'].map(self.customWrap) # Wrapping all other offences

        self.df = long_df
        self._prepared = True
        return self.df

    def createTraces(self):
        """Filter to the selected PFA, add the "all other offences" row and build the sunburst trace.

        Runs `prepareData()` first if needed and does nothing when the trace
        already exists, so it never adds a duplicate trace.

        Raises
        ------
        ValueError
            If no row of the data matches ``self.pfa``.
        """
        if self.trace_list:
            return
        self.prepareData()

        pfa_df = self.df[self.df["pfa"] == self.pfa]
        if pfa_df.empty:
            raise ValueError(f"No rows found for PFA {self.pfa!r}")

        # Total of the non-highlighted groups, taken from the filtered frame's
        # own 'parent' column rather than a mask built on the unfiltered frame.
        other_total = pfa_df.loc[pfa_df['parent'] == self.OTHER_LABEL, 'proportion'].sum()

        self.df = pd.concat([
            pfa_df,
            pd.DataFrame.from_records([{
                'pfa': pfa_df['pfa'].iloc[0],
                'offence': self.OTHER_LABEL,
                'proportion': other_total,
                'parent': self.ROOT_LABEL,
                'plot_order': 0
            }])
        ], ignore_index=True).sort_values(by=['plot_order', 'proportion'], ascending=True)

        # Creating the Sunburst trace
        sunburst_trace = go.Sunburst(
            labels=self.df['offence'],
            parents=self.df['parent'],
            values=self.df['proportion'],
            sort=False, # Keep the plot_order/proportion ordering set above
            branchvalues='total',
            texttemplate="%{label} <b>%{percentRoot: .0%}</b>",
            hovertemplate="<b>%{label}</b><br>%{percentParent: .0%} of %{parent}<extra></extra>",
            hoverinfo='label+percent parent', # NOTE(review): plotly documents hovertemplate as overriding hoverinfo
            insidetextorientation='radial',
            rotation=300,
            domain_column=0,
            domain_row=0
        )

        self.fig.add_trace(sunburst_trace)
        self.trace_list.append(sunburst_trace)

    def chartParams(self):
        """Set the figure title, margins and size."""
        ## Chart title
        title = textwrap.wrap(f'<b>Imprisonment of women in {self.df["pfa"].iloc[0]} by offence group in 2022</b>', width=100)

        self.fig.update_layout(
            margin = dict(t=75, l=0, r=0, b=0),
            title="<br>".join(title),
            title_y=0.94,
            title_yanchor="bottom",
            # uniformtext_minsize=8,
            # uniformtext_mode='hide',
            width=630,
            height=630,
            )

    def chartAnnotations(self):
        """Add the data-source note to the figure (once, even if called repeatedly)."""
        # Adding source label
        source_note = dict(
            xref="paper",
            yref="paper",
            xanchor="left",
            yanchor="top",
            x=0.05,
            y=0.07,
            showarrow=False,
            text="Source: Ministry of Justice, Criminal justice statistics",
            font_size=12,
        )
        if source_note not in self.annotations:
            self.annotations.append(source_note)

        # Adding annotations to layout
        self.fig.update_layout(annotations=self.annotations)

    def addMissingOffenceLabels(self, number=0):
        """Annotate the first `number` labels of the trace, then display the figure.

        The trace data is sorted by plot order and then proportion, so the
        first entries are the smallest non-highlighted segments.

        Every annotation is created at the same position (x=0.5, y=0.1), so
        they overlap until repositioned.

        Parameters
        ----------
        number : int, default 0
            How many leading labels to annotate; 0 adds none.
        """
        self._buildChart() # Make sure fig.data[0] exists

        trace = self.fig.data[0]

        # Manually add annotations for labels outside the sunburst chart (only for hidden labels)
        for label, value in zip(trace.labels[:number], trace.values[:number]):
            note = dict(
                x=0.5,  # Adjust the x-coordinate to control the distance from the chart
                y=0.1,
                text=f"{label} ({value:.0%})",  # Display the label and proportion
                showarrow=True,
                xanchor='left',
                font=dict(size=10),  # Adjust the font size as needed
            )
            if note not in self.annotations: # Avoid duplicates when called again
                self.annotations.append(note)

        # Adding annotations to layout
        self.fig.update_layout(annotations=self.annotations)

        # Displaying chart
        return self.fig.show()

    def saveChart(self, filetype='eps'):
        """Write the chart to ``<cwd>/<outPath>/custody_offences_2022/<filetype>/<pfa>.<filetype>``.

        The chart is built first if that has not happened yet, so this works
        with or without a preceding `outputChart()` call.

        Parameters
        ----------
        filetype : str, default 'eps'
            Used both as the output sub-directory and as the file extension.
        """
        self.filetype = filetype

        self._buildChart() #Allows saveChart method to run without outputChart requirement

        export_dir = Path.cwd() / str(config['data']['outPath']) / f"custody_offences_2022/{self.filetype}"
        export_dir.mkdir(parents=True, exist_ok=True) #generate if does not exist

        # Setting filename variable and full path
        filename = str(self.df["pfa"].iloc[0])
        export_path = export_dir / f'{filename}.{self.filetype}'

        self.fig.write_image(export_path)

    def outputChart(self):
        """Build the chart if needed, then display it."""
        self._buildChart()
        self.fig.show()

    def _buildChart(self):
        """Run the prepare/trace/layout/annotation pipeline exactly once per instance."""
        if self._built:
            return
        self.prepareData()
        self.createTraces()
        self.chartParams()
        self.chartAnnotations()
        self._built = True

In [8]:
# Chart object for Kent; uses the class's default `df` since no frame is passed.
kent = pfaOffencesChart("Kent")

In [10]:
# Prepare the data, build the sunburst, apply layout and annotations, then display it.
kent.outputChart()

In [None]:
# Annotate the first three labels of the Kent trace and re-display the figure.
kent.addMissingOffenceLabels(3)

In [None]:
# Export the Kent chart with the default filetype ('eps').
kent.saveChart()

In [12]:
# Scratch check: which container type the trace stores its labels in (numpy.ndarray per the recorded output).
type(kent.fig.data[0].labels[:3])

numpy.ndarray

In [None]:
# Map the first three trace labels to their values.
kent_trace = kent.fig.data[0]
label_dict = dict(zip(kent_trace.labels[:3], kent_trace.values[:3]))

In [None]:
# Display the label -> value mapping built in the previous cell.
label_dict

In [None]:
# Render one chart per PFA found in the data.
# The loop variable is deliberately not named `chart`: that name is used later
# in this notebook as a module alias (`... import Chart2_PFA_Offences as chart`),
# and rebinding it here would break those cells when run out of order.
for pfa in df['pfa'].unique():
    pfa_chart = pfaOffencesChart(pfa)
    pfa_chart.outputChart()
    # pfa_chart.saveChart()

In [None]:
# Build and display the chart for Avon and Somerset.
avon = pfaOffencesChart("Avon and Somerset")
avon.outputChart()

In [None]:
# Experiment: apply plotly's `uniformtext_mode='hide'` to this figure (the same
# option is left commented out in `chartParams`).
avon.fig.update_layout(uniformtext_mode='hide')

In [None]:
# Re-display the figure to see the effect of the layout change above.
avon.fig.show()

In [24]:
from src.visualization import Chart2_PFA_Offences as chart

In [25]:
# NOTE: this rebinds the notebook-level `df`. The `df=df` default of the
# notebook-defined `pfaOffencesChart.__init__` was evaluated when that class
# was defined, so it still refers to the earlier frame, not this one.
# NOTE(review): the displayed frame has an 'Unnamed: 0' column, which suggests
# the CSV was written with its index — confirm how downstream code handles it.
df = utils.load_data('processed', 'custodial_sentences_by_offence_2022.csv')
df

Data loaded


Unnamed: 0,pfa,Criminal damage and arson,Drug offences,Fraud offences,Miscellaneous crimes against society,Not known,Possession of weapons,Public order offences,Robbery,Sexual offences,Summary motoring,Summary non-motoring,Theft offences,Violence against the person
0,Avon and Somerset,0.017,0.103,0.017,0.026,0.0,0.009,0.069,0.009,0.009,0.017,0.155,0.224,0.345
1,Bedfordshire,0.0,0.079,0.0,0.053,0.0,0.132,0.053,0.026,0.026,0.0,0.079,0.316,0.237
2,Cambridgeshire,0.0,0.118,0.044,0.015,0.0,0.059,0.074,0.015,0.0,0.0,0.162,0.324,0.191
3,Cheshire,0.014,0.122,0.108,0.014,0.0,0.041,0.054,0.027,0.0,0.014,0.095,0.297,0.216
4,Cleveland,0.01,0.08,0.03,0.06,0.0,0.06,0.02,0.05,0.02,0.03,0.07,0.41,0.16
5,Cumbria,0.0,0.069,0.0,0.0,0.0,0.0,0.172,0.0,0.0,0.034,0.069,0.345,0.31
6,Derbyshire,0.0,0.057,0.057,0.025,0.0,0.025,0.041,0.025,0.0,0.033,0.082,0.443,0.213
7,Devon and Cornwall,0.0,0.032,0.032,0.032,0.0,0.111,0.159,0.0,0.048,0.063,0.095,0.222,0.206
8,Dorset,0.043,0.174,0.13,0.043,0.0,0.043,0.0,0.0,0.0,0.0,0.087,0.174,0.304
9,Durham,0.0,0.0,0.0,0.105,0.0,0.0,0.07,0.018,0.035,0.053,0.07,0.421,0.228


In [30]:
# Use the packaged `PfaOffencesChart` from the imported module (not the
# notebook-defined `pfaOffencesChart`), passing the reloaded frame explicitly.
west_mids = chart.PfaOffencesChart('West Midlands', df)

In [31]:
# Build and display the West Midlands chart via the module's API.
west_mids.output_chart()

In [33]:
# Manually replace the title text on the built figure so it reads
# "the West Midlands" and breaks before "offence group".
# NOTE(review): presumably the generated title lacks the article — confirm.
west_mids.fig.layout.title.text = "<b>Imprisonment of women in the West Midlands by<br>offence group in 2022</b>"

In [34]:
# Re-display the figure with the overridden title.
west_mids.fig.show()

In [38]:
# Export the re-titled West Midlands figure as a PDF. The target directory is
# created first so the export does not depend on it already existing.
# The path is relative to the current working directory.
pdf_path = Path('reports/figures/2.custody_offences_2022/pdf') / 'West Midlands.pdf'
pdf_path.parent.mkdir(parents=True, exist_ok=True)
west_mids.fig.write_image(pdf_path)