# Chart 1: Code consolidation

In [1]:
import pandas as pd
import textwrap
import plotly.graph_objs as go
import plotly.io as pio
from pathlib import Path #To create unique filenames for each PFA chart
import itertools

import src.data.utilities as utils
import src.visualization.prt_theme as prt_theme

# Project configuration; loadData() looks up its data directories under config['data'].
config = utils.read_config()
# Make "prt_template" the default Plotly template for every figure created below.
# Presumably the template is registered when src.visualization.prt_theme is imported — TODO confirm.
pio.templates.default = "prt_template"

In [26]:
def loadData(status='interim', filename='women_cust_sentence_length_PFA_2010-2022.csv') -> pd.DataFrame:
    """Load a CSV file into a Pandas DataFrame and pass it through `utils.categoryColumns()`.

    Parameters
    ----------
    status : {'raw', 'interim', 'processed'}, default is 'interim'
        Status of the data processing.
        * If 'raw', file is located in "rawFilePath" within config file
        * If 'interim', file is located in "intFilePath"
        * If 'processed', file is located in "clnFilePath"
    filename : str, default is 'women_cust_sentence_length_PFA_2010-2022.csv'
        Name of CSV file to be loaded.

    Returns
    -------
    DataFrame
        The CSV data as returned by `utils.categoryColumns()`, which is expected to convert
        eligible object columns into category columns to limit memory requirements.

    Raises
    ------
    ValueError
        If `status` is not one of 'raw', 'interim' or 'processed'.
    """
    paths = {
        "raw": 'rawFilePath',
        "interim": 'intFilePath',
        "processed": 'clnFilePath'
    }

    # Fail early with a readable message instead of a bare KeyError from the lookup below.
    if status not in paths:
        raise ValueError(
            f"Unknown status {status!r}; expected one of {sorted(paths)}"
        )

    # The configured directory and the filename are concatenated as plain strings,
    # so the config value is expected to end with a path separator.
    dfPath = f"{config['data'][paths[status]]}{filename}"
    df = pd.read_csv(dfPath)
    print('Data loaded')
    return utils.categoryColumns(df)

In [27]:
# Load the default interim CSV (see loadData's defaults) into `df`, which the chart loop below reads.
df = loadData()

Data loaded


In [28]:
# Take a copy so the loaded `df` is left untouched, then display it.
# NOTE(review): the cells below read `df` rather than `my_df` — confirm which one is intended.
my_df = df.copy()
my_df

Unnamed: 0,pfa,year,sentence_length,freq
0,Avon and Somerset,2010,12 months or more,44
1,Avon and Somerset,2010,6 months to less than 12 months,16
2,Avon and Somerset,2010,Less than 6 months,113
3,Avon and Somerset,2011,12 months or more,43
4,Avon and Somerset,2011,6 months to less than 12 months,21
...,...,...,...,...
1633,Wiltshire,2021,6 months to less than 12 months,3
1634,Wiltshire,2021,Less than 6 months,15
1635,Wiltshire,2022,12 months or more,7
1636,Wiltshire,2022,6 months to less than 12 months,5


In [5]:
def annotation_yvals(figure=None):
    """Return the last y value of every trace, in trace order.

    Parameters
    ----------
    figure : object with a `data` sequence of traces, optional
        Figure whose traces are read; each trace must support `trace['y']`.
        When omitted, the notebook-level `fig` is used, so existing
        zero-argument calls keep working.

    Returns
    -------
    list
        One value per trace: the final element of that trace's y data.
    """
    if figure is None:
        figure = fig  # fall back to the figure currently held at notebook level
    return [trace['y'][-1] for trace in figure.data]

In [6]:
def trace_max(figure=None):
    """Return the maximum y value of every trace, in trace order.

    Parameters
    ----------
    figure : object with a `data` sequence of traces, optional
        Figure whose traces are read; each trace's `['y']` must provide a
        `.max()` method (e.g. a NumPy array). When omitted, the notebook-level
        `fig` is used, so existing zero-argument calls keep working.

    Returns
    -------
    list
        One value per trace: the result of calling `.max()` on its y data.
    """
    if figure is None:
        figure = fig  # fall back to the figure currently held at notebook level
    return [trace['y'].max() for trace in figure.data]

In [7]:
def check_duplicates(y_vals, trace_max, annotation_list=None):
    """Separate label positions that share exactly the same y value.

    If any value occurs more than once in `y_vals`, the duplicated entry whose
    trace has the largest maximum is scaled by 1.2, and the matching annotation's
    'y' is updated to the new value. Only one entry is moved per call.

    Parameters
    ----------
    y_vals : list
        Label y-positions, one per trace; modified in place.
    trace_max : sequence
        Maximum y value of each trace, index-aligned with `y_vals`.
    annotation_list : list of dict, optional
        Annotations index-aligned with `y_vals`. When omitted, the notebook-level
        `annotations` list is used.

    Returns
    -------
    None
    """
    duplicate_vals = [idx for idx, value in enumerate(y_vals) if y_vals.count(value) > 1]
    if not duplicate_vals:
        return

    print(f'Duplicates found: index {duplicate_vals}\nUpdating...')

    if annotation_list is None:
        annotation_list = annotations  # fall back to the notebook-level list

    # Choose among the duplicated positions by comparing trace maxima with each
    # other (never a maximum against an index), and keep the winning *index*.
    max_i = max(duplicate_vals, key=lambda idx: trace_max[idx])
    y_vals[max_i] = y_vals[max_i] * 1.2
    annotation_list[max_i]['y'] = y_vals[max_i]

In [8]:
def check_overlap(l, space):
    """Tell whether the values in `l` are spread out by at least `space`.

    The values are sorted first, then each value is compared with its
    immediate successor.

    Parameters
    ----------
    l : iterable of numbers
        Positions to check; not modified.
    space : number
        Smallest acceptable gap between two neighbouring sorted values.

    Returns
    -------
    bool
        True when every neighbouring gap is at least `space` (also for fewer
        than two values), False as soon as one gap is smaller.
    """
    ordered = sorted(l)
    for lower, upper in zip(ordered, ordered[1:]):
        if not (upper - lower >= space):
            return False
    return True

In [9]:
def adjust_overlap(l, space):
    """Raise label y-positions in `l` that sit closer than `space` to another position.

    Every ordered pair of entries is visited. When the two current values differ
    by less than `space`, one entry of the pair is raised by the shortfall and the
    same new value is written to `annotations[index]['y']`.

    Parameters
    ----------
    l : list of numbers
        Label y-positions; modified in place.
    space : number
        Gap below which two positions are treated as overlapping.

    Returns
    -------
    None

    Notes
    -----
    Relies on a notebook-level `annotations` list whose entries line up
    index-for-index with `l`.
    NOTE(review): positions are adjusted in a single pass over the pairs; whether
    that always leaves every gap >= `space` has not been verified — confirm with
    check_overlap() if it matters.
    """
    # permutations() stores its input up front, so num1/num2 are the values `l` held
    # before any adjustment; the live values are re-read from `l` on the next line.
    for (idx1,num1), (idx2,num2) in itertools.permutations(enumerate(l), 2):
        difference = abs(l[idx1] - l[idx2])
        if difference < space:
            # The entry to move is chosen by the snapshot values; on a tie max() keeps the first.
            largest = max((idx1,num1), (idx2,num2), key=lambda x:x[1])
            largest_index = largest[0]
            # Move it up by exactly the missing distance.
            l[largest_index] = l[largest_index] + (space - difference)
            # Keep the matching annotation in step with the adjusted position.
            annotations[largest_index]['y'] = l[largest_index]

In [32]:
# Draw one line chart per police force area (PFA): one trace per sentence length,
# labelled at its right-hand end.

# Years covered by the dataset; used for the chart title and the x-axis so they
# cannot drift apart from the data.
first_year, last_year = int(df["year"].min()), int(df["year"].max())

# Save results to ../reports/figures/custody_sentence_lengths_test, generate if does not exist.
# Created once here because the location does not change between charts.
export_path = Path.cwd().parent / "reports" / "figures" / "custody_sentence_lengths_test"
export_path.mkdir(parents=True, exist_ok=True)

# Candidate upper limits for the y-axis; the smallest one above the data maximum is used.
y_intervals = [52, 103, 204, 305, 405, 606, 1210]

# Minimum vertical distance (in y-axis units) wanted between two trace labels.
space = 5

for pfa in df['pfa'].unique():
    pfa_df = df[df["pfa"] == pfa]
    trace_list = []  # Reset for every PFA so that charts are plotted separately

    # NOTE: `fig` and `annotations` must stay notebook-level names, because
    # annotation_yvals() and adjust_overlap() read them as globals.
    fig = go.Figure()  # New figure for every PFA in order to clear fig.data values

    for sentence_length in pfa_df["sentence_length"].unique():
        # Sort by year so the line is drawn left to right and the last point is the latest year.
        pfa_df_sentence = pfa_df[pfa_df["sentence_length"] == sentence_length].sort_values("year")

        trace = go.Scatter(
            x=pfa_df_sentence["year"],
            y=pfa_df_sentence["freq"],
            mode="lines",
            name=str(sentence_length),
            meta=pfa,  # Adding name of PFA in metadata to ensure data relates to only one area
            hovertemplate="%{y}<extra></extra>"
        )

        trace_list.append(trace)

    fig.add_traces(trace_list)

    ## Chart title
    title = textwrap.wrap(
        f'<b>Use of immediate imprisonment for women in {pfa} {first_year}–{last_year}</b>',
        width=45,
    )

    fig.update_layout(
        margin=dict(l=63, b=75, r=100),
        title="<br>".join(title),
        yaxis_title="",
        yaxis_tickformat=",.0f",
        yaxis_tick0=0,
        xaxis_dtick=2,
        xaxis_tick0=first_year,
        hovermode="x",
        width=655,
        height=500,
    )

    ## Chart annotations
    annotations = []

    # Adding trace annotations: one label per trace, anchored at the trace's last point.
    # These must stay the first entries of `annotations`, index-aligned with fig.data,
    # because adjust_overlap() updates annotations[i] for trace i.
    colorway = fig.layout.template.layout.colorway
    for j, plotted_trace in enumerate(fig.data):
        annotations.append(
            dict(
                xref="x",
                yref="y",
                x=plotted_trace.x[-1],
                y=plotted_trace.y[-1],
                text=str(plotted_trace.name),
                xanchor="left",
                align="left",
                showarrow=False,
                # Wrap around the colorway so more traces than colours cannot raise IndexError.
                font_color=colorway[j % len(colorway)],
                font_size=10,
            )
        )

    # Adding source label
    source = prt_theme.sourceAnnotation("Ministry of Justice, Criminal justice statistics", annotations)

    # Adding y-axis label
    annotations.append(
        dict(
            xref="x",
            yref="paper",
            x=first_year,
            y=1.04,
            align="left",
            xanchor="left",
            showarrow=False,
            text="Women sentenced to custody",
            font_size=12,
        )
    )

    # Checking for overlapping annotations on trace labels
    y_vals = annotation_yvals()
    if not check_overlap(y_vals, space):
        adjust_overlap(y_vals, space)

    # Adding annotations to layout
    fig.update_layout(annotations=annotations)

    ## Setting chart axis ranges
    max_y_val = max((plotted_trace.y.max() for plotted_trace in fig.data), default=0)

    # Smallest candidate strictly above the data maximum. If the data exceed every
    # candidate, fall back to the data maximum plus 5% headroom instead of failing.
    y_max = next((limit for limit in y_intervals if limit > max_y_val), max_y_val * 1.05)

    fig.update_yaxes(range=[0, y_max])
    fig.update_xaxes(range=[first_year - 0.3, last_year + 0.3])

    fig.show()

    ## Exporting to static image

    # Setting filename variable and full path
    filename = str(pfa)
    export_eps_path = export_path / f'{filename}.eps'

    # Export is currently switched off; uncomment to write the EPS file.
    # fig.write_image(export_eps_path)

In [16]:
# Scratch check on the figure left over from the chart loop: print each trace's
# maximum y value, then display the largest of them (0 if none exceeds 0).
trace_peaks = [plotted.y.max() for plotted in fig.data]
max_trace = 0
for max_val in trace_peaks:
    max_trace = max_val if max_val > max_trace else max_trace
    print(max_val)
max_trace

7
38
15


38

In [None]:
# Choose the y-axis upper limit for `max_y_val` (the value left over from the chart
# loop above) from a fixed set of candidate limits.
y_intervals = [55, 105, 205, 305, 405, 1210]

# Position (not value) of the candidate closest to the data maximum, so that it can
# safely be used to index `y_intervals`.
y_max_idx = min(range(len(y_intervals)), key=lambda idx: abs(y_intervals[idx] - max_y_val))

if y_intervals[y_max_idx] >= max_y_val:
    y_max = y_intervals[y_max_idx]
elif y_max_idx + 1 < len(y_intervals):
    # The closest candidate is below the data, so step up to the next one.
    y_max = y_intervals[y_max_idx + 1]
else:
    # The data exceed the largest candidate; use the data maximum rather than
    # indexing past the end of the list.
    y_max = max_y_val