# By Price
* Figure generation for "by price" page of dashboard

In [1]:
import ast
import datetime
import itertools
import matplotlib.pyplot as plt
from matplotlib import rc_file_defaults
from matplotlib import ticker

plt.style.use('ggplot')

import os
import sys
import numpy as np
import pandas as pd
import random
import regex as re
import seaborn as sns

from dateutil.relativedelta import relativedelta
from gensim.models.nmf import Nmf
from gensim.corpora import Dictionary
from gensim.models.coherencemodel import CoherenceModel
from scipy.stats import chi2_contingency, mannwhitneyu, wilcoxon
from scipy.spatial.distance import pdist
from scipy.spatial.distance import squareform
from wordcloud import WordCloud

import plotly.express as px
import plotly.graph_objs as go
from ipywidgets import widgets
from plotly.subplots import make_subplots

import json
import copy

In [2]:
# Resolve the project layout from the current working directory.
# NOTE: this only works when the notebook is launched from its own folder,
# which must sit one level below the source directory and two levels below
# the repository root.
cwd = os.getcwd()
cwd_split = os.path.split(cwd)
srcdir = cwd_split[0]

# Expose the source directory so project-local modules can be imported.
sys.path.append(srcdir)

root = os.path.split(srcdir)[0]

# Data folders, in order:
#   rawdir   - raw csv files (local only)
#   tmpdir   - intermediate data products (local only)
#   extdir   - data from external sources (in repo)
#   cleandir - data for dashboard generation (in repo)
rawdir, tmpdir, extdir, cleandir = (
    os.path.join(root, subdir)
    for subdir in ("data/raw", "data/tmp", "data/external", "data/cleaned")
)

# Final figures (in repo)
figdir = os.path.join(root, "figures")

In [3]:
# Project-local plotting helpers (presumably resolved through the source
# directory added to sys.path earlier - confirm if the import fails).
import pltformat
# Color dictionary provided by pltformat, kept for use when styling figures.
colorDict = pltformat.get_color_dictionary()

# 1. Import and preprocess

In [6]:
import preprocess

## 1(a) 2020 data

In [7]:
# 2020 inputs and outputs: each file name is paired with its full path.

# Raw docket export
docketcsv = "docket_reparse_03_02_2021.csv"
docketpath = os.path.join(rawdir, docketcsv)

# Raw court summary export
courtcsv = "court_summary.csv"
courtpath = os.path.join(rawdir, courtcsv)

# Merged and cleaned intermediate file
outcsv = "processed_data.csv"
outpath = os.path.join(tmpdir, outcsv)

# Trimmed file for the dashboard app
trimmedcsv = "app_data.csv"
trimmedpath = os.path.join(cleandir, trimmedcsv)


In [8]:
# Combine the 2020 docket and court summary files into one cleaned frame.
# The helper is also handed `outpath` so it can save the result as a new csv.
df_2020 = preprocess.merge_and_clean_data(
    docketpath,
    courtpath,
    outPath=outpath,
    verbose=True,
)

Removing 13 cases for which prelim_hearing_dt - bail_date was more than 5...
> Imported 24225 rows with 25 columns:
age
age_group
arrest_dt
arresting_officer
attorney
attorney_type
bail_amount
bail_date
bail_paid
bail_set_bin
bail_set_by
bail_type
case_status
dob
is_bail_posted
is_philly_zipcode
offense_date
offense_type
offenses
prelim_hearing_dt
prelim_hearing_time
race
sex
statute
zip
> Saved new file


## 1(b) 2021 data

In [9]:
# 2021 inputs and outputs: each file name is paired with its full path.
# These reuse (and overwrite) the variable names set for the 2020 files.

# Raw docket export
docketcsv = "2021-jan_feb_march_dockets.csv"
docketpath = os.path.join(rawdir, docketcsv)

# Raw court summary export
courtcsv = "2021-jan_feb_march_court.csv"
courtpath = os.path.join(rawdir, courtcsv)

# Merged and cleaned intermediate file
outcsv = "processed_data_2021.csv"
outpath = os.path.join(tmpdir, outcsv)

# Trimmed file for the dashboard app
trimmedcsv = "app_data_2021.csv"
trimmedpath = os.path.join(cleandir, trimmedcsv)


In [10]:
# Combine the 2021 docket and court summary files into one cleaned frame.
# The helper is also handed `outpath` so it can save the result as a new csv.
df_2021 = preprocess.merge_and_clean_data(
    docketpath,
    courtpath,
    outPath=outpath,
    verbose=True,
)

Removing 1 cases for which prelim_hearing_dt - bail_date was more than 5...
> Imported 6017 rows with 25 columns:
age
age_group
arrest_dt
arresting_officer
attorney
attorney_type
bail_amount
bail_date
bail_paid
bail_set_bin
bail_set_by
bail_type
case_status
dob
is_bail_posted
is_philly_zipcode
offense_date
offense_type
offenses
prelim_hearing_dt
prelim_hearing_time
race
sex
statute
zip
> Saved new file


# 2. Prepare aggregate data to use for figures  

In [46]:
# Stack the 2020 and 2021 frames into one table with a fresh 0..n-1 row index
df = pd.concat([df_2020, df_2021], ignore_index=True)

In [49]:
# Derive the calendar year and month of each bail date for later grouping
bail_dt = df['bail_date'].dt
df = df.assign(bail_year=bail_dt.year, bail_month=bail_dt.month)

# 3. Comparison of bail amount paid  
* Dropdown 1: year-end comparison of bail amount paid for 2020 vs 2021 (YTD)
* Dropdown 2: monthly summary for 2020 and 2021 (YTD)

In [206]:
# Total bail paid for every (year, month) pair
month_keys = ['bail_year', 'bail_month']
df_month = df.groupby(month_keys)['bail_paid'].sum()

In [208]:
# Location of the saved monthly bail-paid totals used for the figures below
bail_paid_month = os.path.join(cleandir, "app_bail_paid.csv")

# Write the file only when it is missing. This lets a fresh checkout run the
# notebook from top to bottom (the following load would otherwise fail),
# while an existing copy is never overwritten. Delete the csv to regenerate
# it from the current `df_month`.
if not os.path.exists(bail_paid_month):
    df_month.to_csv(bail_paid_month, header=True)

In [217]:
# load data
# Rebinds `df_month` to the table read back from the saved csv. The cells
# below access its 'bail_year' and 'bail_paid' columns, so from here on
# `df_month` is a DataFrame rather than the grouped result computed earlier.
df_month = pd.read_csv(bail_paid_month)

In [231]:
# Yearly totals, summed from the monthly table
df_year = df_month.groupby(['bail_year'])['bail_paid'].sum()
bail_paid_2020 = df_year.loc[2020]
bail_paid_2021 = df_year.loc[2021]

# Monthly totals in the row order of the monthly table
month_data = df_month['bail_paid']

# Month abbreviations used as axis and hover labels
months = "Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec".split()

In [243]:
# Yearly comparison: one vertical bar per year with the total bail paid
fig = go.Figure()

# Settings common to both bars: label inside the bar, custom hover text,
# and no legend entry
year_bar_kwargs = dict(
    orientation="v",
    textposition="inside",
    hoverinfo="text",
    showlegend=False,
)

# Dollar labels with thousands separators and no decimals
label_2020 = f"${bail_paid_2020:,.0f}"
label_2021 = f"${bail_paid_2021:,.0f}"

# 2020 bar at x position 0
fig.add_trace(go.Bar(
    x=[0],
    y=[bail_paid_2020],
    text=[label_2020],
    hovertext=["2020 <br>" + label_2020],
    **year_bar_kwargs
))

# 2021 bar at x position 1
fig.add_trace(go.Bar(
    x=[1],
    y=[bail_paid_2021],
    text=[label_2021],
    hovertext=["2021 <br>" + label_2021],
    **year_bar_kwargs
))

# Titles, plus year names in place of the numeric x positions
fig.update_layout(
    title="bail paid by year",
    xaxis=dict(
        title="year",
        tickvals=[0, 1],
        ticktext=["2020", "2021 YTD"],
    ),
    yaxis=dict(title="bail amount paid ($)"),
)
fig.show()

In [233]:
# Monthly comparison: grouped bars of bail paid per month, one series per year
fig = go.Figure()

# 2020 data
# NOTE: the positional slices below assume the first 12 rows of `month_data`
# are Jan-Dec 2020 and every remaining row belongs to 2021, in month order.
hovertext_2020 = [m + " 2020 <br>" + "$"+f'{v:,.0f}'
                  for (m,v) in zip(months, month_data[:12])]
fig.add_trace(go.Bar(
    y = month_data[:12],
    name = "2020",
    hoverinfo = "text",
    hovertext = hovertext_2020
    ))

# 2021 data (year to date, so fewer than 12 bars may be drawn)
hovertext_2021 = [m + " 2021 <br>" + "$"+f'{v:,.0f}'
                  for (m,v) in zip(months, month_data[12:])]
fig.add_trace(go.Bar(
    y = month_data[12:],
    name = "2021",
    hoverinfo = "text",
    hovertext = hovertext_2021
    ))

# One tick per calendar month. The tick positions must pair one-to-one with
# the month labels, so they are derived from `months` and not from the number
# of rows in `df_month` (which spans both years).
fig.update_layout(
    title="bail paid by month",
    xaxis_title="month",
    yaxis_title="bail amount paid ($)",
    xaxis_tickvals = list(range(len(months))),
    xaxis_ticktext = months
    )

fig.show()

In [244]:
# Combined figure: the yearly bars are shown first and the monthly bars start
# hidden; a dropdown menu switches between the two views.
fig = go.Figure()

### Yearly summary traces (trace indices 0 and 1)
yearly_style = dict(
    orientation="v",
    textposition="inside",
    hoverinfo="text",
    showlegend=False,
)
amount_2020 = f"${bail_paid_2020:,.0f}"
amount_2021 = f"${bail_paid_2021:,.0f}"

# 2020
fig.add_trace(go.Bar(
    x=[0],
    y=[bail_paid_2020],
    text=[amount_2020],
    hovertext=["2020 <br>" + amount_2020],
    **yearly_style
))

# 2021
fig.add_trace(go.Bar(
    x=[1],
    y=[bail_paid_2021],
    text=[amount_2021],
    hovertext=["2021 <br>" + amount_2021],
    **yearly_style
))

### Monthly summary traces (trace indices 2 and 3), hidden until selected
monthly_style = dict(hoverinfo="text", visible=False)
values_2020 = month_data[:12]
values_2021 = month_data[12:]

# 2020 data
hovertext_2020 = [f"{m} 2020 <br>${v:,.0f}" for m, v in zip(months, values_2020)]
fig.add_trace(go.Bar(
    y=values_2020,
    name="2020",
    hovertext=hovertext_2020,
    **monthly_style
))

# 2021 data
hovertext_2021 = [f"{m} 2021 <br>${v:,.0f}" for m, v in zip(months, values_2021)]
fig.add_trace(go.Bar(
    y=values_2021,
    name="2021",
    hovertext=hovertext_2021,
    **monthly_style
))

### Dropdown menu: each button sets trace visibility and the matching x axis
year_view = dict(
    label="by year",
    method="update",
    args=[
        {"visible": [True, True, False, False]},
        {
            "title": "Bail paid by year",
            'xaxis': {
                'title': 'year',
                'tickvals': [0, 1],
                'ticktext': ["2020", "2021 YTD"],
            },
        },
    ],
)
month_view = dict(
    label="by month",
    method="update",
    args=[
        {"visible": [False, False, True, True]},
        {
            "title": "Bail paid by month",
            'xaxis': {
                'title': 'month',
                'tickvals': list(range(12)),
                'ticktext': months,
            },
        },
    ],
)

# Initial layout matches the "by year" view; the "Summary" annotation is
# placed in paper coordinates to the upper left of the plot area.
fig.update_layout(
    title="Bail paid by year",
    xaxis_title="year",
    yaxis_title="bail amount paid ($)",
    xaxis_tickvals=[0, 1],
    xaxis_ticktext=["2020", "2021 YTD"],
    annotations=[
        dict(text="Summary", x=-0.16, xref="paper", y=1.06, yref="paper",
             align="left", showarrow=False)
    ],
    updatemenus=[
        dict(active=0, buttons=[year_view, month_view])
    ],
)

fig.show()