In [471]:
"""
STATE: AL
POLICY: Parole Grant Rates
VERSION: V1
DATA SOURCE: Public AL reports
DATA QUALITY: medium-low
HIGHEST PRIORITY MISSING DATA: N/A
REFERENCE_DATE: October 2022
TIME_STEP: Month
ADDITIONAL NOTES: Initial policy scoping doc https://docs.google.com/document/d/1mj6Fmm3aCmx08PqhNShV6Rb8MCRuHQ2D56BmxeJKxKg/edit?usp=sharing
"""

import logging
import pandas as pd
import numpy as np
import pdb
import matplotlib as plt
from IPython.display import display
import itertools


%run /Users/jazz/Recidiviz/recidiviz-research/utils/research_utils.py

# Lift every pandas display limit so frames and long cell values render untruncated.
for _display_option in (
    "display.max_colwidth",
    "display.max_columns",
    "display.max_rows",
    "max_seq_items",
    "display.width",
):
    pd.set_option(_display_option, None)

The google.cloud.bigquery extension is already loaded. To reload it, use:
  %reload_ext google.cloud.bigquery
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [472]:
import sys

sys.path.insert(1, "/Users/jazz/Recidiviz/pulse-data")

from recidiviz.calculator.modeling.population_projection.super_simulation.time_converter import (
    TimeConverter,
)
from recidiviz.calculator.modeling.population_projection.utils.spark_bq_utils import (
    upload_spark_model_inputs,
)
from recidiviz.calculator.modeling.population_projection.utils.spark_preprocessing_utils import (
    convert_dates,
)
from recidiviz.utils.yaml_dict import YAMLDict

# Module-level logger with its threshold set to INFO.
logger = logging.getLogger(__name__)
logger.setLevel(level=logging.INFO)

# Get the simulation tag from the model inputs config
yaml_file_path = "tx_gun_minimums_v1.yaml"

# Each pop removes the key from the config object, so the order of these calls matters.
simulation_config = YAMLDict.from_path(yaml_file_path)
data_inputs = simulation_config.pop_dict("data_inputs")
simulation_tag = data_inputs.pop("big_query_simulation_tag", str)

# Convert the timestamps to time_steps (relative ints), with 0 being the
# reference date read from the YAML config.
# NOTE(review): this comment used to say "(Sept. 2020)", which matches neither the
# notebook header nor the 2012-2022 data range used below -- confirm the actual
# reference_date in the YAML.
reference_date = simulation_config.pop("reference_date", float)
time_step = simulation_config.pop("time_step", float)
time_converter = TimeConverter(reference_year=reference_date, time_step=time_step)

In [473]:
# Load the raw prison-admissions table for the TX gun mandatory-minimum model.
prison_admissions_csv = "input_data/TX Gun MM Raw Data - prison_admissions.csv"
prison_admissions_raw = pd.read_csv(prison_admissions_csv)

In [474]:
# Month-start dates from 2012-09 through 2022-08, each labelled with the fiscal
# year of the "Q-AUG" (year ending in August) period it falls in.
fiscal_month_starts = pd.date_range("2012-09-01", "2022-08-01", freq="MS")
month_array = pd.DataFrame({"month": fiscal_month_starts})
month_array["fiscal_year"] = month_array["month"].dt.to_period("Q-AUG").dt.qyear

# No `on=` is given, so pandas joins on whatever columns the two frames share
# (presumably just "fiscal_year" -- verify against the CSV header). The join key
# is dropped afterwards because only the month is needed downstream.
prison_admissions = prison_admissions_raw.merge(month_array, how="inner").drop(
    columns="fiscal_year"
)

In [475]:
# Figures quoted in the analyst's notes: FY2022 had 37428 total prison admissions,
# 4791 of them for the affected offense type, and 1708 of those involved a deadly weapon.
relevant_offense_admissions = 4791
affected_prison_admissions_per_year = 1708

# fraction of total admissions (taken from the last row of the table) that were
# of the affected offense type
ratio_relevant_offenses = (
    relevant_offense_admissions / prison_admissions.iloc[-1].admissions
)

# fraction of relevant admissions that also had a deadly weapon involved
ratio_deadly_weapon = affected_prison_admissions_per_year / relevant_offense_admissions

# Scale every row's admissions figure by both fractions; the division by 12
# converts it to a per-month number.
prison_admissions["deadly_weapon_monthly"] = (
    prison_admissions["admissions"] * ratio_relevant_offenses * ratio_deadly_weapon / 12
)

In [476]:
# Build the pretrial -> prison outflow table: the monthly deadly-weapon admissions
# become total_population (rounded to one decimal), months are converted to
# relative time steps, and the working columns are removed.
prison_outflows = (
    prison_admissions.rename(columns={"deadly_weapon_monthly": "total_population"})
    .assign(
        total_population=lambda df: df["total_population"].round(1),
        time_step=lambda df: convert_dates(time_converter, df["month"]),
    )
    .drop(columns=["admissions", "month"])
)
prison_outflows

Unnamed: 0,compartment,outflow_to,total_population,time_step
0,pretrial,prison,163.3,-124
1,pretrial,prison,163.3,-123
2,pretrial,prison,163.3,-122
3,pretrial,prison,163.3,-121
4,pretrial,prison,163.3,-120
5,pretrial,prison,163.3,-119
6,pretrial,prison,163.3,-118
7,pretrial,prison,163.3,-117
8,pretrial,prison,163.3,-116
9,pretrial,prison,163.3,-115


In [477]:
# 1982 is divided by 12 to get a monthly figure (presumably yearly probation
# admissions for the relevant offenses -- TODO confirm the source), then scaled by
# the deadly-weapon share and rounded to a whole number.
probation_admissions_total = 1982
probation_adm = float(round(probation_admissions_total / 12 * ratio_deadly_weapon))
probation_adm

59.0

In [478]:
# Pretrial -> probation outflows: the same time steps as the prison outflows, with
# one constant monthly admission count on every row.
probation_outflows = prison_outflows[["time_step"]].assign(
    compartment="pretrial",
    outflow_to="probation",
    total_population=probation_adm,
)
probation_outflows

Unnamed: 0,time_step,compartment,outflow_to,total_population
0,-124,pretrial,probation,59.0
1,-123,pretrial,probation,59.0
2,-122,pretrial,probation,59.0
3,-121,pretrial,probation,59.0
4,-120,pretrial,probation,59.0
5,-119,pretrial,probation,59.0
6,-118,pretrial,probation,59.0
7,-117,pretrial,probation,59.0
8,-116,pretrial,probation,59.0
9,-115,pretrial,probation,59.0


In [479]:
# Stack the prison and probation outflows and tag every row with the single
# crime_type used by this model.
outflows = pd.concat([prison_outflows, probation_outflows]).assign(crime_type="gun")
outflows

Unnamed: 0,compartment,outflow_to,total_population,time_step,crime_type
0,pretrial,prison,163.3,-124,gun
1,pretrial,prison,163.3,-123,gun
2,pretrial,prison,163.3,-122,gun
3,pretrial,prison,163.3,-121,gun
4,pretrial,prison,163.3,-120,gun
5,pretrial,prison,163.3,-119,gun
6,pretrial,prison,163.3,-118,gun
7,pretrial,prison,163.3,-117,gun
8,pretrial,prison,163.3,-116,gun
9,pretrial,prison,163.3,-115,gun


# Transitions

In [480]:
# Baseline sentence-length distribution for everyone in TX prisons.
# `thousands=","` lets pandas parse counts written with comma separators.
transitions_prison_csv = "input_data/TX Gun MM Raw Data - transitions_prison.csv"
input_trans = pd.read_csv(transitions_prison_csv, thousands=",")
input_trans

Unnamed: 0,bin,sen_count,years
0,2 Years,8363.0,1
1,3 to 5 Years,14735.0,4
2,6 to 10 Years,8251.0,8
3,11 to 20 Years,3550.0,15
4,21 to 30 Years,1137.0,25
5,31 to 40 Years,526.0,35
6,41 to 59 Years,327.0,50
7,60+,539.0,80


In [481]:
# Scratch calculation: this quotient is the value hard-coded as `calibration_factor`
# in the next cell. 7.3 is `total_avg_sentence_length` there; the meaning of 7.8 is
# not documented in this notebook -- TODO confirm.
7.8 / 7.3

1.0684931506849316

In [482]:
# Average sentence length per crime type; only rows labelled 0-3 are kept.
avg_sen_length_csv = "input_data/TX Gun MM Raw Data - avg_sen_length_by_type.csv"
avg_sen_lengths_by_crime_type = (
    pd.read_csv(avg_sen_length_csv).rename(columns={"Unnamed: 0": "crime_type"}).loc[:3]
)

# Each crime type gets a scalar that stretches or compresses the overall
# sentence distribution so its mean lands on that crime type's reported average.
# The overall average (7.3) is first multiplied by a calibration factor, written
# here as the quotient it came from rather than as a pasted decimal (same float).
total_avg_sentence_length = 7.3
calibration_factor = 7.8 / 7.3
calibrated_total_avg_sentence_length = total_avg_sentence_length * calibration_factor
avg_sen_lengths_by_crime_type["multiplier"] = (
    avg_sen_lengths_by_crime_type["avg_sentence_length"]
    / calibrated_total_avg_sentence_length
)
avg_sen_lengths_by_crime_type.sort_values(by="avg_sentence_length")

Unnamed: 0,crime_type,avg_sentence_length,multiplier
3,Other,4.9,0.628205
1,Property,5.4,0.692308
2,Drug,5.7,0.730769
0,Violent,10.2,1.307692


In [483]:
# Pair every sentence-length bin with every crime type, then scale the bin's
# representative length by that crime type's multiplier.
trans = input_trans.merge(avg_sen_lengths_by_crime_type, how="cross").assign(
    years_adj=lambda df: df["years"] * df["multiplier"]
)

In [484]:
def _weighted_avg_years(group):
    """Return the sen_count-weighted mean of years_adj for one crime type."""
    return np.average(group["years_adj"], weights=group["sen_count"])


# Sanity check: the weighted means should be close to the reported averages above.
trans.groupby("crime_type").apply(_weighted_avg_years).sort_values()

crime_type
Other        4.915569
Property     5.417158
Drug         5.718111
Violent     10.232409
dtype: float64

In [485]:
# Express the adjusted sentence lengths in months (the model's time step).
trans["sen_months"] = 12 * trans["years_adj"]
trans.sort_values(by=["crime_type", "years"])

Unnamed: 0,bin,sen_count,years,crime_type,avg_sentence_length,multiplier,years_adj,sen_months
2,2 Years,8363.0,1,Drug,5.7,0.730769,0.730769,8.769231
6,3 to 5 Years,14735.0,4,Drug,5.7,0.730769,2.923077,35.076923
10,6 to 10 Years,8251.0,8,Drug,5.7,0.730769,5.846154,70.153846
14,11 to 20 Years,3550.0,15,Drug,5.7,0.730769,10.961538,131.538462
18,21 to 30 Years,1137.0,25,Drug,5.7,0.730769,18.269231,219.230769
22,31 to 40 Years,526.0,35,Drug,5.7,0.730769,25.576923,306.923077
26,41 to 59 Years,327.0,50,Drug,5.7,0.730769,36.538462,438.461538
30,60+,539.0,80,Drug,5.7,0.730769,58.461538,701.538462
3,2 Years,8363.0,1,Other,4.9,0.628205,0.628205,7.538462
7,3 to 5 Years,14735.0,4,Other,4.9,0.628205,2.512821,30.153846


In [486]:
# Keep only what later steps use (count, crime type, sentence months) by dropping
# the intermediate columns, ordered by crime type and sentence length.
intermediate_columns = ["bin", "years", "multiplier", "avg_sentence_length", "years_adj"]
trans_sens = trans.drop(columns=intermediate_columns).sort_values(
    by=["crime_type", "sen_months"]
)

In [487]:
# Discharge data: turn release counts per category into each category's share of
# all releases, largest share first.
discharges_raw = pd.read_csv("input_data/TX Gun MM Raw Data - discharge_type.csv")
discharges = (
    discharges_raw.assign(
        percent_of_releases=lambda df: df["releases"] / df["releases"].sum()
    )
    .drop(columns="releases")
    .sort_values(by="percent_of_releases", ascending=False)
)
discharges

Unnamed: 0,Category,percent_of_releases
2,Parole,0.518773
3,DMS,0.327416
0,Discharge,0.145362
4,MS,0.004361
1,Community Supervision,0.004087


In [488]:
# Every (crime type, sentence length) row is repeated once per discharge category.
trans_wth_discharge_type = trans_sens.merge(discharges, how="cross")
trans_wth_discharge_type

Unnamed: 0,sen_count,crime_type,sen_months,Category,percent_of_releases
0,8363.0,Drug,8.769231,Parole,0.518773
1,8363.0,Drug,8.769231,DMS,0.327416
2,8363.0,Drug,8.769231,Discharge,0.145362
3,8363.0,Drug,8.769231,MS,0.004361
4,8363.0,Drug,8.769231,Community Supervision,0.004087
5,14735.0,Drug,35.076923,Parole,0.518773
6,14735.0,Drug,35.076923,DMS,0.327416
7,14735.0,Drug,35.076923,Discharge,0.145362
8,14735.0,Drug,35.076923,MS,0.004361
9,14735.0,Drug,35.076923,Community Supervision,0.004087


In [489]:
# Length-of-stay data by crime category and discharge type, reshaped from one
# column per crime type into long form (one row per Category x crime_type).
los_raw = pd.read_csv("input_data/TX Gun MM Raw Data - los_per_crime_type.csv")
crime_type_columns = ["Violent", "Property", "Drug", "Other"]
los_by_sen_type = pd.melt(
    los_raw.loc[:4],  # ignore "Total" last row
    id_vars=["Category"],
    value_vars=crime_type_columns,
    var_name="crime_type",
    value_name="los_percent_of_sen",
)
los_by_sen_type

Unnamed: 0,Category,crime_type,los_percent_of_sen
0,Community Supervision,Violent,0.104
1,Parole,Violent,0.682
2,DMS,Violent,0.637
3,MS,Violent,0.722
4,Discharge,Violent,1.0
5,Community Supervision,Property,0.141
6,Parole,Property,0.474
7,DMS,Property,0.615
8,MS,Property,0.838
9,Discharge,Property,1.0


In [490]:
# Attach the length-of-stay fraction to each row. "crime_type" and "Category" are
# the only columns the two frames share, so naming them gives the same join as the
# default while making the keys visible.
trans_with_los_by_discharge = trans_wth_discharge_type.merge(
    los_by_sen_type, how="inner", on=["crime_type", "Category"]
)

In [491]:
# Derive, per row:
#   total_population - sentence count split by the discharge category's share
#   prison_los       - months served in prison (sentence months x LOS fraction)
#   parole_los       - the rest of the sentence (sentence months - prison months)
trans_with_los_by_discharge = trans_with_los_by_discharge.assign(
    total_population=lambda df: df["sen_count"] * df["percent_of_releases"],
    prison_los=lambda df: df["sen_months"] * df["los_percent_of_sen"],
    parole_los=lambda df: df["sen_months"] - df["prison_los"],
)

In [492]:
# exclude Drug crime type
is_not_drug = trans_with_los_by_discharge["crime_type"] != "Drug"
trans_affected = trans_with_los_by_discharge.loc[is_not_drug].copy()

# arbitrary scalar to match affected population LOS from impact report (8.7 yrs);
# 4.447 is presumably the unscaled average LOS in years -- TODO confirm its source
affected_firearm_scalar = 8.7 / 4.447

# Stretch both length-of-stay columns by the scalar and round to whole months
# (kept as floats).
for los_column in ("prison_los", "parole_los"):
    trans_affected[los_column] = (
        (affected_firearm_scalar * trans_affected[los_column]).round().astype(float)
    )

trans_affected

Unnamed: 0,sen_count,crime_type,sen_months,Category,percent_of_releases,los_percent_of_sen,total_population,prison_los,parole_los
40,8363.0,Other,7.538462,Parole,0.518773,0.449,4338.495959,7.0,8.0
41,14735.0,Other,30.153846,Parole,0.518773,0.449,7644.115503,26.0,33.0
42,8251.0,Other,60.307692,Parole,0.518773,0.449,4280.393418,53.0,65.0
43,3550.0,Other,113.076923,Parole,0.518773,0.449,1841.643029,99.0,122.0
44,1137.0,Other,188.461538,Parole,0.518773,0.449,589.844542,166.0,203.0
45,526.0,Other,263.846154,Parole,0.518773,0.449,272.874432,232.0,284.0
46,327.0,Other,376.923077,Parole,0.518773,0.449,169.638668,331.0,406.0
47,539.0,Other,603.076923,Parole,0.518773,0.449,279.618477,530.0,650.0
48,8363.0,Other,7.538462,DMS,0.327416,0.593,2738.182969,9.0,6.0
49,14735.0,Other,30.153846,DMS,0.327416,0.593,4824.479977,35.0,24.0


In [493]:
# Sanity check: population-weighted mean prison length of stay, converted from
# months to years; the scalar in the previous cell was chosen to target 8.7 years.
np.average(trans_affected.prison_los, weights=trans_affected.total_population) / 12

8.703936879956446

In [494]:
# Prison transitions: "Discharge" rows leave straight to liberty, every other
# discharge category goes to parole. Durations are capped at 40 years (in months).
max_prison_months = 40 * 12
prison_trans = (
    trans_affected[["crime_type", "prison_los", "total_population", "Category"]]
    .rename(columns={"prison_los": "compartment_duration"})
    .assign(
        compartment="prison",
        outflow_to=lambda df: np.where(
            df["Category"].eq("Discharge"), "liberty", "parole"
        ),
        compartment_duration=lambda df: df["compartment_duration"].clip(
            upper=max_prison_months
        ),
    )
)

In [495]:
# Collapse to one row per (compartment, outflow_to, duration), summing population
# across crime types and discharge categories.
transition_keys = ["compartment", "outflow_to", "compartment_duration"]
prison_transitions = (
    prison_trans.groupby(transition_keys)[["total_population"]].sum().reset_index()
)

# Rescale so that the populations sum to the yearly affected prison admissions.
prison_yearly_admission_scale_factor = (
    affected_prison_admissions_per_year / prison_transitions["total_population"].sum()
)
prison_transitions["total_population"] = (
    prison_transitions["total_population"] * prison_yearly_admission_scale_factor
).round(2)
prison_transitions

Unnamed: 0,compartment,outflow_to,compartment_duration,total_population
0,prison,liberty,15.0,18.49
1,prison,liberty,16.0,18.49
2,prison,liberty,31.0,18.49
3,prison,liberty,59.0,32.58
4,prison,liberty,65.0,32.58
5,prison,liberty,118.0,18.24
6,prison,liberty,123.0,32.58
7,prison,liberty,130.0,18.24
8,prison,liberty,221.0,7.85
9,prison,liberty,244.0,7.85


In [496]:
# Weighted mean prison duration in years, computed on the aggregated table (i.e.
# after the 40-year cap and the admission rescaling applied above).
np.average(
    prison_transitions.compartment_duration, weights=prison_transitions.total_population
) / 12

7.650253915093606

In [497]:
# Parole transitions: only rows with time left after prison (parole_los > 0); all
# of them exit to liberty. Durations are capped at 40 years (in months).
has_parole_time = trans_affected["parole_los"] > 0
parole_trans = (
    trans_affected.loc[has_parole_time, ["crime_type", "parole_los", "total_population"]]
    .rename(columns={"parole_los": "compartment_duration"})
    .assign(
        compartment="parole",
        outflow_to="liberty",
        compartment_duration=lambda df: df["compartment_duration"].clip(upper=40 * 12),
    )
)

In [498]:
# Collapse to one row per (compartment, outflow_to, duration), summing population.
parole_group_keys = ["compartment", "outflow_to", "compartment_duration"]
parole_transitions = (
    parole_trans.groupby(parole_group_keys)[["total_population"]].sum().reset_index()
)

# rough scale factor for readability: populations are rescaled to sum to the
# yearly affected prison admissions
yearly_admission_scale_factor = (
    affected_prison_admissions_per_year / parole_transitions["total_population"].sum()
)
parole_transitions["total_population"] = (
    parole_transitions["total_population"] * yearly_admission_scale_factor
).round(2)
parole_transitions

Unnamed: 0,compartment,outflow_to,compartment_duration,total_population
0,parole,liberty,3.0,0.65
1,parole,liberty,5.0,0.65
2,parole,liberty,6.0,97.47
3,parole,liberty,8.0,77.22
4,parole,liberty,9.0,77.87
5,parole,liberty,10.0,77.22
6,parole,liberty,11.0,49.88
7,parole,liberty,13.0,0.61
8,parole,liberty,14.0,0.61
9,parole,liberty,19.0,1.14


In [499]:
# Weighted mean parole duration in years, computed on the aggregated table (after
# the 40-year cap and rescaling above).
np.average(
    parole_transitions.compartment_duration, weights=parole_transitions.total_population
) / 12

5.26481155143915

### Probation transitions

In [500]:
# pulling felony probation sentence lengths from TX DOC reports
# evenly distributed within buckets, 10 year max
# The CSV's compartment_duration is multiplied by 12 (presumably years -> months;
# verify against the raw file) and every row is a probation -> liberty transition.
probation_trans = (
    pd.read_csv("input_data/TX Gun MM Raw Data - probation_los.csv")
    .rename(columns={"probation_pop": "total_population"})
    .assign(
        compartment="probation",
        outflow_to="liberty",
        compartment_duration=lambda df: 12 * df["compartment_duration"],
    )
)
probation_trans

Unnamed: 0,compartment_duration,total_population,compartment,outflow_to
0,6.0,139.5,probation,liberty
1,12.0,139.5,probation,liberty
2,18.0,11338.375,probation,liberty
3,24.0,11338.375,probation,liberty
4,30.0,11338.375,probation,liberty
5,36.0,11338.375,probation,liberty
6,42.0,11338.375,probation,liberty
7,48.0,11338.375,probation,liberty
8,54.0,11338.375,probation,liberty
9,60.0,11338.375,probation,liberty


In [501]:
# Stack the prison, parole and probation transition tables and tag every row with
# the single crime_type used by this model.
transitions = pd.concat(
    [prison_transitions, parole_transitions, probation_trans]
).assign(crime_type="gun")

In [502]:
# Self-transition row for the liberty compartment (liberty -> liberty, duration 1,
# weight 1).
manual_row = ["liberty", "liberty", 1.0, 1.0, "gun"]
manual_row_columns = [
    "compartment",
    "outflow_to",
    "compartment_duration",
    "total_population",
    "crime_type",
]

# The original `transitions.loc[len(transitions)] = manual_row` had three problems:
#   * re-running the cell appended another liberty row each time,
#   * the bare list was matched to columns by position, and
#   * it enlarged by label on an index that pd.concat left with duplicate labels.
# Appending a one-row frame with named columns, only when no liberty row exists
# yet, fixes all three. ignore_index=True also gives `transitions` a unique 0..n-1
# index; row order and values are unchanged.
if not transitions["compartment"].eq("liberty").any():
    transitions = pd.concat(
        [transitions, pd.DataFrame([manual_row], columns=manual_row_columns)],
        ignore_index=True,
    )

In [503]:
# Display the combined baseline transitions table, including the manual liberty row.
transitions

Unnamed: 0,compartment,outflow_to,compartment_duration,total_population,crime_type
0,prison,liberty,15.0,18.49,gun
1,prison,liberty,16.0,18.49,gun
2,prison,liberty,31.0,18.49,gun
3,prison,liberty,59.0,32.58,gun
4,prison,liberty,65.0,32.58,gun
5,prison,liberty,118.0,18.24,gun
6,prison,liberty,123.0,32.58,gun
7,prison,liberty,130.0,18.24,gun
8,prison,liberty,221.0,7.85,gun
9,prison,liberty,244.0,7.85,gun


### Policy Transitions

In [504]:
# Policy scenario: every prison and probation duration is raised to at least 120
# months; parole and liberty rows are left untouched.
policy_min_months = 120
alt_transitions = transitions.copy()
is_floored_compartment = alt_transitions["compartment"].isin(["prison", "probation"])
alt_transitions["compartment_duration"] = alt_transitions["compartment_duration"].where(
    ~is_floored_compartment,
    alt_transitions["compartment_duration"].clip(lower=policy_min_months),
)

# Rows that now share a duration are merged by summing their populations. The
# groupby drops crime_type, so it is added back afterwards.
alt_transitions = (
    alt_transitions.groupby(["compartment", "outflow_to", "compartment_duration"])[
        ["total_population"]
    ]
    .sum()
    .reset_index()
    .assign(crime_type="gun")
)

# NOTE(review): an earlier draft appended a manual ["probation", "liberty", 1.0, 1.0, "gun"]
# row here; it was left commented out and is not part of this scenario.
alt_transitions

Unnamed: 0,compartment,outflow_to,compartment_duration,total_population,crime_type
0,liberty,liberty,1.0,1.0,gun
1,parole,liberty,3.0,0.65,gun
2,parole,liberty,5.0,0.65,gun
3,parole,liberty,6.0,97.47,gun
4,parole,liberty,8.0,77.22,gun
5,parole,liberty,9.0,77.87,gun
6,parole,liberty,10.0,77.22,gun
7,parole,liberty,11.0,49.88,gun
8,parole,liberty,13.0,0.61,gun
9,parole,liberty,14.0,0.61,gun


In [505]:
# Write the policy-scenario transitions to a CSV in the working directory, without
# the index column.
alt_transitions.to_csv("alternate_transitions_no_probation.csv", index=False)

In [506]:
# Show the simulation tag read from the YAML config before uploading under it.
simulation_tag

'tx_gun_minimums_v1'

In [507]:
# Upload the model inputs to the staging project under `simulation_tag`.
# Only the outflows and the baseline `transitions` table are passed; the total
# population input is an empty frame.
# NOTE(review): `alt_transitions` is not uploaded here -- it is only written to CSV
# above. Confirm that is intended.
upload_spark_model_inputs(
    project_id="recidiviz-staging",
    simulation_tag=simulation_tag,
    outflows_data_df=outflows,
    transitions_data_df=transitions,
    total_population_data_df=pd.DataFrame(),
    yaml_path=yaml_file_path,
)

100%|████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 7096.96it/s]
100%|███████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 13148.29it/s]


In [508]:
# Shortest baseline duration (months) for each compartment -> outflow pair.
transitions.groupby(["compartment", "outflow_to", "crime_type"])[
    ["compartment_duration"]
].min()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,compartment_duration
compartment,outflow_to,crime_type,Unnamed: 3_level_1
liberty,liberty,gun,1.0
parole,liberty,gun,3.0
prison,liberty,gun,15.0
prison,parole,gun,2.0
probation,liberty,gun,6.0


In [509]:
# Shortest policy-scenario duration per pair; prison and probation should show the
# 120-month floor.
alt_transitions.groupby(["compartment", "outflow_to", "crime_type"])[
    ["compartment_duration"]
].min()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,compartment_duration
compartment,outflow_to,crime_type,Unnamed: 3_level_1
liberty,liberty,gun,1.0
parole,liberty,gun,3.0
prison,liberty,gun,120.0
prison,parole,gun,120.0
probation,liberty,gun,120.0


In [510]:
# Longest policy-scenario duration per pair; prison and parole were capped at
# 40 * 12 = 480 months when their tables were built.
alt_transitions.groupby(["compartment", "outflow_to", "crime_type"])[
    ["compartment_duration"]
].max()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,compartment_duration
compartment,outflow_to,crime_type,Unnamed: 3_level_1
liberty,liberty,gun,1.0
parole,liberty,gun,480.0
prison,liberty,gun,480.0
prison,parole,gun,480.0
probation,liberty,gun,120.0
