The purpose of this notebook is to provide an example application of the statutory ranking algorithm.<br>
Data in this folder is from South Carolina (originally downloaded from [sccourts.org](https://www.sccourts.org/cdr/))

In [None]:
# Import libraries
import pandas as pd

# CONSTANTS
# NOTE: both counters below are incremented by later cells as derived columns are added.
# Column position at which the first derived feature ("capital_offense") is inserted
INSERT_INDEX = 7
# Number of leading columns shown when previewing a DataFrame
NUM_DISPLAY_FEATURES = 12


# Read SC CDR list (the path is relative to the notebook's working directory)
cdrs = pd.read_csv("sc_cdrs_2021.csv")

Data exploration

In [None]:
# DataFrame.shape is a (n_rows, n_columns) tuple, reported here as (CDRs, features)
print(f"Number of CDRs & features available: {cdrs.shape}")

In [None]:
# Preview only the first NUM_DISPLAY_FEATURES columns of the raw data
cols_to_review = cdrs.columns[:NUM_DISPLAY_FEATURES].tolist()

cdrs.loc[:, cols_to_review].head()

Create a boolean flag for whether each offense is a capital offense, along with its associated rank

In [None]:
# A charge is flagged as a capital offense when BOTH conditions hold:
#   1. death is a possible punishment, and
#   2. the punishment is unambiguous, i.e. a positive max or min time is recorded (cf. CDR 3304)
death_possible = cdrs.RangeOfPunishment.str.contains("DEATH", case=False, na=False)
has_time_recorded = (cdrs.max_time_days > 0) | (cdrs.min_time_days > 0)
cdrs.insert(INSERT_INDEX, "capital_offense", death_possible & has_time_recorded)

INSERT_INDEX += 1
# Capital offenses get rank 1; all other charges get rank 2 (most are not capital offenses).
# The column is built as a complete Series before insertion instead of patching it with
# `cdrs["capital_rank"].mask(..., inplace=True)`: that chained in-place call does not write
# back to `cdrs` under pandas Copy-on-Write, which would leave every row at rank 2.
cdrs.insert(INSERT_INDEX, "capital_rank", cdrs["capital_offense"].map({True: 1, False: 2}))
NUM_DISPLAY_FEATURES += 2  # +2 for capital_offense & capital_rank features

# Work on a copy from here on so `cdrs` keeps only the capital-offense features
all_features_df = cdrs.copy()

## Create individual rankings per feature

In [None]:
def create_rank_feature(df_, base_feat, new_feat, insert_index):
    """
    Inserts a new "rank" feature into a dataframe using a passed index.

    Values of ``base_feat`` are ranked in descending order, so the highest value
    gets rank 1 and tied values share the lowest rank of their group
    (``method="min"``). Rows where ``base_feat`` is missing receive a rank of
    (largest assigned rank + 1000) so they sort after every ranked row; if every
    value is missing, all rows receive rank 1000.

    NOTE: ``df_`` is modified in place (the column is inserted into it) and the
    same object is returned.

    :param df_: DataFrame
    :param base_feat: str, name of the existing column to rank
    :param new_feat: str, name of the rank column to create
    :param insert_index: int, column position at which to insert ``new_feat``
    :returns df_: DataFrame
    :raises ValueError: (from DataFrame.insert) if ``new_feat`` already exists
    """

    # Rank the base features in descending order to put the highest values first;
    # missing values are left unranked (NaN) at this point
    ranks = df_[base_feat].rank(method="min", ascending=False)

    # Fill any missing ranks with a rank well above the current maximum.
    # The Series is completed *before* insertion: patching the inserted column via
    # `df_[new_feat].mask(..., inplace=True)` is a chained in-place update that does
    # not write back to the frame under pandas Copy-on-Write.
    worst_rank = ranks.max() if ranks.notna().any() else 0
    ranks = ranks.fillna(worst_rank + 1000)

    # Reset all ranks to int
    df_.insert(insert_index, new_feat, ranks.astype(int))

    return df_

In [None]:
# NOTE: "capital_rank" computed using booleans (c.f. numeric)
ranking_features = ["max_time_days", "min_time_days", "max_fine", "min_fine"]
for feat in ranking_features:
    # Step past the previously inserted column and the next base feature.
    # NOTE(review): this assumes a fixed column layout in the CSV - confirm that each
    # rank lands next to its base feature.
    INSERT_INDEX += 2
    new_feat = f"{feat}_rank"
    all_features_df = create_rank_feature(all_features_df, feat, new_feat, INSERT_INDEX)
    NUM_DISPLAY_FEATURES += 1  # Show the rank feature as well

cols_to_review = list(all_features_df.columns[:NUM_DISPLAY_FEATURES])

In [None]:
# Lift the column-display cap so every feature is visible in the preview below
pd.options.display.max_columns = None
all_features_df.head()

## Create combined ranking
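NOTE: We can't simply concatenate all the numeric features as strings and sort on the result, because strings compare character by character rather than by numeric value (e.g. `str(1180) < str(4)`).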

In [None]:
# Per-feature ranks, listed in order of sorting priority
rank_cols = ["capital_rank", "max_time_days_rank", "min_time_days_rank", "max_fine_rank", "min_fine_rank"]

combo_df = all_features_df[cols_to_review].sort_values(by=rank_cols)

# Flag each row whose ranks all match those of the row directly above it
# (the first row has nothing above it, so it is never flagged)
ranks_of_row_above = combo_df[rank_cols].shift(1, axis="index")
combo_df["dup_ranks"] = combo_df[rank_cols].eq(ranks_of_row_above).all(axis="columns")

In [None]:
def add_oa_rank(df_, dup_feat="dup_ranks"):
    """
    Creates a new feature of the "overall" rank (i.e. "oa_rank").
    Takes a dataframe with a feature indicating a duplicate rank
      and assigns the same rank value to all identical rows.

    Rows are processed in their current order: every row that is NOT flagged as
    a duplicate starts a new group numbered 1, 2, 3, ..., and every flagged row
    takes the number of the closest non-flagged row above it.

    NOTE: ``df_`` is modified in place (the "oa_rank" column is added or
    overwritten) and the same object is returned. The first row is expected to be
    unflagged, as it is when the flag is built by comparing each row to the row above.

    :param df_:DataFrame
    :param dup_feat:str, name of the boolean "same ranks as the row above" column
    :return df_:DataFrame
    """
    # A running count of group starts is exactly the group number, which also
    # carries the number forward onto duplicate rows. This replaces a placeholder
    # column patched with chained `df_['oa_rank'].mask(..., inplace=True)` calls,
    # which do not write back to the frame under pandas Copy-on-Write.
    starts_new_group = ~df_[dup_feat]
    df_["oa_rank"] = starts_new_group.cumsum().astype(int)

    return df_

In [None]:
combo_oa_df = add_oa_rank(combo_df)
# Number of (non-null) CDR codes sharing each overall rank, broadcast back onto every row
codes_per_rank = combo_oa_df.groupby("oa_rank")["CDRCode"].count()
combo_oa_df["num_in_stat_exposure_group"] = combo_oa_df["oa_rank"].map(codes_per_rank)

In [None]:
# Final ranking: overall rank and group size first, followed by the reviewed columns
leading_cols = ["oa_rank", "num_in_stat_exposure_group"]
final_df = combo_oa_df[leading_cols + cols_to_review].sort_values(by=["oa_rank"])
final_df.head(6)  # Show the first 6 rows
# final_df.to_csv("ranked_charges.csv")  # Uncomment to write the final DataFrame to a comma-separated file

In [None]:
# What are the statutory exposure groups with the most charges?
# (value_counts sorts by count, largest first, so the first 5 entries are the 5 biggest groups)
combo_oa_df["oa_rank"].value_counts().head(5)

# Appendix: Step-by-step application of sorting

### Rank by capital punishment eligibility

In [None]:
# Capital offenses (rank 1) come first; sort_values returns a new DataFrame, so `cdrs` keeps its original row order
step1 = cdrs.sort_values(by="capital_rank")

In [None]:
# Reset the counters for the step-by-step walkthrough
INSERT_INDEX = 10  # Column position for the first rank feature added below
NUM_DISPLAY_FEATURES = 14  # The initial 12 columns + capital_offense & capital_rank
cols_to_review = step1.columns[:NUM_DISPLAY_FEATURES].tolist()

step1.loc[:, cols_to_review].head()

### Rank by maximum carceral sentence

In [None]:
# Rank maximum sentences in descending order (largest max_time_days -> rank 1, ties share the lowest rank)
max_time_rank = step1["max_time_days"].rank(method="min", ascending=False)
# Missing sentences get a rank well above the current maximum. The Series is completed
# before insertion: `step1["max_time_rank"].mask(..., inplace=True)` is a chained in-place
# update that does not write back to `step1` under pandas Copy-on-Write.
max_time_rank = max_time_rank.fillna(max_time_rank.max() + 1000)
step1.insert(INSERT_INDEX, "max_time_rank", max_time_rank)
INSERT_INDEX += 2  # insert past the new rank & next base feature
NUM_DISPLAY_FEATURES += 1  # +1 for max_time_rank
cols_to_review = list(step1.columns[:NUM_DISPLAY_FEATURES])

In [None]:
# Both keys sort ascending (the default), so rank 1 rows come first; ranks are then cast to int
step2 = (
    step1
    .sort_values(by=["capital_rank", "max_time_rank"])
    .astype({"max_time_rank": int})
)

step2[cols_to_review].head(20)

### Rank by minimum carceral sentence

In [None]:
# Rank minimum sentences in descending order (largest min_time_days -> rank 1, ties share the lowest rank).
# Ranked from step2 itself, the frame the column is inserted into.
min_time_rank = step2["min_time_days"].rank(method="min", ascending=False)
# Missing sentences get a rank well above the current maximum. The Series is completed
# before insertion: `step2["min_time_rank"].mask(..., inplace=True)` is a chained in-place
# update that does not write back to `step2` under pandas Copy-on-Write.
min_time_rank = min_time_rank.fillna(min_time_rank.max() + 1000)
step2.insert(INSERT_INDEX, "min_time_rank", min_time_rank)
INSERT_INDEX += 2  # insert past the new rank & next base feature
NUM_DISPLAY_FEATURES += 1  # +1 for added rank feature
cols_to_review = list(step2.columns[:NUM_DISPLAY_FEATURES])

In [None]:
# All three keys sort ascending (the default); the new rank is then cast to int
step3 = (
    step2
    .sort_values(by=["capital_rank", "max_time_rank", "min_time_rank"])
    .astype({"min_time_rank": int})
)

step3[cols_to_review].head()

### Rank by maximum fine

In [None]:
INSERT_INDEX += 1  # One-time move forward past "conjuct" feature
# NOTE: create_rank_feature inserts the new column into the frame it is given and
# returns that same object, so `step4` and `step3` refer to one DataFrame after this call.
step4 = create_rank_feature(step3, "max_fine", "max_fine_rank", INSERT_INDEX)
INSERT_INDEX += 2  # insert past the new rank & next base feature

In [None]:
NUM_DISPLAY_FEATURES += 1  # +1 for added rank feature
cols_to_review = step4.columns[:NUM_DISPLAY_FEATURES].tolist()
# Take the rows at positions 140-149 as a sample, then order that sample by the ranks so far
step4_sample = step4[cols_to_review].iloc[140:150]
step4_sample.sort_values(by=["capital_rank", "max_time_rank", "min_time_rank", "max_fine_rank"])

### Rank by minimum fine

In [None]:
# Add the last per-feature rank; the helper returns the frame it was given with the new column inserted
step5 = create_rank_feature(step4, "min_fine", "min_fine_rank", INSERT_INDEX)
NUM_DISPLAY_FEATURES += 1  # +1 for added rank feature
cols_to_review = step5.columns[:NUM_DISPLAY_FEATURES].tolist()

In [None]:
# Take the rows at positions 140-149 as a sample, then order that sample by all five ranks
step5_sample = step5[cols_to_review].iloc[140:150]
step5_sample.sort_values(by=["capital_rank", "max_time_rank", "min_time_rank", "max_fine_rank", "min_fine_rank"])