In [None]:
%matplotlib inline

import matplotlib
import numpy as np
import pandas as pd
import datetime
import dateparser
import dateutil
import re
import random
import matplotlib.pyplot as plt

In [None]:
# Two-digit code for each observation date, numbered in the order the dates
# are listed here; a missing date (NaN or None) is coded "00".
_observation_dates = [
    "7/11/16", "7/12/16", "7/13/16", "7/14/16", "7/15/16",
    "7/5/16", "7/6/16", "7/7/16", "7/8/16",
    "8/30/16", "8/31/16",
    "9/1/16", "9/12/16", "9/13/16", "9/14/16", "9/15/16", "9/19/16",
    "9/2/16", "9/20/16", "9/21/16", "9/22/16", "9/23/16",
    "9/6/16", "9/7/16", "9/8/16", "9/9/16",
]
date_code = {np.nan: "00", None: "00"}
date_code.update(
    (date_text, "{:02d}".format(position))
    for position, date_text in enumerate(_observation_dates, 1)
)
# One-character code per courthouse. Every value has the same width so the
# courthouse always occupies exactly one position in the concatenated
# individual ID; a missing courthouse (NaN or None) is coded "0".
courthouse_code = {
    np.nan: "0",
    None: "0",
    "Bridgeview": "1",
    "Central": "2",
    "Markham": "3",
    "Maywood": "4",
    "Rolling Meadows": "5",
    "Skokie": "6",
}
# Two-digit code per observer (or observer pair), numbered in the order the
# groups are listed. Every spelling inside one tuple shares a single code, so
# the two orderings of the same pair map to the same value. A missing
# observer (NaN or None) is coded "00".
_observer_groups = [
    ("Adrienne",),
    ("Adrienne & Monica",),
    ("Adrienne & Sam",),
    ("Amani",),
    ("Amani & Asif",),
    ("Amani & Maya",),
    ("Asif",),
    ("Camille",),
    ("Emily",),
    ("Emily & Sam",),
    ("Maya & Hafsa", "Hafsa & Maya"),
    ("Jeanne",),
    ("Leonor",),
    ("Sam",),
    ("Maya & Asif",),
    ("Leonor & Maya",),
    ("Maya",),
    ("Maya J",),
]
observer_code = {np.nan: "00", None: "00"}
for _number, _spellings in enumerate(_observer_groups, 1):
    for _spelling in _spellings:
        observer_code[_spelling] = "{:02d}".format(_number)

In [None]:
# Read the charge lookup table and split it in two:
#   remove_charges - crimes whose EXCLUDE column is "YES"
#   charge_dict    - crime -> classification attributes for all other crimes
charges = pd.read_csv("../Data/Helper Data/Unique Charges from Court.csv")

# (attribute name stored in charge_dict, column of the lookup table)
_charge_fields = [
    ("Violent", "VIOLENT"),
    ("Criminal Justice System Related", "CJS RELATED CRIME"),
    ("Gun Related", "GUN INVOLVED"),
    ("Police Related", "POLICE RELATED CRIME"),
    ("Broad Charge Type", "BROAD TYPE"),
    ("Narrow Charge Type", "NARROW TYPE"),
]

remove_charges = set()
charge_dict = {}
for _, charge_row in charges.iterrows():
    crime = charge_row["CRIME"]
    if charge_row["EXCLUDE"] == "YES":
        remove_charges.add(crime)
    else:
        charge_dict[crime] = {
            label: charge_row[column] for label, column in _charge_fields
        }

In [None]:
# Load the cleaned courthouse observations; the first line of the CSV holds
# the column names.
cleaned_csv_path = "../Data/Cleaned Data/Cleaned_Courthouse_9.csv"
df = pd.read_csv(cleaned_csv_path, header=0)

In [None]:
def _parse_duration(raw):
    """Turn an "H:M:S" string into a timedelta; a missing value becomes None."""
    if pd.isnull(raw):
        return None
    hours, minutes, seconds = (int(piece) for piece in raw.split(":"))
    return datetime.timedelta(hours=hours, minutes=minutes, seconds=seconds)


# Replace the textual durations with timedelta objects.
df["Duration"] = [_parse_duration(raw) for raw in df["Duration"]]

In [None]:
def _code_for(table, value):
    """Return the code stored in `table` for `value`.

    NaN never compares equal to itself, so a dict lookup only succeeds for a
    NaN that is the very same object as the `np.nan` key. Any float NaN is
    therefore swapped for `np.nan` before the lookup; every other value is
    looked up unchanged (and raises KeyError when it has no code).
    """
    if isinstance(value, float) and value != value:
        value = np.nan
    return table[value]


# Build one identifier per row: date code + observer code + courthouse code +
# upper-cased "LastFirst" name with all non-alphanumeric characters removed.
# Rows whose cleaned name is empty are flagged in "No Name".
ids = []
no_names = []
# Raw string: "\W" is not a valid Python string escape, so the non-raw
# spelling triggers an invalid-escape warning on current Python versions.
pattern = re.compile(r'[\W_]+')
for i, row in df.iterrows():
    name_parts = []
    for column in ("Last Name", "First Name"):
        if pd.notnull(row[column]):
            name_parts.append(pattern.sub('', row[column]))
    name = "".join(name_parts)
    no_names.append(1 if len(name) == 0 else 0)
    ids.append("".join([
        _code_for(date_code, row["Date"]),
        _code_for(observer_code, row["Observer(s)"]),
        _code_for(courthouse_code, row["Courthouse"]),
        name.upper(),
    ]))

df["Individual ID"] = ids
df["No Name"] = no_names

In [None]:
def uniqueid(start=None):
    """Yield an endless run of consecutive integers.

    Parameters
    ----------
    start : int, optional
        First value to yield. When omitted, a random 32-bit integer is drawn,
        so ids differ from run to run; pass an explicit value to make the
        generated ids reproducible.

    Yields
    ------
    int
        `start`, `start + 1`, `start + 2`, ... without end.
    """
    seed = random.getrandbits(32) if start is None else start
    while True:
        yield seed
        seed += 1

In [None]:
# A single counter feeds both id families, so no individual id can ever
# coincide with a charge id.
unique_sequence = uniqueid()

# One anonymous number per distinct individual id ...
unique_ids = set(ids)
anon_id_dict = {iid: next(unique_sequence) for iid in unique_ids}

# ... then one fresh number per row (i.e. per charge), in row order.
anon_ids = [anon_id_dict[iid] for iid in df["Individual ID"]]
charge_ids = [next(unique_sequence) for _ in range(len(df))]

df["Anonymous Individual ID"] = anon_ids
df["Charge ID"] = charge_ids

In [None]:
# Preview only the first rows: at this point the frame still carries the
# "First Name" / "Last Name" columns, and a full dump would store them in the
# notebook's saved output.
df.head()

In [None]:
# Write the full frame, ids included, to disk without the row index.
safety_check_path = "../Output/safety_check.csv"
df.to_csv(safety_check_path, index=False)

In [None]:
# Collapse the per-charge rows into one record per anonymous individual.
# Judge, race and names are taken from the first row seen for that person;
# later rows only bump the charge count and extend the charge list.
individuals = {}

# Last Name and First Name will not be present in anonymized data
has_last_name = "Last Name" in df.columns
has_first_name = "First Name" in df.columns

for _, row in df.iterrows():
    anon_id = row["Anonymous Individual ID"]
    record = individuals.get(anon_id)

    if record is not None:
        record["Charge Count"] += 1
        record["Charge List"].append(row["Charge"])
        continue

    record = {
        "Charge Count": 1,
        "Judge": row["Judge"],
        "Race": row["Race"],
        "Charge List": [row["Charge"]],
    }
    if has_last_name:
        record["Last Name"] = row["Last Name"]
    if has_first_name:
        record["First Name"] = row["First Name"]
    individuals[anon_id] = record

In [None]:
# [anonymous id, list of charges] for everyone recorded with more than one charge.
mult_charge = [
    [person_id, details["Charge List"]]
    for person_id, details in individuals.items()
    if details["Charge Count"] > 1
]

In [None]:
# Number of individuals recorded with more than one charge.
len(mult_charge)

In [None]:
# Tabulate the multi-charge individuals: one row per person, with the
# anonymous id and the list of their charges.
mci_columns = ["ID", "Charges"]
mci = pd.DataFrame(mult_charge, columns=mci_columns)

In [None]:
# Save the multi-charge table without the row index.
mci_path = "../Output/Individuals-with-multiple-charges.csv"
mci.to_csv(mci_path, index=False)

In [None]:
# Column order of the analysis export; every row built below follows it.
cols = [
    "Hearing ID", "Charge ID", "Number of Charges at Hearing", "Charge", "Class",
    "Broad Charge Type", "Narrow Charge Type", "Violent", "Police Related",
    "Gun Related", "Criminal Justice System Related", "Duration in Seconds",
    "Criminal Risk Scale", "No Show Risk Scale", "Old Risk Scale", "Judge",
    "Courthouse", "Bond", "Bond Type", "Date",
]

new_df_list = []
for i, row in df.iterrows():
    charge = row["Charge"]

    # Skip rows with no charge, or with a charge marked for exclusion.
    if pd.isnull(charge) or charge in remove_charges:
        continue

    # Skip rows for which no name was recorded.
    if row["No Name"] == 1:
        continue

    # Skip rows with a missing or zero final bond.
    if pd.isnull(row["Final Bond"]) or row["Final Bond"] == 0:
        continue

    # Look the classification up once; a charge absent from charge_dict
    # raises KeyError here, exactly as before.
    charge_info = charge_dict[charge]

    # `.seconds` is only the seconds component within a day (whole days are
    # dropped), so the full length has to come from total_seconds().
    duration = row["Duration"]
    if pd.notnull(duration):
        duration_seconds = int(duration.total_seconds())
    else:
        duration_seconds = None

    anon_id = row["Anonymous Individual ID"]
    new_df_list.append([
        anon_id,                                   # Hearing ID
        row["Charge ID"],
        individuals[anon_id]["Charge Count"],      # Number of Charges at Hearing
        charge,
        row["Class"],
        charge_info["Broad Charge Type"],
        charge_info["Narrow Charge Type"],
        charge_info["Violent"],
        charge_info["Police Related"],
        charge_info["Gun Related"],
        charge_info["Criminal Justice System Related"],
        duration_seconds,
        row["Other Criminal Activity Risk (Scale)"],
        row["No show risk (Scale)"],
        row["Scale (if only one number)"],
        row["Judge"],
        row["Courthouse"],
        row["Final Bond"],
        row["Bond Type"],
        row["Date"],
    ])
    

In [None]:
# Assemble the filtered rows into a frame and export it without the row index.
analysis_path = "../Output/All-Charges-for-Analysis-with-Dates.csv"
new_df = pd.DataFrame(new_df_list, columns=cols)
new_df.to_csv(analysis_path, index=False)