# Creating Database of Bond Court Observations

In [None]:
import pandas as pd
import numpy as np
import datetime
import dateparser
import dateutil
from standardize import standard_classes
from standardize import class_rank
from standardize import judges
from standardize import race_dict

### Read in Data

In [None]:
# Load every input table used by the notebook. All paths are relative to the notebook's folder.
# NOTE(review): the trailing comments below date each jail snapshot one day earlier than the date
# in its file name, while later cells key these frames by the file-name date
# (e.g. jail_1 -> 2016-07-12) -- confirm which date each snapshot really represents.
arrest_lookup = pd.read_csv("../Data/Original Data/ArrestData.csv", header=0) #Hand look ups in arrest database, alternative name spellings
cpd_arrest_data = pd.read_csv("../Data/Original Data/ReleaseData.csv", header=0) #Data dump of arrest database, releases between July 4th and 16th
jail_1 = pd.read_csv("../Data/Original Data/CookCounty07122016.csv", header=0) #jail snapshot from July 11, 2016
jail_2 = pd.read_csv("../Data/Original Data/CookCounty07132016.csv", header=0) #jail snapshot from July 12, 2016
jail_3 = pd.read_csv("../Data/Original Data/CookCounty07142016.csv", header=0) #jail snapshot from July 13, 2016
jail_4 = pd.read_csv("../Data/Original Data/CookCounty07152016.csv", header=0) #jail snapshot from July 14, 2016
jail_5 = pd.read_csv("../Data/Original Data/CookCounty07182016.csv", header=0) #jail snapshot from July 17, 2016
jail_6 = pd.read_csv("../Data/Original Data/CookCounty07192016.csv", header=0) #jail snapshot from July 18, 2016
jail_7 = pd.read_csv("../Data/Original Data/CookCounty07202016.csv", header=0) #jail snapshot from July 19, 2016
jail_8 = pd.read_csv("../Data/Original Data/CookCounty07212016.csv", header=0) #jail snapshot from July 20, 2016
jail_9 = pd.read_csv("../Data/Original Data/CookCounty07222016.csv", header=0) #jail snapshot from July 21, 2016
bond_court = pd.read_csv("../Data/Original Data/CourthouseAbbreviated.csv", header=0) #Bond court observations
# Alternative-name tables consumed by add_altnames(); the first one is read without a header row.
courthouse_altnames = pd.read_csv("../Output/Courthouse Only Names.csv", header = None)
fuzzy_altnames = pd.read_csv("../Output/Fuzzy Found Names.csv")
# Hand-made courthouse matches, used when neither a jail nor an arrest record is found.
courthouse_lookup = pd.read_csv("../Output/Courthouse Manual Matches.csv", header = 0)
# Charge description / statute / class table used to build class_dict.
charge_classes = pd.read_csv("../Data/Helper Data/CookCountyAllChargesAndClasses.csv", header = 0)

In [None]:
# Collects (statute, charge description) pairs for every charge that new_charge()
# creates without a charge class, so they can be reviewed afterwards.
unclassed_charges = []

### Functions for Later

In [None]:
def new_entry(lastname, firstname, bond_date):
    '''
    Build the default record for one person.

    lastname, firstname: name strings stored as given; "firstname lastname"
        becomes the first member of the "All Spellings" set.
    bond_date: value stored under "Bond Court Date".

    Every other field starts as None, False or an empty list and is filled
    in by the matching steps. Returns a new dictionary on every call.
    '''
    full_name = firstname + " " + lastname

    # (field, default) pairs, kept in the order the fields are laid out
    defaults = (
        ("Last Name", lastname),
        ("First Name", firstname),
        ("All Spellings", {full_name}),
        ("Bond Court Date", bond_date),
        ("Charges", []),
        ("Days in Police Custody", None),
        ("CPD Arrest Date", None),
        ("CPD Release Date", None),
        ("Booking Date", None),
        ("Total Bond", None),
        ("Highest Charge Rank", None),
        ("Highest Charge Class", None),
        ("Highest Charge Description", None),
        ("Race", None),
        ("Age", None),
        ("Gender", None),
        ("Jail ID", None),
        ("Arrest ID", None),
        ("Jail Record", False),
        ("Arrest Record", False),
        ("Court Record", False),
        ("Unaccounted Custody", False),
        ("Number of Charges", None),
        ("No Bond", False),
        ("Bond Type", None),
        ("Days in Jail", None),
        ("Judge", None),
        ("Old Scale", None),
        ("Threat Risk Scale", None),
        ("No Show Risk Scale", None),
        ("Courthouse", None),
        ("Jail Release Date", None),
        ("Still in Jail After a Week", False),
        ("Courthouse Record", False),
        ("First Name from Observations", None),
        ("Last Name from Observations", None),
    )
    return dict(defaults)

In [None]:
def new_charge(**kwargs):
    '''
    Build the dictionary describing one charge.

    Required keyword arguments: charge_class, charge_desc, charge_disp,
    charge_statute, booking_id, charge_code, rank (a missing one raises
    KeyError).

    A charge whose class is empty or None is also logged in
    unclassed_charges as a (statute, description) pair.
    '''
    # (output field, keyword argument it is read from)
    field_sources = (
        ("Charge Class", "charge_class"),
        ("Charge Description", "charge_desc"),
        ("Case Disposition", "charge_disp"),
        ("Statute", "charge_statute"),
        ("Booking ID", "booking_id"),
        ("Charge Code", "charge_code"),
        ("Charge Rank", "rank"),
    )
    charge = {label: kwargs[source] for label, source in field_sources}

    if charge["Charge Class"]:
        return charge

    unclassed_charges.append((charge["Statute"], charge["Charge Description"]))
    return charge

In [None]:
def text_clean(text, keep_all = False, first_and_last = False, last_name = False):
    '''
    Normalise a name-like string: drop periods and commas, collapse
    whitespace, and return the selected words in upper case.

    text: string to clean (must be a str; missing values are not handled here).
    last_name: return only the last word (checked first).
    first_and_last: return the first and last words joined by one space,
        or the single word if there is only one.
    keep_all: return every word joined by single spaces.
    With no flag set, return only the first word (drops middle names/initials).

    Returns "" when nothing is left after cleaning (empty, whitespace-only
    or punctuation-only input) instead of raising IndexError.
    '''
    words = text.replace(".", "").replace(",", "").split()

    # Nothing to pick from: every mode indexes into words, so bail out early.
    if not words:
        return ""

    if last_name:
        return words[-1].upper()

    if first_and_last:
        if len(words) >= 2:
            return " ".join([words[0], words[-1]]).upper()
        return words[0].upper()

    if keep_all:
        return " ".join(words).upper()

    return words[0].upper()


In [None]:
def standardize_class(class_name):
    '''
    Translate a raw charge class into its standard name.

    Returns None for an empty or missing class_name; otherwise the
    standard_classes entry for the upper-cased name (KeyError if the
    name is not in standard_classes).
    '''
    if not class_name or pd.isnull(class_name):
        return None
    return standard_classes[class_name.upper()]

In [None]:
def class_lookup(statute, description):
    '''
    Look up the charge class for a statute and/or charge description.

    statute, description: strings, or missing values (None / NaN).

    class_dict is keyed by upper-cased "DESCRIPTION STATUTE", by upper-cased
    description alone and by upper-cased statute alone, so candidates are
    tried from most to least specific: combined key, statute, description.
    Every candidate is upper-cased before the lookup so mixed-case input
    matches the dictionary keys.

    Returns the class stored in class_dict, or None when nothing matches.
    '''
    has_statute = pd.notnull(statute)
    has_description = pd.notnull(description)

    candidates = []
    if has_statute and has_description:
        candidates.append(" ".join([description, statute]))
    if has_statute:
        candidates.append(statute)
    if has_description:
        candidates.append(description)

    for candidate in candidates:
        key = candidate.upper()
        if key in class_dict:
            return class_dict[key]

    return None

In [None]:
def add_altnames(df, db, iid):
    '''
    Merge alternative spellings from df into db[iid]["All Spellings"].

    Each row of df holds up to four spellings of one name (extra columns
    are ignored). Only the first row whose first column is already a known
    spelling is used: its first column and every non-null value in columns
    two to four are added after text_clean(..., keep_all = True).
    Modifies db in place and returns nothing.
    '''
    # positions of the optional spellings: columns 2-4 where they exist
    extra_positions = range(1, min(len(df.columns), 4))

    for _, record in df.iterrows():
        known = db[iid]["All Spellings"]
        if record.iloc[0] not in known:
            continue

        known.add(text_clean(record.iloc[0], keep_all = True))
        for position in extra_positions:
            spelling = record.iloc[position]
            if pd.notnull(spelling):
                known.add(text_clean(spelling, keep_all = True))
        break

## Create Dictionary of Standardized Classes

In [None]:
# class_dict maps three kinds of upper-cased keys to a standard charge class:
# "DESCRIPTION STATUTE", the description alone and the statute alone.
# When a key occurs more than once, the class with the higher class_rank is kept.
class_dict = {}
for i, row in charge_classes.iterrows():
    description = row["Charge Description"]
    statute = row["Statute"]
    has_description = pd.notnull(description)
    has_statute = pd.notnull(statute)

    # Candidate keys for this row: combined, description only, statute only
    keys = []
    if has_description and has_statute:
        keys.append(" ".join([description.upper(), statute.upper()]))
    if has_description:
        keys.append(description.upper())
    if has_statute:
        keys.append(statute.upper())

    for key in keys:
        if key not in class_dict:
            class_dict[key] = standardize_class(row["Class"])
        elif class_rank[class_dict[key]] < class_rank[standardize_class(row["Class"])]:
            class_dict[key] = standardize_class(row["Class"])

## Create Dictionary of Individuals

In [None]:
# The nine bond court dates, parsed with dateparser and reduced to plain
# datetime.date values so they compare equal to the parsed observation dates.
observed_days = []
for stamp in ("7/5/2016", "7/6/2016", "7/7/2016", "7/8/2016", "7/11/2016",
              "7/12/2016", "7/13/2016", "7/14/2016", "7/15/2016"):
    day = dateparser.parse(stamp)
    observed_days.append(datetime.date(day.year, day.month, day.day))

day_1, day_2, day_3, day_4, day_5, day_6, day_7, day_8, day_9 = observed_days

# Name tuples grouped by where (or whether) a match was found
jail_matches = set()
arrest_matches = set()
no_matches = set()

current = None  # "FIRST LAST" of the most recent court row that started a person
iid = 0         # running id of the person currently being built
db = {}         # iid -> record created by new_entry()

# Jail snapshot to search for each bond court date: the one taken 7 days later.
week_later_snapshots = {
    day_1: jail_1,
    day_2: jail_2,
    day_3: jail_3,
    day_4: jail_4,
    day_5: jail_5,
    day_6: jail_6,
    day_7: jail_7,
    day_8: jail_8,
    day_9: jail_9,
}

# Build one db entry per person observed in bond court, then try to match that
# person against (1) the jail snapshot taken a week later, (2) the CPD
# arrest/release dump and, failing both, (3) the hand-made courthouse matches.
for i, court_row in bond_court.iterrows():
    # Rows without both names cannot be matched to anything.
    if pd.isnull(court_row["Defendent First Name"]) or pd.isnull(court_row["Defendent Last Name"]):
        continue

    first = text_clean(court_row["Defendent First Name"])
    last = text_clean(court_row["Defendent Last Name"], last_name = True)
    this = " ".join([first, last])

    # Consecutive rows with the same cleaned name belong to the same person;
    # only the first of them creates an entry.
    if current == this:
        continue
    current = this
    iid += 1

    day = dateparser.parse(court_row["Date"])
    bond_date = datetime.date(day.year, day.month, day.day)

    # An unexpected bond date gets an empty frame rather than silently reusing
    # the previous person's snapshot (or failing on the very first row).
    jail_data = week_later_snapshots.get(bond_date)
    if jail_data is None:
        jail_data = pd.DataFrame([])

    db[iid] = new_entry(last, first, bond_date)
    entry = db[iid]
    entry["First Name from Observations"] = first
    entry["Last Name from Observations"] = last
    if pd.notnull(court_row["Judge"]):
        entry["Judge"] = judges[text_clean(court_row["Judge"], keep_all = True)]
    if pd.notnull(court_row["Scale"]):
        entry["Old Scale"] = court_row["Scale"]
    if pd.notnull(court_row["Risk Scale"]):
        entry["Threat Risk Scale"] = court_row["Risk Scale"]
    if pd.notnull(court_row["No Show Risk"]):
        entry["No Show Risk Scale"] = court_row["No Show Risk"]
    if pd.notnull(court_row["Courthouse"]):
        entry["Courthouse"] = text_clean(court_row["Courthouse"], keep_all = True)
    if pd.notnull(court_row["Bond Type"]):
        entry["Bond Type"] = court_row["Bond Type"].upper()
    if pd.notnull(court_row["Race"]):
        entry["Race"] = race_dict[court_row["Race"]]
    if pd.notnull(court_row["Gender"]):
        entry["Gender"] = text_clean(court_row["Gender"])
    if pd.notnull(court_row["Jail ID for Disamb"]):
        entry["Jail ID"] = court_row["Jail ID for Disamb"]
    if pd.notnull(court_row["Total Bond"]):
        # Observed amounts carry "$", thousands separators and the markers "*" / " (?)"
        entry["Total Bond"] = float(court_row["Total Bond"].strip("$").replace(",", "").replace("*", "").replace(" (?)", ""))

    # The arrest DF is the result of looking up individuals by hand to find
    # arrest records on the CPD website, and is used here only to add alternative
    # spellings that were found.
    # Reset per person so a previous person's alternative name is never reused.
    alt_first = None
    alt_last = None
    for k, row in arrest_lookup.iterrows():
        matchrowlast = row["Original Spelling: Defendent Last Name"]
        matchrowfirst = row["Original Spelling: Defendent First Name"]
        if pd.isnull(matchrowlast) or pd.isnull(matchrowfirst):
            continue

        matchrowlast = text_clean(str(matchrowlast), last_name = True)
        matchrowfirst = text_clean(str(matchrowfirst))
        if matchrowlast != last or matchrowfirst != first:
            continue

        if pd.notnull(row["Arrest Last Name"]) and pd.notnull(row["Arrest First Name"]):
            alt_last = text_clean(row["Arrest Last Name"], last_name = True)
            alt_first = text_clean(row["Arrest First Name"])

        if alt_last and alt_first:
            entry["Last Name"] = alt_last
            entry["First Name"] = alt_first
            entry["All Spellings"].add(" ".join([alt_first, alt_last]))
        break

    # Add alternative names from fuzzy and court records. Can take only up to 4 different name spellings
    add_altnames(fuzzy_altnames, db, iid)
    add_altnames(courthouse_altnames, db, iid)

    # Look for jail records 7 days after arrest
    jailtime = False
    arrestrecord = False
    no_match = True
    namestr = (first, last, alt_first, alt_last)

    for j, row in jail_data.iterrows():
        jail_last = text_clean(row["InmateLast"], last_name = True)
        jail_first = text_clean(row["InmateFirst"])
        jailname = " ".join([jail_first, jail_last])

        #add fuzzy name matching

        if jailname not in entry["All Spellings"]:
            continue
        # A jail id recorded in the observations disambiguates people sharing a name.
        if entry["Jail ID"] and entry["Jail ID"] != row["Inmateid"]:
            continue

        #add check to make sure they have not been in jail the whole time

        jail_matches.add(namestr)
        no_match = False
        jailtime = True

        # var for section
        race = row["Race"]
        age = row["Age"]
        gender = row["Gender"]
        jailid = row["Inmateid"]
        bookdate = row["Bookingdate"]
        bond = row["TotalBond"]

        if pd.notnull(race):
            entry["Race"] = race_dict[race]
        if pd.notnull(age):
            entry["Age"] = age
        if pd.notnull(gender):
            entry["Gender"] = text_clean(gender)
        if pd.notnull(jailid):
            entry["Jail ID"] = jailid
        if pd.notnull(bookdate):
            day = dateparser.parse(bookdate)
            entry["Booking Date"] = datetime.date(day.year, day.month, day.day)
        if pd.notnull(bond) and bond != 0:
            # NOTE(review): stored under "Jail Bond" here, while the later
            # post-processing cell writes the jail amount to "Total Bond" -- confirm.
            entry["Jail Bond"] = float(bond.strip("$").replace(",", ""))
        else:
            entry["No Bond"] = True
        # The snapshot is from 7 days after the hearing, so the stay is counted up to that day.
        # Without a booking date there is nothing to subtract from.
        if entry["Booking Date"] is not None:
            entry["Days in Jail"] = ((bond_date + datetime.timedelta(days=7)) - entry["Booking Date"]).days
        entry["Jail Record"] = True
        entry["Still in Jail After a Week"] = True

        break

    # Look for name in arrest and release data
    for l, row in cpd_arrest_data.iterrows():
        rel_first = text_clean(row["first_name"])
        rel_last = text_clean(row["last_name"], last_name = True)
        rel_name = " ".join([rel_first, rel_last])

        if rel_name not in entry["All Spellings"]:
            continue

        arrest_matches.add(namestr)
        arrestrecord = True
        no_match = False

        # var for section
        arrest_id = row["central_booking"]
        arrest_date = row["received_in_custody"]
        release_date = row["released_from_custody"]
        age = row["age"]
        bond = row["amount"]

        if pd.notnull(arrest_id):
            entry["Arrest ID"] = arrest_id
        if pd.notnull(arrest_date):
            day = dateparser.parse(arrest_date)
            entry["CPD Arrest Date"] = datetime.date(day.year, day.month, day.day)
        if pd.notnull(release_date):
            # Keep the parsed release date; otherwise the arrest date would be stored again.
            day = dateparser.parse(release_date)
            entry["CPD Release Date"] = datetime.date(day.year, day.month, day.day)
        if pd.notnull(arrest_date) and pd.notnull(release_date):
            entry["Days in Police Custody"] = (entry["CPD Release Date"] - entry["CPD Arrest Date"]).days
        if pd.notnull(age):
            entry["Age"] = age
        # The court-observed bond wins; the arrest data only fills a gap.
        if pd.notnull(bond) and not entry["Total Bond"]:
            entry["Total Bond"] = float(bond)
        entry["Arrest Record"] = True

        # With no jail record, the arrest data's spelling becomes the primary name.
        if not entry["Jail Record"]:
            entry["First Name"] = rel_first
            entry["Last Name"] = rel_last

        break

    # If neither arrest record nor jail record found, look in data obtained from looking up
    # missing individuals at courthouse by hand.
    if no_match:
        for n, row in courthouse_lookup.iterrows():
            # Every non-null name contributes its full form and its first+last form.
            name_list = []
            for column in ("Name 1", "Name 2", "Name 3", "Name 4"):
                if pd.notnull(row[column]):
                    name_list.append(text_clean(row[column], keep_all = True))
                    name_list.append(text_clean(row[column], first_and_last = True))

            all_names = set(name_list)
            if entry["All Spellings"] & all_names:
                no_match = False

                # var for section
                arrest_id = row["CBN"]
                if pd.notnull(row["DOB"]):
                    dobdate = dateparser.parse(row["DOB"])
                    DOB = datetime.date(dobdate.year, dobdate.month, dobdate.day)
                    # Age in whole years on the bond court date
                    age = (dateutil.relativedelta.relativedelta(bond_date, DOB)).years
                else:
                    age = None

                entry["Age"] = age
                # NOTE(review): a courthouse match is flagged as "Arrest Record";
                # "Court Record" / "Courthouse Record" are never set in this loop -- confirm intent.
                entry["Arrest Record"] = True
                entry["Arrest ID"] = arrest_id

                break

    if no_match:
        no_matches.add(namestr)
    if not entry["Race"] and pd.notnull(court_row["Race"]):
        entry["Race"] = race_dict[court_row["Race"]]
    if court_row["Bond Type"] == "NONE":
        entry["No Bond"] = True

print(iid)

### Post-process: Look for individuals in other jail days

Dictionary: Which jail dataframes to check for each bond court date

In [None]:
# Snapshots available during the release windows, keyed by day of July 2016.
# Days without a snapshot are represented by an empty DataFrame below.
snapshot_by_july_day = {
    12: jail_1,
    13: jail_2,
    14: jail_3,
    15: jail_4,
    18: jail_5,
    19: jail_6,
    20: jail_7,
    21: jail_8,
}

# release_dates[bond court date][date] -> jail DataFrame to search, for the
# bond court date itself and each of the six days after it.
release_dates = {}
for bond_july_day in (5, 6, 7, 8, 11, 12, 13, 14, 15):
    bond_day = datetime.date(2016, 7, bond_july_day)
    window = {}
    for offset in range(7):
        window_day = bond_day + datetime.timedelta(days=offset)
        if window_day.day in snapshot_by_july_day:
            window[window_day] = snapshot_by_july_day[window_day.day]
        else:
            window[window_day] = pd.DataFrame([])
    release_dates[bond_day] = window

# Dates for which a jail snapshot was loaded (same dates the snapshots are keyed
# by elsewhere in this notebook). A computed release date outside this list
# cannot be confirmed from the data. July 15 has a snapshot (jail_4); July 17 does not.
have_jail_data = [datetime.date(2016, 7, 12), datetime.date(2016, 7, 13), datetime.date(2016, 7, 14), \
                             datetime.date(2016, 7, 15), datetime.date(2016, 7, 18), datetime.date(2016, 7, 19), \
                             datetime.date(2016, 7, 20), datetime.date(2016, 7, 21), datetime.date(2016, 7, 22)]

For people with no jail record 7 days after their bond court appearance, check the jail records from the preceding days.
This assumes that the defendant was not released and then brought back in on new charges within 7 days of the bond hearing.
The release day is set to the first day on which the name no longer appears, which is a problem for days with no jail snapshot; those records will need to be looked up and updated individually.

In [None]:
# For everyone without a jail record a week after bond court, search the
# snapshots of the days in between. The last day a person is seen fixes the
# release date (the day after), from which the days in jail are computed.
for i, person in db.items():
    dates_in_jail = []
    if not person["Jail Record"]:
        for date, jail_df in release_dates[person["Bond Court Date"]].items():
            for j, row in jail_df.iterrows():
                # Clean the jail name the same way the main matching loop does,
                # so it is comparable with the cleaned spellings.
                jail_first = text_clean(row["InmateFirst"])
                jail_last = text_clean(row["InmateLast"], last_name = True)
                jailname = " ".join([jail_first, jail_last])
                if jailname not in person["All Spellings"]:
                    continue

                dates_in_jail.append(date)

                # var for section
                race = row["Race"]
                age = row["Age"]
                gender = row["Gender"]
                jailid = row["Inmateid"]
                bookdate = row["Bookingdate"]
                bond = row["TotalBond"]

                # Race and gender are normalised exactly as in the main loop.
                if pd.notnull(race):
                    person["Race"] = race_dict[race]
                if pd.notnull(age):
                    person["Age"] = age
                if pd.notnull(gender):
                    person["Gender"] = text_clean(gender)
                if pd.notnull(jailid):
                    person["Jail ID"] = jailid
                if pd.notnull(bookdate):
                    day = dateparser.parse(bookdate)
                    person["Booking Date"] = datetime.date(day.year, day.month, day.day)
                if pd.notnull(bond) and bond != 0:
                    person["Total Bond"] = float(bond.strip("$").replace(",", ""))
                else:
                    # Flag the person being processed, not the last entry of the previous cell.
                    person["No Bond"] = True

                break #remove if change to adding charges here

        if dates_in_jail:
            print(dates_in_jail)
            person["Jail Record"] = True
            person["Jail Release Date"] = max(dates_in_jail) + datetime.timedelta(days=1)
            # Without a booking date the length of stay cannot be computed.
            if person["Booking Date"] is not None:
                person["Days in Jail"] = (person["Jail Release Date"] - person["Booking Date"]).days
            if person["Jail Release Date"] not in have_jail_data:
                print(person["Jail ID"]) #The actual release dates for these people need to be confirmed

### Post-process: Add all charges to individual record

Jail data frames by date of snapshot

In [None]:
# Snapshot DataFrame for each date in July 2016 on which one was taken
jail_dfs = {
    datetime.date(2016, 7, july_day): snapshot
    for july_day, snapshot in zip(
        (12, 13, 14, 15, 18, 19, 20, 21, 22),
        (jail_1, jail_2, jail_3, jail_4, jail_5, jail_6, jail_7, jail_8, jail_9),
    )
}

Add all charges to individual record

In [None]:
def _blank_charge_fields():
    '''Return the keyword arguments new_charge() expects, all set to None.'''
    return {
        "charge_desc" : None,
        "charge_class" : None,
        "charge_statute" : None,
        "charge_disp" : None,
        "booking_id" : None,
        "charge_code" : None,
        "rank" : None
    }


# Attach every charge to its person, taken from the best source available:
# the jail snapshot, else the CPD arrest data, else the courthouse matches.
for iid, person in db.items():
    if person["Jail Record"]:
        # Use the last snapshot the person is known to appear in.
        if person["Still in Jail After a Week"]:
            check_date = person["Bond Court Date"] + datetime.timedelta(days=7)
        elif person["Jail Release Date"]:
            check_date = person["Jail Release Date"] - datetime.timedelta(days=1)
        else:
            # No way to tell which snapshot to read; never reuse the previous person's date.
            continue

        jail = jail_dfs[check_date]
        for i, row in jail.iterrows():
            if person["Jail ID"] != row["Inmateid"]:
                continue

            kwargs = _blank_charge_fields()

            if pd.notnull(row["charge_description"]):
                kwargs["charge_desc"] = row["charge_description"].upper()
            # Prefer the class given in the jail data; otherwise look it up.
            if pd.notnull(row["charge_crimeType"]):
                kwargs["charge_class"] = standardize_class(row["charge_crimeType"].upper())
            elif pd.notnull(row["charge_description"]) or pd.notnull(row["charge_statute"]):
                kwargs["charge_class"] = standardize_class(class_lookup(row["charge_statute"], row["charge_description"]))
            if pd.notnull(row["charge_statute"]):
                kwargs["charge_statute"] = row["charge_statute"].upper()
            if pd.notnull(row["casedisposition"]):
                kwargs["charge_disp"] = row["casedisposition"].upper()
            if pd.notnull(row["Bookingid"]):
                kwargs["booking_id"] = row["Bookingid"]
            if kwargs["charge_class"]:
                kwargs["rank"] = class_rank[kwargs["charge_class"]]

            person["Charges"].append(new_charge(**kwargs))

    elif person["Arrest Record"]:
        for i, row in cpd_arrest_data.iterrows():
            if person["Arrest ID"] != row["central_booking"]:
                continue

            kwargs = _blank_charge_fields()

            if pd.notnull(row["description"]):
                kwargs["charge_desc"] = row["description"].upper()

            if pd.notnull(row["statute"]):
                kwargs["charge_statute"] = row["statute"].upper()

            # The arrest data carries no class, so it always comes from the lookup.
            if pd.notnull(row["statute"]) or pd.notnull(row["description"]):
                kwargs["charge_class"] = standardize_class(class_lookup(row["statute"], row["description"]))
            if kwargs["charge_class"]:
                kwargs["rank"] = class_rank[kwargs["charge_class"]]

            person["Charges"].append(new_charge(**kwargs))

    elif person["Court Record"]:
        for i, row in courthouse_lookup.iterrows():
            # Clean the courthouse names the same way the matching loop does, so
            # they are comparable with the cleaned spellings.
            all_names = set()
            for column in ("Name 1", "Name 2", "Name 3", "Name 4"):
                if pd.notnull(row[column]):
                    all_names.add(text_clean(row[column], keep_all = True))
                    all_names.add(text_clean(row[column], first_and_last = True))

            if not (person["All Spellings"] & all_names):
                continue

            kwargs = _blank_charge_fields()

            if pd.notnull(row["Statute"]):
                kwargs["charge_statute"] = row["Statute"].upper()
            if pd.notnull(row["Charge Description"]):
                kwargs["charge_desc"] = row["Charge Description"].upper()
            if pd.notnull(row["Class"]):
                kwargs["charge_class"] = standardize_class(row["Class"])
            elif pd.notnull(row["Statute"]) or pd.notnull(row["Charge Description"]):
                kwargs["charge_class"] = standardize_class(class_lookup(row["Statute"], row["Charge Description"]))
            if kwargs["charge_class"]:
                kwargs["rank"] = class_rank[kwargs["charge_class"]]

            # The charge has to be stored like in the other two branches.
            person["Charges"].append(new_charge(**kwargs))

### Post-Process: Get Highest Charge, Charge Count

Go through all charges for person to find highest charge

In [None]:
# Record each person's highest-ranked charge and how many charges they have.
# Ties go to the charge listed last; with no charges the class and description
# stay None and the rank is whatever class_rank holds for None.
for iid, person in db.items():
    top_description, top_class = None, None

    for entry in person["Charges"]:
        if class_rank[entry["Charge Class"]] < class_rank[top_class]:
            continue
        top_description = entry["Charge Description"]
        top_class = entry["Charge Class"]

    person["Highest Charge Description"] = top_description
    person["Highest Charge Class"] = top_class
    person["Highest Charge Rank"] = class_rank[top_class]
    person["Number of Charges"] = len(person["Charges"])

### Write to DataFrame and CSV

In [None]:
# Person-level keys copied into every output row, in output column order
# (the database ID is prepended separately).
person_fields = (
    "Last Name", "First Name", "All Spellings",
    "Bond Court Date", "Days in Police Custody",
    "CPD Arrest Date", "CPD Release Date", "Booking Date",
    "Total Bond",
    "Highest Charge Rank", "Highest Charge Class", "Highest Charge Description",
    "Race", "Age", "Gender",
    "Jail ID", "Arrest ID",
    "Jail Record", "Arrest Record", "Court Record",
    "Unaccounted Custody", "Number of Charges",
    "No Bond", "Bond Type", "Days in Jail", "Judge",
    "Old Scale", "Threat Risk Scale", "No Show Risk Scale",
    "Courthouse", "Jail Release Date", "Still in Jail After a Week",
    "Courthouse Record",
    "First Name from Observations", "Last Name from Observations",
)

# Charge-level keys appended after the person-level values.
charge_fields = (
    "Charge Class", "Charge Description", "Case Disposition",
    "Statute", "Booking ID", "Charge Code",
)

# One row per (person, charge) pair; people without charges produce no rows.
grid = []

for iid, info in db.items():
    for charge in info["Charges"]:
        row = [iid]
        for field in person_fields:
            value = info[field]
            # "All Spellings" is stored as a list copy; everything else as-is.
            row.append(list(value) if field == "All Spellings" else value)
        row += [charge[field] for field in charge_fields]

        # Remember charges that ended up without a class.
        if not charge["Charge Class"]:
            unclassed_charges.append((charge["Statute"], charge["Charge Description"]))

        grid.append(row)
                    

In [None]:
# Column names for the flattened output table, in output order.
headings = [
    # identity
    "DB ID",
    "Last Name",
    "First Name",
    "All Spellings",
    # custody timeline and bond
    "Bond Court Date",
    "Days in Police Custody",
    "CPD Arrest Date",
    "CPD Release Date",
    "Booking Date",
    "Total Bond",
    # most serious charge
    "Highest Charge Rank",
    "Highest Charge Class",
    "Highest Charge Description",
    # demographics
    "Race",
    "Age",
    "Gender",
    # source-record identifiers and flags
    "Jail ID",
    "Arrest ID",
    "Jail Record",
    "Arrest Record",
    "Court Record",
    "Unaccounted Custody",
    "Number of Charges",
    # bond hearing details
    "No Bond",
    "Bond Type",
    "Days in Jail",
    "Judge",
    "Old Scale",
    "Threat Risk Scale",
    "No Show Risk Scale",
    "Courthouse",
    "Jail Release Date",
    "Still in Jail After a Week",
    "Courthouse Record",
    "First Name from Observations",
    "Last Name from Observations",
    # per-charge columns
    "Charge Class",
    "Charge Description",
    "Case Disposition",
    "Statute",
    "Booking ID",
    "Charge Code",
]

In [None]:
# Turn the list of rows into a table; `headings` supplies the column names
# in the same order the values were placed in each row.
all_data = pd.DataFrame(grid, columns = headings)

In [None]:
# Write the flattened table to disk, omitting the DataFrame's index column.
all_data.to_csv("../Output/Representation of Full Database.csv", index=False)

In [None]:
# Two-column table of the (statute, description) pairs collected in
# `unclassed_charges`, i.e. charges recorded without a charge class.
missing_charges = pd.DataFrame(unclassed_charges, columns = ["Statute", "Description"])

In [None]:
# Save the unclassed charges for follow-up, omitting the index column.
missing_charges.to_csv("../Output/3rd Round Unmatched Charges.csv", index=False)

### Send to Mongo Database

First run "sudo mongod" in a terminal to start a MongoDB instance.

In [None]:
from pymongo import MongoClient

In [None]:
# No arguments are given, so pymongo's default connection settings are used.
client = MongoClient()

In [None]:
# Destructive: deletes any existing "BailBondDB" database so the inserts
# below start from an empty database.
client.drop_database("BailBondDB")

In [None]:
# Handles to the database and to the collection that receives one document
# per person.
bbdb = client["BailBondDB"]
coll = bbdb["bond_court_individuals"]

Change datetime.date to datetime.datetime for database. Hours and minutes set to 0.

In [None]:
# Person-level keys holding calendar dates that must be stored as
# datetime.datetime values (midnight) before being sent to the database.
date_fields = (
    "CPD Arrest Date",
    "Bond Court Date",
    "CPD Release Date",
    "Booking Date",
    "Jail Release Date",
)

for iid, person in db.items():
    for field in date_fields:
        d = person[field]
        # Leave missing (falsy) dates untouched. The original tested the
        # literal list ['Bond Court Date'], which is always true, so a person
        # without a bond court date raised on `d.year`.
        if d:
            person[field] = datetime.datetime(d.year, d.month, d.day, 0, 0)

    # Store the jail stay as a whole number of days rather than a timedelta.
    # isinstance also covers timedelta subclasses.
    if isinstance(person["Days in Jail"], datetime.timedelta):
        person["Days in Jail"] = person["Days in Jail"].days

In [None]:
# Insert one document per person into the collection.
for iid, person in db.items():
    # Replace the spellings collection with a plain list before inserting
    # (presumably it is a set, which the database driver cannot store).
    person["All Spellings"] = list(person["All Spellings"])
    coll.insert_one(person)

In [None]:
# Release the connection to the MongoDB server now that all inserts are done.
client.close()