In [2]:
from pandas import read_stata
import pandas as pd
from lcsscaseapi.client import LCSSClient
from lcsscaseapi.types import USCircuitCaseMeta, USJudge, JudgeRuling
import datetime
import os
from dotenv import load_dotenv

In [3]:
# Load the case-level Stata extract; the path is relative to the notebook's working directory.
df = read_stata("BloombergCASELEVEL_Touse.dta")

# Row count of the loaded frame (length of its caseid column).
print(len(df["caseid"]))

387898


In [48]:
# NOTE(review): this rebinds `df` to its first 1000 rows, so every later cell that reads `df`
# only sees the sample once this cell has run. The 387898-row outputs printed further down
# suggest those cells were executed against the full frame — confirm the intended run order.
df = df.head(n=1000) # sub sample to play with

In [49]:
def create_case_meta(row):
    """Build a USCircuitCaseMeta from one row of the case-level frame.

    The circuit name is looked up in USCircuitCaseMeta.CIRCUITS using the row's
    'Circuit' value. The decision date is assembled from the 'year', 'month' and
    'day' columns, and is None whenever the row's 'date' column is null. Tags and
    outcome come from construct_tags and construct_outcome.
    """
    case_id = row['caseid']
    circuit_key = row['Circuit']
    fields = {
        'case_id': case_id,
        'circuit_name': USCircuitCaseMeta.CIRCUITS[circuit_key],
        'self_cite': row['citation'],
        'docket_number': row['docketnumber'],
    }

    if pd.isnull(row['date']):
        fields['date'] = None
    else:
        # The 'date' column only gates the conversion; the components are read separately.
        fields['date'] = datetime.date(*(int(row[part]) for part in ('year', 'month', 'day')))

    fields['tags'] = construct_tags(row)
    fields['outcome'] = construct_outcome(row)

    return USCircuitCaseMeta(**fields)

def construct_tags(row):
    """Return the subject-area tags for a row.

    A tag is included when its indicator column equals 1. Tags are returned in a
    fixed order (the order of the table below); the list is empty when no
    indicator is set.
    """
    column_to_tag = (
        ('Criminal', 'CRIMINAL'),
        ('Civil_Rights', 'CIVIL RIGHTS'),
        ('First_Amendment', 'FIRST AMENDMENT'),
        ('Due_Process', 'DUE PROCESS'),
        ('Privacy', 'PRIVACY'),
        ('Labor_Relations', 'LABOR RELATIONS'),
        ('Econ_Activity', 'ECONOMIC ACTIVITY'),
        # The lookup key really is spelled 'Miscellanous'; only the emitted tag is corrected.
        ('Miscellanous', 'MISCELLANEOUS'),
    )
    return [tag for column, tag in column_to_tag if row[column] == 1]

def construct_outcome(row):
    """Return the row's outcomes as one comma-separated string, or None if there are none.

    Each outcome label is included when its indicator column equals 1; labels
    appear in the fixed order of the table below.
    """
    column_to_label = (
        ("Affirmed", "AFFIRMED"),
        ("AffirmedInPart", "AFFIRMED (IN PART)"),
        ("Reversed", "REVERSED"),
        ("ReversedInPart", "REVERSED (IN PART)"),
        ("Vacated", "VACATED"),
        ("VacatedInPart", "VACATED (IN PART)"),
        ("Remanded", "REMANDED"),
    )
    matched = [label for column, label in column_to_label if row[column] == 1]
    return ",".join(matched) if matched else None



    

In [50]:
# takes 39.5s to run
# Row-wise conversion: one USCircuitCaseMeta per row of df (axis=1 passes each row to the function).
cases = df.apply(create_case_meta, axis=1)

# Sanity check: one case object per input row.
print(len(cases))

1000


In [4]:
# Load environment variables via python-dotenv so credentials stay out of the notebook.
load_dotenv()

# os.getenv returns None when the variable is unset.
USERNAME = os.getenv('ACCOUNT')
PWD = os.getenv('PASSWORD')

# API client used by the upload/fetch cells below.
client = LCSSClient(username=USERNAME, password=PWD)

In [78]:
#returned_cases = client.upload_us_cases(cases)

In [64]:
# Now, time to create all the USJudges and JudgeRulings

# The idea will be to create judges from every single case
# Then later collapse this into just the unique judges (which match on every field, since it's unclear whether orig_names are unique)

def create_judge_tuple(row):
    """Return a 3-tuple of USJudge objects for judge slots 1, 2 and 3 of one Stata row."""
    return tuple(create_judge(row, slot) for slot in (1, 2, 3))

def create_judge(row, judgenum):
    """Build the USJudge for one judge slot of a row.

    judgenum is 1, 2 or 3, selecting the columns prefixed j1, j2 or j3.

    The judge's name is taken from the "name" column, falling back to the
    "Origname" column when the name is an empty string (or None). The returned
    judge always has orig_name=None: the same person can appear under several
    orig names, so it is left out of the judge's identity.

    Gender, party and seniority are None when their source value is null.
    """
    listed_name = judge_property(row, judgenum, "name")
    fallback_name = judge_property(row, judgenum, "Origname")
    if listed_name == "" or listed_name is None:
        name = fallback_name
    else:
        name = listed_name

    gender_num = judge_property(row, judgenum, "gender")
    if pd.isnull(gender_num):
        gender = None
    else:
        # 1 is converted to MALE, 2 is converted to FEMALE
        gender = USJudge.GENDERS[int(gender_num) - 1]

    raw_party_num = judge_property(row, judgenum, "party")
    party_num = party_num_cleaning(party_num=raw_party_num, name=name)
    if pd.isnull(party_num):
        party = None
    else:
        # 1 is converted to Democrat, 0 is converted to Republican
        # NOTE(review): any other code gives a negative or out-of-range index here — confirm
        # that only 0/1 can reach this point after cleaning.
        party = USJudge.PARTIES[1 - int(party_num)]

    senior_num = judge_property(row, judgenum, "Senior")
    if pd.isnull(senior_num):
        senior = None
    else:
        senior = bool(senior_num)

    # Sanity checks on the conversions above.
    assert name != ""
    assert gender == USJudge.MALE or gender_num != 1
    assert party == USJudge.DEMOCRAT or party_num != 1
    assert senior != False or senior_num == 0

    return USJudge(name=name, orig_name=None, gender=gender, senior=senior, party=party)

def judge_property(row, judgenum, judgeprop):
    """Return one judge-specific value from a row.

    The key is built as "j" + str(judgenum) + judgeprop, so
    judge_property(row, 1, "name") reads row["j1name"]. Pass the bare slot
    number (1, 2 or 3), not "j1": the "j" prefix is added here.
    """
    column = "".join(["j", str(judgenum), judgeprop])
    return row[column]

def party_num_cleaning(party_num, name):
    """Clean a raw party code: 1 for Democrat, 0 for Republican, None for unknown.

    Judges listed in the override table get a fixed code regardless of the raw
    value. After that, a code of 3 is mapped to None because its meaning is
    unclear. Any other value is returned unchanged.
    """
    party_overrides = {
        "BOND, HUGH LENNOX": 0,    # appointed by Ulysses S Grant (R)
        "HAYS, PAUL": 1,           # appointed by John F Kennedy (D)
        "MAHONEY, J. DANIEL": 0,   # appointed by Ronald Reagan (R)
        "BURNS, LOUIS HENRY": 0,   # appointed by Calvin Coolidge (R)
        "BAER, HAROLD, JR.": 1,    # many Orignames; appointed by Bill Clinton (D) - also a district court judge
    }
    cleaned = party_overrides.get(name, party_num)

    # unclear what party number of 3 really means, leave it blank for now
    return None if cleaned == 3 else cleaned


In [65]:
# One (j1, j2, j3) tuple of USJudge objects per row of df.
judges_per_case = df.apply(create_judge_tuple, axis=1)

# Sanity check: one tuple per input row.
print(len(judges_per_case))

387898


In [66]:
# Extract unique judges: flatten the per-case 3-tuples into one set
all_judges = {
    judge
    for (j1, j2, j3) in judges_per_case
    for judge in (j1, j2, j3)
}

print(len(all_judges))

4502


In [68]:
judge_list = list(all_judges)

# Later cells need `uploaded_judges` (the judges as returned by the API, with IDs).
# Previously both assignments were commented out, so a fresh Restart & Run All
# failed with a NameError in the next cell.
#
# Set UPLOAD_JUDGES to True only for the one-off initial upload.
# NOTE(review): presumably re-uploading is not wanted once the judges exist server-side —
# confirm before enabling.
UPLOAD_JUDGES = False

if UPLOAD_JUDGES:
    uploaded_judges = client.upload_us_judges(judge_list)
else:
    # Judges were uploaded in an earlier session: fetch them again to get their IDs.
    uploaded_judges = client.get_us_judges()

In [69]:
# Number of judge records held in uploaded_judges; compare with len(all_judges) printed above.
print(len(uploaded_judges))


4502


In [70]:
from copy import deepcopy
# Map each judge, with its id blanked out, to the id it has on the server.
# Blanking the id on a copy gives a key in the same shape as the locally built, id-less judges.
judge_id_dict = {}
for uploaded in uploaded_judges:
    assert uploaded.id != None
    lookup_key = deepcopy(uploaded)
    lookup_key.id = None
    judge_id_dict[lookup_key] = uploaded.id

In [71]:
# create a version of judges_per_case but each judge now has the ID
def assign_id(judge):
    """Return a deep copy of `judge` with its id filled in from judge_id_dict.

    The judge passed in is left untouched. Raises KeyError if the judge is not
    a key of judge_id_dict.
    """
    server_id = judge_id_dict[judge]
    judge_with_id = deepcopy(judge)
    judge_with_id.id = server_id
    return judge_with_id
    
# Same order and shape as judges_per_case, but every judge is a copy carrying its id.
judges_per_case_with_id =  [(assign_id(j1), assign_id(j2), assign_id(j3)) for j1,j2,j3 in judges_per_case]

In [73]:
# A plain list is assigned by position, so this relies on judges_per_case_with_id being in
# df's row order (it is built by iterating judges_per_case, which came from df.apply).
df['judges_per_case'] = judges_per_case_with_id # add a column for the tuple of judge objects

In [103]:
def repopulate_orig_name(row):
    """Return copies of the row's three judges with judge_orig_name set from the row.

    Orig names are used when matching judges to the dissenters, concurrers and
    author named on a case. The judges stored in row['judges_per_case'] are not
    modified; deep copies are returned as a (j1, j2, j3) tuple.
    """
    (j1, j2, j3) = row['judges_per_case']

    restored = []
    for slot, judge in enumerate((j1, j2, j3), start=1):
        judge_copy = deepcopy(judge)
        judge_copy.judge_orig_name = judge_property(row, slot, "Origname")
        restored.append(judge_copy)

    return tuple(restored)

# One tuple of orig-name-carrying judge copies per row of df.
judges_per_case_complete = df.apply(repopulate_orig_name, axis=1)

In [110]:
# Sanity check, then replace the judges column with the orig-name-carrying copies.
print(len(judges_per_case_complete))
df['judges_per_case'] = judges_per_case_complete

387898


In [119]:
# THIS ENTIRE SECTION IS EXPLORATORY
# To see how to handle problems of unclean names
# not actually used to execute any action needed to upload judge rulings

# Module-level tallies incremented by check_judge_matches, one pair per role:
# "collision" = the name fits more than one judge, "not found" = it fits none.
dissenter_collision = dissenter_not_found = 0
concurrer_collision = concurrer_not_found = 0
author_collision = author_not_found = 0

# One (caseid, author, j1, j2, j3) entry per author collision.
author_collision_list = list()
def check_judge_matches(row):
    dissenters = row['JudgeDissentingTouse'].split(sep = "|")
    dissenters = [dissenter.strip().upper()  for dissenter in dissenters if dissenter.strip() != ""]
    concurrers = row['JudgeconcurringTouse'].split(sep = "|")
    concurrers = [concurrer.strip().upper() for concurrer in concurrers if concurrer.strip() != ""]
    author = row['Author']

    (j1, j2, j3) = row['judges_per_case']
    for dissenter in dissenters:
        res = matches_one_judge(j1, j2, j3, dissenter)
        if res != True:
            if res=="COLLISION":
                global dissenter_collision
                dissenter_collision = dissenter_collision + 1
            elif res=="NOT FOUND":
                global dissenter_not_found
                dissenter_not_found = dissenter_not_found + 1
    for concurrer in concurrers:
        res = matches_one_judge(j1, j2, j3, concurrer)
        if res != True:
            if res=="COLLISION":
                global concurrer_collision
                concurrer_collision = concurrer_collision + 1
            elif res=="NOT FOUND":
                global concurrer_not_found
                concurrer_not_found = concurrer_not_found + 1
    if not pd.isnull(author) and author.strip() != "" and author != "PER CURIAM":
        res = matches_one_judge(j1, j2, j3, author)
        if res != True:
            if res=="COLLISION":
                global author_collision
                author_collision = author_collision + 1
                author_collision_list.append((row['caseid'], author, j1, j2, j3))
            elif res=="NOT FOUND":
                global author_not_found
                author_not_found = author_not_found + 1

def matches_one_judge(j1, j2, j3, dissenter):
    """Check that a named dissenter, concurrer or author identifies exactly one of three judges.

    Matching is tried in this order:
      1. substring match against each judge's name;
      2. substring match against each judge's orig name;
      3. only if step 2 was a collision, exact equality against the orig names.

    Returns True as soon as one step identifies a single judge. Otherwise it
    returns the result of step 1 ("COLLISION" or "NOT FOUND"), even when step 2
    ended in an unresolved collision.
    """
    by_name = name_matches_one(j1, j2, j3, dissenter)
    if by_name is True:
        return True

    by_orig_name = orig_name_matches_one(j1, j2, j3, dissenter)
    if by_orig_name is True:
        return True  # can be matched by orig_name at least

    if by_orig_name == "COLLISION":
        # Several orig names contain the string; see whether exactly one equals it.
        if orig_name_matches_one_exactly(j1, j2, j3, dissenter) is True:
            return True

    return by_name
        
def name_matches_one(j1, j2, j3, dissenter):
    """Report whether `dissenter` is a substring of exactly one judge's judge_name.

    Returns True for exactly one match, "COLLISION" when it appears in more than
    one judge's name, and "NOT FOUND" when it appears in none (e.g. a typo).
    """
    hits = 0
    for judge in (j1, j2, j3):
        if dissenter in judge.judge_name:
            hits += 1
            if hits > 1:
                # Stop at the second match; remaining judges are not inspected.
                return "COLLISION"
    return True if hits == 1 else "NOT FOUND"
        

def orig_name_matches_one(j1, j2, j3, dissenter):
    """Report whether `dissenter` is a substring of exactly one judge's judge_orig_name.

    Returns True for exactly one match, "COLLISION" when it appears in more than
    one judge's orig name, and "NOT FOUND" when it appears in none.
    """
    hits = 0
    for judge in (j1, j2, j3):
        if dissenter in judge.judge_orig_name:
            hits += 1
            if hits > 1:
                # Stop at the second match; remaining judges are not inspected.
                return "COLLISION"
    return True if hits == 1 else "NOT FOUND"

def orig_name_matches_one_exactly(j1, j2, j3, dissenter):
    """Report whether `dissenter` equals exactly one judge's judge_orig_name.

    Uses exact equality rather than substring matching, to settle substring
    collisions. Returns True for exactly one equal orig name, "COLLISION" for
    more than one, and "NOT FOUND" for none.
    """
    hits = 0
    for judge in (j1, j2, j3):
        if dissenter == judge.judge_orig_name:
            hits += 1
            if hits > 1:
                # Stop at the second match; remaining judges are not inspected.
                return "COLLISION"
    return True if hits == 1 else "NOT FOUND"
#def create_judge_ruling(judge, caseid, dissenters, concurrers, author):
    #for 

# Run the check over every row; it is called for its side effects on the module-level counters
# (the Series returned by apply is discarded).
# NOTE(review): the counters are only reset by the assignments at the top of this cell's source,
# so re-running just this call would add to the existing totals.
df.apply(check_judge_matches, axis = 1)
print("Dissenter collison", dissenter_collision)
print("Dissenter not found", dissenter_not_found)
print("Concurrer collison", concurrer_collision)
print("Concurrer not found", concurrer_not_found)
print("Author collison", author_collision)
print("Author not found", author_not_found)

Dissenter collison 3
Dissenter not found 2451
Concurrer collison 2
Concurrer not found 1507
Author collison 47
Author not found 11485


In [127]:
# Distinct author strings, and the case ids, behind every recorded author collision.
problematic_author_names = {name for (_caseid, name, _j1, _j2, _j3) in author_collision_list}
problematic_caseids = [caseid for (caseid, _name, _j1, _j2, _j3) in author_collision_list]

print(len(problematic_author_names))
print(problematic_author_names)

# Show the judge names next to the author for the affected cases.
is_problematic_case = df.caseid.isin(problematic_caseids)
df.loc[is_problematic_case, ["caseid", "j1name", "j2name", "j3name", "Author"]]

12
{'THOMPSON', 'GIBSON', 'NELSON', 'WOOD', 'MICHAEL', 'WILLIAMS', 'NEWMAN', 'PHILLIPS', 'ARNOLD', 'ANDERSON', 'CARNES', 'GINSBURG'}


KeyError: ('caseid', 'j1name', 'j2name', 'j3name', 'Author')