In [1]:
from pandas import read_stata
import pandas as pd
from lcsscaseapi.client import LCSSClient
from lcsscaseapi.types import USCircuitCaseMeta, USJudge, JudgeRuling
import datetime
import os
from dotenv import load_dotenv

In [4]:
# Load the case-level Stata file into a DataFrame (path is relative to the working directory).
df = read_stata("BloombergCASELEVEL_Touse.dta")

# Row-count sanity check, measured through the "caseid" column.
print(len(df["caseid"]))

387898


In [48]:
# NOTE(review): this rebinds `df`, so on a top-to-bottom run every later cell that reads
# `df` sees only these 1000 rows. The recorded output of the judge-extraction cell
# (387898 rows) was evidently produced without this truncation - confirm which is intended.
df = df.head(n=1000) # sub sample to play with

In [49]:
def create_case_meta(row):
    """Build a USCircuitCaseMeta from one row of the case-level table.

    Reads 'caseid', 'Circuit', 'citation', 'docketnumber' and 'date' from the row
    (plus 'year', 'month' and 'day' when 'date' is not null) and delegates the
    tags and the outcome string to construct_tags / construct_outcome.
    """
    case_id = row['caseid']
    # The 'Circuit' value is used directly as the lookup key into CIRCUITS.
    circuit_name = USCircuitCaseMeta.CIRCUITS[row['Circuit']]
    self_cite = row['citation']
    docket_number = row['docketnumber']

    # 'date' only decides whether a date exists; the value itself is assembled
    # from the separate year / month / day columns.
    if pd.isnull(row['date']):
        date = None
    else:
        year, month, day = (int(row[part]) for part in ('year', 'month', 'day'))
        date = datetime.date(year, month, day)

    return USCircuitCaseMeta(
        case_id=case_id,
        circuit_name=circuit_name,
        self_cite=self_cite,
        docket_number=docket_number,
        date=date,
        tags=construct_tags(row),
        outcome=construct_outcome(row),
    )

def construct_tags(row):
    """Return the subject-area tags whose indicator column equals 1 in the row.

    The result is a (possibly empty) list of tag strings, in the fixed order of
    the table below.
    """
    # (indicator column, tag label) pairs. Column names are reproduced exactly
    # as the row is indexed, including the 'Miscellanous' spelling.
    indicator_to_tag = (
        ('Criminal', 'CRIMINAL'),
        ('Civil_Rights', 'CIVIL RIGHTS'),
        ('First_Amendment', 'FIRST AMENDMENT'),
        ('Due_Process', 'DUE PROCESS'),
        ('Privacy', 'PRIVACY'),
        ('Labor_Relations', 'LABOR RELATIONS'),
        ('Econ_Activity', 'ECONOMIC ACTIVITY'),
        ('Miscellanous', 'MISCELLANEOUS'),
    )
    return [label for column, label in indicator_to_tag if row[column] == 1]

def construct_outcome(row):
    """Return the case outcome as a comma-joined string, or None if no flag is set.

    Each outcome column whose value equals 1 contributes its label, in the fixed
    order of the table below; labels are joined with "," and no spaces.
    """
    # (indicator column, outcome label) pairs, in output order.
    flag_to_label = (
        ("Affirmed", "AFFIRMED"),
        ("AffirmedInPart", "AFFIRMED (IN PART)"),
        ("Reversed", "REVERSED"),
        ("ReversedInPart", "REVERSED (IN PART)"),
        ("Vacated", "VACATED"),
        ("VacatedInPart", "VACATED (IN PART)"),
        ("Remanded", "REMANDED"),
    )
    labels = [label for column, label in flag_to_label if row[column] == 1]
    return ",".join(labels) if labels else None



    

In [50]:
# takes 39.5s to run
# Row-wise apply: builds one case-metadata object per row of df via create_case_meta.
cases = df.apply(create_case_meta, axis=1)

# Number of case objects built.
print(len(cases))

1000


In [5]:
# Load variables from a .env file into the process environment.
load_dotenv()

# Credentials are read from the ACCOUNT / PASSWORD environment variables.
# NOTE(review): os.getenv returns None when a variable is unset, so a missing entry
# is handed to LCSSClient as None instead of failing here.
USERNAME = os.getenv('ACCOUNT')
PWD = os.getenv('PASSWORD')

client = LCSSClient(username=USERNAME, password=PWD)

In [78]:
#returned_cases = client.upload_us_cases(cases)

In [6]:
# Now, time to create all the USJudges and JudgeRulings

# The idea is to create judges from every single case,
# then later collapse them into just the unique judges (matching on every field, since it's unclear whether orig_names are unique)

# Given a row in the stata file, creates a tuple of USJudges
def create_judge_tuple(row):
    """Return the 3-tuple of USJudge objects built from the row's j1, j2 and j3 columns."""
    return tuple(create_judge(row, judge_num) for judge_num in (1, 2, 3))

# For a given row and judge number, returns that judge's details as a USJudge object
# judgenum = 1, 2 or 3 for judges labelled j1, j2 or j3
def create_judge(row, judgenum):
    """Build a USJudge from the j<judgenum>* columns of one row.

    judgenum selects the judge slot (1, 2 or 3). An empty-string name becomes
    None; null gender, party and senior codes become None for the matching
    USJudge field. The party code is passed through party_num_cleaning first.
    """
    def prop(suffix):
        # Fetch the column for this judge slot, e.g. "j2gender".
        return judge_property(row, judgenum, suffix)

    name = prop("name")
    if name == "":
        name = None  # a blank name is stored as "no name"
    orig_name = prop("Origname")

    # Gender code indexes USJudge.GENDERS with a 1-based offset (1 -> first entry).
    gender_num = prop("gender")
    if pd.isnull(gender_num):
        gender = None
    else:
        gender = USJudge.GENDERS[int(gender_num) - 1]

    # Party code, after cleaning, indexes USJudge.PARTIES in reverse
    # (1 -> first entry, 0 -> second entry).
    party_num = party_num_cleaning(party_num=prop("party"), name=name)
    if pd.isnull(party_num):
        party = None
    else:
        party = USJudge.PARTIES[1 - int(party_num)]

    senior_num = prop("Senior")
    if pd.isnull(senior_num):
        senior = None
    else:
        senior = bool(senior_num)

    # Sanity checks on the conversions above.
    assert name != ""
    assert orig_name != ""
    assert gender == USJudge.MALE or gender_num != 1
    assert party == USJudge.DEMOCRAT or party_num != 1
    assert senior != False or senior_num == 0

    return USJudge(name=name, orig_name=orig_name, gender=gender, senior=senior, party=party)

# For a given judge and property, returns the property
# For example, calling judge_property(row, 1, "name") will fetch j1name from the row
def judge_property(row, judgenum, judgeprop):
    """Return row["j<judgenum><judgeprop>"], e.g. judgenum=1, judgeprop="name" reads "j1name"."""
    column = f"j{judgenum}{judgeprop}"
    return row[column]

# Convert unclean party numbers to 1 for democrat, 0 for republican
def party_num_cleaning(party_num, name):
    """Return a cleaned party code: 1 for Democrat, 0 for Republican, None if unknown.

    Judges listed in the override table get a fixed code regardless of the
    incoming party_num. Afterwards a code of 3 is replaced by None; any other
    value is returned unchanged.
    """
    # Manual corrections keyed by judge name: (name, corrected party code).
    overrides = (
        ("BOND, HUGH LENNOX", 0),   # appointed by Ulysses S Grant (R)
        ("HAYS, PAUL", 1),          # appointed by John F Kennedy (D)
        ("MAHONEY, J. DANIEL", 0),  # appointed by Ronald Reagan (R)
        ("BURNS, LOUIS HENRY", 0),  # appointed by Calvin Coolidge (R)
        ("BAER, HAROLD, JR.", 1),   # many Orignames; appointed by Bill Clinton (D), also a district court judge
    )
    for judge_name, corrected in overrides:
        if name == judge_name:
            party_num = corrected

    # Unclear what a party number of 3 really means, so leave it blank for now.
    if party_num == 3:
        party_num = None

    return party_num


In [7]:
# Row-wise apply: one (j1, j2, j3) tuple of judge objects per row of df.
judges_per_case = df.apply(create_judge_tuple, axis=1)

# Number of judge tuples built.
print(len(judges_per_case))

387898


In [8]:
# Extract unique judges
from lcsscaseapi.types import Judge
# Collapse the per-case judge triples into the set of distinct judges.
# Set membership relies on the judge objects' own equality / hash.
all_judges = set()
for j1, j2, j3 in judges_per_case:
    panel = (j1, j2, j3)
    # Check the whole panel before adding any of its judges.
    # `is not None` is an identity test, so it cannot be affected by a custom __eq__/__ne__.
    for judge in panel:
        assert judge.judge_orig_name is not None
    all_judges.update(panel)

print(len(all_judges))

<bound method Judge.__hash__ of Judge Object: {"id": 2, "judge_gender": null, "judge_name": null, "judge_orig_name": null}>
5926


In [15]:
# Materialise the set of unique judges as a list for the upload call below.
judge_list = list(all_judges)

# NOTE(review): the upload is disabled, but later cells read `uploaded_judges`;
# on a fresh kernel that name is undefined unless this line is re-enabled.
#uploaded_judges = client.upload_us_judges(judge_list)

In [16]:
# NOTE(review): `uploaded_judges` is only assigned by the commented-out upload call in the
# previous cell, so this raises NameError on a fresh kernel.
print(len(uploaded_judges))


5926


In [25]:
from copy import deepcopy
# Create dictionary mapping id-less judge to ID
judge_id_dict = dict()
for judge in uploaded_judges:
    # Validate first: an uploaded judge without an ID cannot be mapped, so fail
    # before copying it or touching the dictionary.
    assert judge.id is not None
    # Key on a copy with the ID cleared, so judges that have no ID yet can be
    # looked up; the uploaded judge itself is left untouched.
    idless_judge = deepcopy(judge)
    idless_judge.id = None
    judge_id_dict[idless_judge] = judge.id

In [29]:


# Attach the uploaded ID to each judge of the first few cases.
for i in range(10): #range(len(judges_per_case)):
    # Positional access on both objects, so row i of df and entry i of
    # judges_per_case refer to the same case whatever the index labels are.
    caseid = df.iloc[i]['caseid']

    # fetch IDs of every uploaded judge
    for judge in judges_per_case.iloc[i]:
        # Skip judges already resolved by an earlier run of this cell.
        # Presumably the dictionary lookup takes `id` into account (its keys are
        # id-less copies), so a judge whose id is already set would no longer be
        # found - TODO confirm.
        if getattr(judge, "id", None) is None:
            judge.id = judge_id_dict[judge]

5946
