In [78]:
import os
import lexmachina
from collections import defaultdict
# pip install openpyxl
import openpyxl
import statistics

In [2]:
# Client configuration for the Lex Machina API. The bearer token is read from
# the BEARER_TOKEN environment variable (KeyError if it is not set), so no
# credential is stored in the notebook itself.
configuration = lexmachina.Configuration(
    access_token=os.environ["BEARER_TOKEN"],
    host="https://api.lexmachina.com",
)

In [6]:
# Search payload: Antitrust cases terminated during calendar year 2024,
# requested 100 results per page starting from page 1.
query = {
    "caseTypes": {"include": ["Antitrust"]},
    "dates": {
        "terminated": {
            "onOrAfter": "2024-01-01",
            "onOrBefore": "2024-12-31",
        },
    },
    "page": 1,
    "pageSize": 100,
}

In [7]:
# Fetch every case matched by `query`: page through the search results to
# collect the case IDs, then retrieve the full record for each ID.
with lexmachina.ApiClient(configuration) as api_client:
    fed_dist_case_api_instance = lexmachina.FederalDistrictCasesApi(api_client)

    # Page on a private copy that always starts at page 1. Advancing
    # query['page'] in place left the shared dict pointing past the last page,
    # so re-running this cell fetched nothing.
    paged_query = {**query, 'page': 1}
    case_ids = []

    while True:
        query_response = fed_dist_case_api_instance.query_district_cases(paged_query)

        # An empty page marks the end of the result set.
        if not query_response.cases:
            print(f'Antitrust cases terminated in 2024 has length {len(case_ids)}')
            break

        current_page = paged_query['page']
        print(f'{current_page=}')
        case_ids += [caseref.district_case_id for caseref in query_response.cases]
        paged_query['page'] = current_page + 1

    case_data = []

    # One request per case; report progress every 50 records.
    for case_id in case_ids:
        case_data.append(fed_dist_case_api_instance.get_district_case(case_id))
        if len(case_data) % 50 == 0:
            print(f'{len(case_data)} out of {len(case_ids)} processed')

    

current_page=1
current_page=2
current_page=3
current_page=4
current_page=5
current_page=6
current_page=7
Antitrust cases terminated in 2024 has length 671
50 out of 671 processed
100 out of 671 processed
150 out of 671 processed
200 out of 671 processed
250 out of 671 processed
300 out of 671 processed
350 out of 671 processed
400 out of 671 processed
450 out of 671 processed
500 out of 671 processed
550 out of 671 processed
600 out of 671 processed
650 out of 671 processed


In [10]:
# Inspect the judges attached to the first fetched case.
case_data[0].judges

[FederalJudge(name='Mitchell S. Goldberg', federal_judge_id=3193)]

In [62]:
# Mapping from a judge key to the list of case summaries for that judge;
# missing keys start out as an empty list.
cases_by_judge = defaultdict(list)

In [63]:
# Group every fetched case under each judge assigned to it, keyed by
# (name, federal_judge_id). Each entry records the case ID and the case
# duration (terminated date minus filed date).
# The mapping is rebuilt from scratch here so that re-running this cell does
# not append every case a second time and inflate the per-judge counts.
cases_by_judge = defaultdict(list)

for c in case_data:
    for j in c.judges:
        judge_key = (j.name, j.federal_judge_id)
        cases_by_judge[judge_key].append(
            dict(case_id=c.district_case_id, duration=c.dates.terminated - c.dates.filed)
        )
        

In [64]:
# Number of distinct (name, federal_judge_id) keys collected.
len(cases_by_judge)

378

In [65]:
# First five judge keys, in the order they were inserted.
list(cases_by_judge)[:5]

[('Mitchell S. Goldberg', 3193),
 ('Edmond E-Min Chang', 3342),
 ('Miriam Goldman Cedarbaum', 406),
 ('Lorna Gail Schofield', 3451),
 ('Joel A. Pisano', 2851)]

In [66]:
# Spot-check the case summaries recorded for one judge.
cases_by_judge[('Lorna Gail Schofield', 3451)]

[{'case_id': 2000009555, 'duration': datetime.timedelta(days=4981)}]

In [67]:
# Pair each judge key with the number of cases grouped under that judge.
case_count_by_judges = [
    (judge_info, len(judge_cases))
    for judge_info, judge_cases in cases_by_judge.items()
]

In [68]:
# Order the (judge key, case count) pairs from most to fewest cases. The sort
# is stable, so judges with equal counts keep their existing relative order.
sorted_case_counts_by_judges = list(case_count_by_judges)
sorted_case_counts_by_judges.sort(key=lambda pair: pair[-1], reverse=True)

In [69]:
# Five judges with the most cases.
sorted_case_counts_by_judges[:5]

[(('Edgardo Ramos', 3405), 37),
 (('Waverly David Crenshaw Jr.', 3603), 34),
 (('Sarah Elizabeth Pitlyk', 7465646), 30),
 (('P. Kevin Castel', 3029), 23),
 (('Sara Elizabeth Lioi', 3140), 23)]

In [70]:
# Last five entries of the descending sort, i.e. judges with the fewest cases.
sorted_case_counts_by_judges[-5:]

[(('Steven Douglas Merryday', 1627), 1),
 (('Mary Stenson Scriven', 3189), 1),
 (('John George Koeltl', 1305), 1),
 (('John Charles Hinderaker', 8938396), 1),
 (('Yvonne Gonzalez Rogers', 3404), 1)]

In [71]:
# Preview the case-summary lists of the first five judges.
list( cases_by_judge.values())[:5]

[[{'case_id': 97091, 'duration': datetime.timedelta(days=6752)},
  {'case_id': 2000045171, 'duration': datetime.timedelta(days=4015)},
  {'case_id': 2000045229, 'duration': datetime.timedelta(days=4011)},
  {'case_id': 2000046633, 'duration': datetime.timedelta(days=3980)},
  {'case_id': 2000047864, 'duration': datetime.timedelta(days=3952)},
  {'case_id': 2000049181, 'duration': datetime.timedelta(days=3918)},
  {'case_id': 2000049340, 'duration': datetime.timedelta(days=3912)},
  {'case_id': 2000049341, 'duration': datetime.timedelta(days=3912)},
  {'case_id': 2000049655, 'duration': datetime.timedelta(days=3905)},
  {'case_id': 2000049658, 'duration': datetime.timedelta(days=3905)},
  {'case_id': 2000049746, 'duration': datetime.timedelta(days=3903)},
  {'case_id': 2000051026, 'duration': datetime.timedelta(days=3863)}],
 [{'case_id': 48907, 'duration': datetime.timedelta(days=6117)},
  {'case_id': 2007889413, 'duration': datetime.timedelta(days=935)},
  {'case_id': 2034461979, 'dur

In [84]:
# Accumulator for case durations (in days) across all judges.
all_durations = []

In [85]:
# Flatten the per-judge groups into a single list of durations in whole days.
# Built in one assignment rather than with `+=` onto a list created in another
# cell, so re-running this cell cannot extend the list a second time.
# NOTE(review): a case with several judges appears once per judge here, and a
# case with no judges is absent; if the overall statistics are meant to be per
# case, derive the durations from case_data instead — confirm the intent.
all_durations = [
    case_summary['duration'].days
    for case_group in cases_by_judge.values()
    for case_summary in case_group
]

In [86]:
# First five durations, in days.
all_durations[:5]

[6752, 4015, 4011, 3980, 3952]

In [87]:
# Ascending copy of the durations; all_durations itself is left unsorted.
sorted_all_durations = list(all_durations)
sorted_all_durations.sort()

In [88]:
# Three shortest durations, in days.
sorted_all_durations[:3]

[0, 1, 1]

In [89]:
# Three longest durations, in days.
sorted_all_durations[-3:]

[4981, 6117, 6752]

In [97]:
# Mean duration in days, rounded to an integer.
round(statistics.mean(sorted_all_durations))

1084

In [105]:
# Median duration in days.
statistics.median(sorted_all_durations)

451

In [99]:
# Re-display the five judges with the most cases.
sorted_case_counts_by_judges[:5]

[(('Edgardo Ramos', 3405), 37),
 (('Waverly David Crenshaw Jr.', 3603), 34),
 (('Sarah Elizabeth Pitlyk', 7465646), 30),
 (('P. Kevin Castel', 3029), 23),
 (('Sara Elizabeth Lioi', 3140), 23)]

In [104]:
# For each of the five judges with the most cases, print the case count and
# the mean (rounded) and median case duration in days.
for j in sorted_case_counts_by_judges[:5]:
    judge_key, num_cases = j
    judge_cases = cases_by_judge[judge_key]
    judge_durations = [case_summary['duration'].days for case_summary in judge_cases]
    # One print call with newline separators emits the same five lines.
    print(
        '--------------------',
        f'judge name: {judge_key[0]}',
        f'total num cases: {num_cases}',
        f'average duration: {round(statistics.mean(judge_durations))}',
        f'median duration: {statistics.median(judge_durations)}',
        sep='\n',
    )

--------------------
judge name: Edgardo Ramos
total num cases: 37
average duration: 133
median duration: 95
--------------------
judge name: Waverly David Crenshaw Jr.
total num cases: 34
average duration: 280
median duration: 307.0
--------------------
judge name: Sarah Elizabeth Pitlyk
total num cases: 30
average duration: 1146
median duration: 1178.0
--------------------
judge name: P. Kevin Castel
total num cases: 23
average duration: 823
median duration: 912
--------------------
judge name: Sara Elizabeth Lioi
total num cases: 23
average duration: 65
median duration: 71
