In [1]:
import os
from collections import defaultdict
import statistics

import lexmachina
import openpyxl # pip install openpyxl

In [2]:
configuration = lexmachina.Configuration(
    host="https://api.lexmachina.com",
    access_token=os.environ["BEARER_TOKEN"]
)

In [3]:
api_client = lexmachina.ApiClient(configuration)

In [4]:
fed_dist_case_api_instance = lexmachina.FederalDistrictCasesApi(api_client)

In [6]:
query = {
        "caseTypes": {
            "include": [
            "Antitrust"
            ]
        },
        "dates": {
            "terminated": {
            "onOrAfter": "2024-01-01",
            "onOrBefore": "2024-12-31"
            }
        },
        "page": 1,
        "pageSize": 100
    }

In [7]:
case_ids = []
done_paging = False

while not done_paging:
    query_response = fed_dist_case_api_instance.query_district_cases(query)

    if query_response.cases:
        current_page = query['page']
        print(f'{current_page=}')
        result_case_ids = [caseref.district_case_id for caseref in query_response.cases]
        case_ids += result_case_ids
        query['page'] = current_page + 1

    else:
        print(f'Number of Antitrust cases terminated in 2024: {len(case_ids)}')
        done_paging=True

current_page=1
current_page=2
current_page=3
current_page=4
current_page=5
current_page=6
current_page=7
Number of Antitrust cases terminated in 2024: 671


In [8]:
case_data = []

for case_id in case_ids:
    case_data.append(fed_dist_case_api_instance.get_district_case(case_id))
    if len(case_data) % 20 == 0:
         print(f'{len(case_data)} out of {len(case_ids)} processed')

Now getting indvidual case info
20 out of 671 processed
40 out of 671 processed
60 out of 671 processed
80 out of 671 processed
100 out of 671 processed
120 out of 671 processed
140 out of 671 processed
160 out of 671 processed
180 out of 671 processed
200 out of 671 processed
220 out of 671 processed
240 out of 671 processed
260 out of 671 processed
280 out of 671 processed
300 out of 671 processed
320 out of 671 processed
340 out of 671 processed
360 out of 671 processed
380 out of 671 processed
400 out of 671 processed
420 out of 671 processed
440 out of 671 processed
460 out of 671 processed
480 out of 671 processed
500 out of 671 processed
520 out of 671 processed
540 out of 671 processed
560 out of 671 processed
580 out of 671 processed
600 out of 671 processed
620 out of 671 processed
640 out of 671 processed
660 out of 671 processed


In [9]:
case_data[0].judges

[FederalJudge(name='Mitchell S. Goldberg', federal_judge_id=3193)]

In [10]:
cases_by_judge = defaultdict(list)

In [11]:
for c in case_data:
    for j in c.judges:
        cases_by_judge[(j.name, j.federal_judge_id)].append(dict(case_id=c.district_case_id, duration=c.dates.terminated - c.dates.filed))
        

In [12]:
len(cases_by_judge)

378

In [13]:
list(cases_by_judge)[:5]

[('Mitchell S. Goldberg', 3193),
 ('Edmond E-Min Chang', 3342),
 ('Miriam Goldman Cedarbaum', 406),
 ('Lorna Gail Schofield', 3451),
 ('Joel A. Pisano', 2851)]

In [14]:
cases_by_judge[('Lorna Gail Schofield', 3451)]

[{'case_id': 2000009555, 'duration': datetime.timedelta(days=4981)}]

In [15]:
all_durations = []

In [16]:
for case_group in cases_by_judge.values():
    all_durations += [c['duration'].days for c in case_group]

In [17]:
all_durations[:5]

[6752, 4015, 4011, 3980, 3952]

In [18]:
round(statistics.mean(all_durations))

1084

In [19]:
statistics.median(all_durations)

451

In [20]:
case_count_by_judges = [(judge_info, len(cases_by_judge[judge_info])) for judge_info in cases_by_judge]

In [21]:
sorted_case_counts_by_judges = sorted(case_count_by_judges, key=lambda x: x[-1], reverse=True)

In [22]:
sorted_case_counts_by_judges[:5]

[(('Edgardo Ramos', 3405), 37),
 (('Waverly David Crenshaw Jr.', 3603), 34),
 (('Sarah Elizabeth Pitlyk', 7465646), 30),
 (('P. Kevin Castel', 3029), 23),
 (('Sara Elizabeth Lioi', 3140), 23)]

In [23]:
sorted_case_counts_by_judges[-5:]

[(('Steven Douglas Merryday', 1627), 1),
 (('Mary Stenson Scriven', 3189), 1),
 (('John George Koeltl', 1305), 1),
 (('John Charles Hinderaker', 8938396), 1),
 (('Yvonne Gonzalez Rogers', 3404), 1)]

In [24]:
list(cases_by_judge.values())[:5]

[[{'case_id': 97091, 'duration': datetime.timedelta(days=6752)},
  {'case_id': 2000045171, 'duration': datetime.timedelta(days=4015)},
  {'case_id': 2000045229, 'duration': datetime.timedelta(days=4011)},
  {'case_id': 2000046633, 'duration': datetime.timedelta(days=3980)},
  {'case_id': 2000047864, 'duration': datetime.timedelta(days=3952)},
  {'case_id': 2000049181, 'duration': datetime.timedelta(days=3918)},
  {'case_id': 2000049340, 'duration': datetime.timedelta(days=3912)},
  {'case_id': 2000049341, 'duration': datetime.timedelta(days=3912)},
  {'case_id': 2000049655, 'duration': datetime.timedelta(days=3905)},
  {'case_id': 2000049658, 'duration': datetime.timedelta(days=3905)},
  {'case_id': 2000049746, 'duration': datetime.timedelta(days=3903)},
  {'case_id': 2000051026, 'duration': datetime.timedelta(days=3863)}],
 [{'case_id': 48907, 'duration': datetime.timedelta(days=6117)},
  {'case_id': 2007889413, 'duration': datetime.timedelta(days=935)},
  {'case_id': 2034461979, 'dur

In [26]:
for j in sorted_case_counts_by_judges[:5]:
    judge_cases = cases_by_judge[j[0]]
    judge_durations = [c['duration'].days for c in judge_cases]
    print('--------------------')
    print(f'judge name: {j[0][0]}')
    print(f'total num cases: {j[1]}')
    print(f'average duration: {round(statistics.mean(judge_durations))}')
    print(f'median duration: {statistics.median(judge_durations)}')

--------------------
judge name: Edgardo Ramos
total num cases: 37
average duration: 133
median duration: 95
--------------------
judge name: Waverly David Crenshaw Jr.
total num cases: 34
average duration: 280
median duration: 307.0
--------------------
judge name: Sarah Elizabeth Pitlyk
total num cases: 30
average duration: 1146
median duration: 1178.0
--------------------
judge name: P. Kevin Castel
total num cases: 23
average duration: 823
median duration: 912
--------------------
judge name: Sara Elizabeth Lioi
total num cases: 23
average duration: 65
median duration: 71


In [28]:
case_data[0].law_firms[:3]

[LawFirm(name='Kessler Topaz Meltzer & Check', law_firm_id=27, client_party_ids=[257121, 52552843, 231694, 37904, 23356662, 20047290, 24917852, 37648157]),
 LawFirm(name='Hagens Berman Sobol Shapiro', law_firm_id=30, client_party_ids=[231694]),
 LawFirm(name='Berger Montague', law_firm_id=51, client_party_ids=[231694])]

In [29]:
case_data[0].parties[:3]

[Party(name='Pennsylvania Employees Benefit Trust Fund', party_id=37904, role='Plaintiff'),
 Party(name='Cephalon, Inc.', party_id=20036179, role='Defendant'),
 Party(name='Vista Health Plan, Inc.', party_id=20047290, role='Plaintiff')]

In [30]:
parties_by_id_by_case_id = {}

In [31]:
for c in case_data:
    parties_by_id_by_case_id[c.district_case_id] = {}
    for p in c.parties:
        parties_by_id_by_case_id[c.district_case_id][p.party_id] = p

In [114]:
list(parties_by_id_by_case_id.keys())[:5]

[97091, 48907, 2000009555, 2000026715, 2000028620]

In [115]:
parties_by_id_by_case_id[97091]

{24917852: Party(name='Debra Langan', party_id=24917852, role='Plaintiff'),
 23356662: Party(name='Pennsylvania Turnpike Commission', party_id=23356662, role='Plaintiff'),
 257121: Party(name='SHIRLEY PANEBIANO', party_id=257121, role='Plaintiff'),
 231694: Party(name='AvMed, Inc.', party_id=231694, role='Plaintiff'),
 34912145: Party(name='Jeffrey R. Krinsk', party_id=34912145, role='Plaintiff'),
 37648157: Party(name='District Council 37 Health and Security Plan', party_id=37648157, role='Plaintiff'),
 9750: Party(name='Eckerd Corporation', party_id=9750, role='Defendant'),
 2993: Party(name='Teva Pharmaceutical Industries Ltd.', party_id=2993, role='Defendant'),
 1087: Party(name='Teva Pharmaceuticals USA, Inc.', party_id=1087, role='Defendant'),
 27: Party(name='Mylan Pharmaceuticals, Inc.', party_id=27, role='Defendant'),
 1855: Party(name='Barr Laboratories, Inc.', party_id=1855, role='Defendant'),
 52552843: Party(name='End Payor Class Plaintiffs', party_id=52552843, role='Plain

In [192]:
column_names = ['case id', 'case number', 'case title', 'law_firm', 'law_firm_id', 'party', 'party_id', 'role']

In [193]:
rows = []

In [194]:
rows.append(column_names)

In [195]:
for c in case_data:
    for law_firm in c.law_firms:
        for party_id in law_firm.client_party_ids:
            party = parties_by_id_by_case_id[c.district_case_id][party_id]
            rows.append(
                (
                    c.district_case_id,
                    c.case_no,
                    c.title,
                    law_firm.name,
                    law_firm.law_firm_id,
                    party.name,
                    party.party_id,
                    party.role
            )
            )
            

In [196]:
len(rows)

19084

In [197]:
rows[0]

['case id',
 'case number',
 'case title',
 'law_firm',
 'law_firm_id',
 'party',
 'party_id',
 'role']

In [198]:
rows[1]

(97091,
 '2:06-cv-01833-MSG',
 'VISTA HEALTHPLAN, INC. v. CEPHALON, INC. et al',
 'Kessler Topaz Meltzer & Check',
 27,
 'SHIRLEY PANEBIANO',
 257121,
 'Plaintiff')

In [199]:
rows[10000]

(2005150350,
 '3:20-cv-05792-JD',
 'In re Google Play Developer Antitrust Litigation',
 "O'Melveny & Myers",
 227639559,
 'Google Asia Pacific PTE. Limited',
 52824280,
 'Defendant')

In [201]:
rows[-1]

(2034774512,
 '3:24-cv-09118-VC',
 'Kushner et al v. Chunghwa Picture Tubes, Ltd. et al',
 'Goldman Scarlato & Penny',
 15211344,
 'Barry Kushner',
 10805,
 'Plaintiff')

In [140]:
from openpyxl import Workbook

In [202]:
wb.close()

In [203]:
wb = Workbook()

In [204]:
ws = wb.active

In [205]:
for r in rows:
    ws.append(r)

In [206]:
wb.save("antitrust_terminated_2024_law_firms.xlsx")

In [207]:
wb.close()