In [1]:
import os
from collections import defaultdict
import statistics

import lexmachina
import ray # pip install ray
from ray.util import ActorPool
from openpyxl import Workbook # pip install openpyxl

2025-06-05 14:35:15,500	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


In [2]:
@ray.remote
class FedDistrictAPIActor:
    """Ray actor wrapping the Lex Machina API's Federal District Case endpoints.

    Every actor builds its own API client, authenticated with the bearer token
    read from the ``BEARER_TOKEN`` environment variable.
    """

    def __init__(self):
        """Create the API client used by this actor.

        Raises:
            KeyError: if the ``BEARER_TOKEN`` environment variable is not set.
        """
        configuration = lexmachina.Configuration(
            host="https://api.lexmachina.com",
            access_token=os.environ["BEARER_TOKEN"],
        )
        api_client = lexmachina.ApiClient(configuration)
        self.fed_dist_case_api_instance = lexmachina.FederalDistrictCasesApi(api_client)

    def get_query_case_ids(self, query, page_num):
        """Run one page of a case query and return the ids of the matching cases.

        Args:
            query: mapping of query parameters; it is not modified.
            page_num: page of results to request.

        Returns:
            A list with the ``district_case_id`` of every case on the page, or an
            empty list when the page has no cases.
        """
        # Set the page on a copy so the caller's query object is left untouched.
        paged_query = {**query, "page": page_num}
        query_response = self.fed_dist_case_api_instance.query_district_cases(paged_query)
        if not query_response.cases:
            # Always hand back a list so callers can concatenate pages directly.
            return []
        return [caseref.district_case_id for caseref in query_response.cases]

    def get_case_data(self, case_id):
        """Get detailed case data (e.g. relevant dates, parties, lawyers, judges, etc) on the specified case"""
        return self.fed_dist_case_api_instance.get_district_case(case_id)

In [3]:
# The pool size sets how many API requests can be in flight at once;
# raise or lower num_actors to change the level of concurrency.
num_actors = 3
actor_pool = ActorPool(
    [FedDistrictAPIActor.remote() for _ in range(num_actors)]
)

2025-06-05 14:35:19,730	INFO worker.py:1879 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m


In [4]:
# Query parameters: Antitrust cases terminated during calendar year 2024,
# requested 100 results per page.
query = {
    "caseTypes": {"include": ["Antitrust"]},
    "dates": {
        "terminated": {
            "onOrAfter": "2024-01-01",
            "onOrBefore": "2024-12-31",
        },
    },
    "page": None,  # placeholder; the actor method supplies the page number per request
    "pageSize": 100,
}

In [5]:
# https://law.lexmachina.com/cases/?case_types-include=89&pending-from=2009-01-01&pending-to=&terminated_on-from=2024-01-01&terminated_on-to=2024-12-31
# The search above is known to match 671 cases.
# With a page size of 100 that is 7 pages, so we request pages 1 through 7 (range(1, 8)).
# Each page is sent to whichever actor in the pool is free; the result is one list of case ids
# (or an empty/None result) per page, collected in completion order rather than page order.
%time case_ids_results = list(actor_pool.map_unordered(lambda a, v: a.get_query_case_ids.remote(query=query, page_num=v), range(1, 8)))

CPU times: user 42.8 ms, sys: 4.83 ms, total: 47.6 ms
Wall time: 1.97 s


In [6]:
# Flatten the per-page results into a single list of case ids,
# skipping any page that came back empty (or as None).
case_ids = [
    case_id
    for case_ids_list in case_ids_results
    if case_ids_list
    for case_id in case_ids_list
]

# Increase or decrease the number of actors depending on desired concurrency
num_actors = 3
actor_pool = ActorPool([FedDistrictAPIActor.remote() for _ in range(num_actors)])

In [7]:
# Sanity check: this should equal the 671 cases the query is expected to match.
len(case_ids)

671

In [8]:
# Peek at the first few collected case ids.
case_ids[:5]

[2002287215, 2002451208, 2002454286, 2002459834, 2002469927]

In [9]:
# Fetch the detailed record for every case id, one request per case, spread across the actor pool.
# map_unordered returns results as they complete, so case_data is NOT in the same order as case_ids.
%time case_data = list(actor_pool.map_unordered(lambda a, v: a.get_case_data.remote(case_id=v), case_ids))

CPU times: user 3.1 s, sys: 914 ms, total: 4.02 s
Wall time: 5min 15s


In [10]:
# Sanity check: one case record should have come back per case id.
len(case_data)

671

In [13]:
# Nested lookup table: district case id -> {party id -> party record}.
parties_by_id_by_case_id = {}

In [14]:
# Index each case's parties by party id so party names can be looked up
# from the party ids referenced elsewhere in the case record.
for case_record in case_data:
    parties_by_id_by_case_id[case_record.district_case_id] = {
        party.party_id: party for party in case_record.parties
    }

In [15]:
# Header row of the exported sheet; the order matches the per-case row tuples.
column_names = [
    'case id',
    'case title',
    'case number',
    'court',
    'judges',
    'date filed',
    'date terminated',
    'law_firms',
    'parties',
]

In [16]:
# Accumulates the spreadsheet contents: the header row first, then one row per case.
rows = []

In [17]:
# The header goes in as the first row. Re-running this cell without resetting `rows` adds it again.
rows.append(column_names)

In [18]:
# Build one spreadsheet row per case, in the same column order as the header row.
for case_record in case_data:
    parties_in_case = parties_by_id_by_case_id[case_record.district_case_id]

    # Names of the parties listed as clients of the case's law firms.
    # NOTE(review): a party that is not a client of any listed law firm will not
    # appear in the "parties" column -- confirm that is the intended behavior.
    represented_party_names = [
        parties_in_case[party_id].name
        for law_firm in case_record.law_firms
        for party_id in law_firm.client_party_ids
    ]
    # A party can be a client of several firms. dict.fromkeys removes the repeats
    # while keeping first-seen order, so the exported column is the same on every
    # run (iterating a set of strings can yield a different order per interpreter run).
    unique_party_names = dict.fromkeys(represented_party_names)

    rows.append(
        (
            case_record.district_case_id,
            case_record.title,
            case_record.case_no,
            case_record.court,
            ", ".join([j.name for j in case_record.judges]),
            case_record.dates.filed,
            case_record.dates.terminated,
            ", ".join([lf.name for lf in case_record.law_firms]),
            ", ".join(unique_party_names),
        )
    )
    

In [19]:
# Expect the header row plus one row per case.
len(rows)

672

In [20]:
# Confirm the header row is in first position.
rows[0]

['case id',
 'case title',
 'case number',
 'court',
 'judges',
 'date filed',
 'date terminated',
 'law_firms',
 'parties']

In [21]:
# Spot-check the first data row.
rows[1]

(2002451208,
 'UFCW Local 1500 Welfare Fund v. Amgen Inc. et al',
 '1:19-cv-00369-CFC',
 'U.S. District Court for the District of Delaware',
 'Leonard Philip Stark, Colm Felix Connolly',
 datetime.date(2019, 2, 21),
 datetime.date(2024, 2, 17),
 'Morris, Nichols, Arsht & Tunnell, Mayer Brown, Gibson, Dunn & Crutcher, Roberts Law Firm (robertslawfirm.us), Shaw Keller, Fine, Kaplan & Black, R., Glancy Prongay & Murray, NastLaw, DiCello Levitt, Hausfeld, The Bifferato Firm, Labaton Keller Sucharow',
 'UFCW Local 1500 Welfare Fund, Watson Laboratories, Inc., Actavis Inc., Amgen Inc., Actavis Pharma, Inc., Teva Pharmaceuticals USA, Inc.')

In [22]:
# Spot-check a row from further into the data.
rows[300]

(2008968442,
 'Garavanian et al v. JetBlue Airways Corporation, et al',
 '1:23-cv-10678-WGY',
 'U.S. District Court for the District of Massachusetts',
 'William G. Young, F. Dennis Saylor IV',
 datetime.date(2023, 3, 29),
 datetime.date(2024, 6, 18),
 'Department of Justice, Goodwin, WilmerHale, Cooley, Foley Hoag, Boies Schiller Flexner LLP, State of District of Columbia, State of Massachusetts, Choate Hall & Stewart, Mayer Brown, Paul, Weiss, Rifkind, Wharton & Garrison, Latham & Watkins, Freshfields Bruckhaus Deringer, Goulston & Storrs, Paul Hastings, A&O Shearman, Cleary Gottlieb Steen & Hamilton, Law Offices of Lingel H. Winters, Law Offices of Lawrence G. Papale, Bonsignore & Brewer, Law Offices of Jeffrey K. Perkins, Alioto Law Firm, Bonsignore Trial Lawyers, Larson, The Veen Firm, Nedeau Law Firm, Paradise Law, Law Office of Theresa D. Moore, Rule Garza Howley, Parr Brown Gee & Loveless',
 "Christine Whalen, State of Maryland, Harry Garavanian, State of New York, Don Freeland

In [23]:
# Create the workbook that will hold the export.
wb = Workbook()

In [24]:
# Use the workbook's active worksheet as the target sheet.
ws = wb.active

In [25]:
# Copy the accumulated rows (header first) into the worksheet, one sheet row each.
for row_values in rows:
    ws.append(row_values)

In [26]:
# Write the workbook to an .xlsx file; the relative path resolves against the current working directory.
wb.save("antitrust_terminated_2024_law_firms.xlsx")

In [27]:
# Close the workbook now that it has been saved.
wb.close()