# ***Finding Case Outcomes and Labelling Data***

In this notebook, i aim to try and isolate case outcomes from the documents.csv
file manually, as well as using the RECAP open sources PACER data.

For missing outcomes that I fail to match, it might be useful idea to train LegalBERT on a downstream classification task using the labelled data and then using predicted case outcomes in my regressions.

In [1]:
#importing libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

#loading up google drive to access csv files
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
#loading my csv file
documents = pd.read_csv('/content/drive/MyDrive/ECO225_Final_Project/PART1/documents.csv')

  documents = pd.read_csv('/content/drive/MyDrive/ECO225_Final_Project/PART1/documents.csv')


In [3]:
#isolating relavent variables
clean_docs = documents.copy()
clean_docs = clean_docs.drop(columns=['case_number_raw', 'attachment', 'doc_date_filed', 'doc_date_uploaded', 'document_url'])

#creating column that replaces NaN long_description values with short_description's
clean_docs['combined_description'] = clean_docs['long_description'].fillna(clean_docs['short_description'])


#dropping all missiing long_descriptions
clean_docs = clean_docs.dropna(subset=['combined_description'])

In [4]:
# Set the display option to show full column contents
pd.set_option('display.max_colwidth', None)

clean_docs.head()

Unnamed: 0,case_row_id,case_number,district_id,doc_count,doc_number,short_description,long_description,combined_description
0,1.0,0:79-cv-06704,flsd,1,37.0,,"COPY OF PAPER DOCKET SHEET (kw, Deputy Clerk) (Entered: 08/03/2000)","COPY OF PAPER DOCKET SHEET (kw, Deputy Clerk) (Entered: 08/03/2000)"
1,1.0,0:79-cv-06704,flsd,2,,,"CASE CLOSED. Case and Motions no longer referred to Magistrate. (kw, Deputy Clerk) (Entered: 08/03/2000)","CASE CLOSED. Case and Motions no longer referred to Magistrate. (kw, Deputy Clerk) (Entered: 08/03/2000)"
2,3.0,0:83-cv-06860,flsd,1,123.0,,COPY OF PAPER DOCKET SHEET (Former Deputy Clerk) (Entered: 02/13/2004),COPY OF PAPER DOCKET SHEET (Former Deputy Clerk) (Entered: 02/13/2004)
3,3.0,0:83-cv-06860,flsd,2,,,Case closed (Former Deputy Clerk) (Entered: 03/05/1992),Case closed (Former Deputy Clerk) (Entered: 03/05/1992)
4,3.0,0:83-cv-06860,flsd,3,,,Case reopened (Former Deputy Clerk) (Entered: 05/13/1991),Case reopened (Former Deputy Clerk) (Entered: 05/13/1991)


---
# STEP 1: Manually Classify Based on long_descriptions that Include Judgements

This will serve as a high accuracy base line to train/test NLP models used later down the road:


In [5]:
# Define judgment-related keywords
judgment_keywords = ["judgment", "judgement"]

# Filter rows where any keyword appears in the combined_description column
judgment_mask = clean_docs['combined_description'].str.contains(
                      '|'.join(judgment_keywords), case=False, na=False)

# Resetting index to access columns by name
judgment_df = clean_docs[judgment_mask][['case_row_id', 'case_number',
                                         'doc_count', 'long_description',
                                         'combined_description']]


#first word should be "FINAL" - additional accuracy filter
clean_judgments = pd.DataFrame()
clean_judgments = judgment_df[judgment_df['long_description'
                                    ].str.split().str[0] == "FINAL"]

In [6]:
clean_judgments = clean_judgments.set_index(['case_row_id', 'case_number', 'doc_count']).sort_index()
clean_judgments

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,long_description,combined_description
case_row_id,case_number,doc_count,Unnamed: 3_level_1,Unnamed: 4_level_1
3.0,0:83-cv-06860,5,"FINAL JUDGMENT for Kenneth R. Cornwall against U. S. Construction. Defendant permanently enjoined from manufacturing, advertising, etc. any device infringing patent; Permanent injunction effective as of date of entry of judgment; plaintiff shall recover from principal sum of $13,647.42 with pre-judgment interest from 4/30/85 thru 7/7/87 in the amount of $95.00 FOR WHICH LET EXECUTION ISSUE. ( signed by Judge Jose A. Gonzalez Jr. on 7/21/87) CCAP/M (Former Deputy Clerk) (Entered: 05/13/1991)","FINAL JUDGMENT for Kenneth R. Cornwall against U. S. Construction. Defendant permanently enjoined from manufacturing, advertising, etc. any device infringing patent; Permanent injunction effective as of date of entry of judgment; plaintiff shall recover from principal sum of $13,647.42 with pre-judgment interest from 4/30/85 thru 7/7/87 in the amount of $95.00 FOR WHICH LET EXECUTION ISSUE. ( signed by Judge Jose A. Gonzalez Jr. on 7/21/87) CCAP/M (Former Deputy Clerk) (Entered: 05/13/1991)"
53.0,0:91-cv-06658,13,"FINAL JUDGMENT entered for Clotilde, Inc. and against Solar-Kist Corporation on the complaint and judgment is entered for defendant-counter-plaintiff on the counterclaim. Patents (4,320,699 and 4,597,812) are declared invalid. ( signed by Judge Kenneth L. Ryskamp on 9/8/93) CCAP/M (ea, Deputy Clerk) (Entered: 09/14/1993)","FINAL JUDGMENT entered for Clotilde, Inc. and against Solar-Kist Corporation on the complaint and judgment is entered for defendant-counter-plaintiff on the counterclaim. Patents (4,320,699 and 4,597,812) are declared invalid. ( signed by Judge Kenneth L. Ryskamp on 9/8/93) CCAP/M (ea, Deputy Clerk) (Entered: 09/14/1993)"
72.0,0:93-cv-06063,3,"FINAL JUDGMENT Dismissing case Status Conference set for 3/26/99 is Canceled (signed by Judge Jose A. Gonzalez Jr. on 3/4/99) CCAP (ss, Deputy Clerk) (Entered: 03/08/1999)","FINAL JUDGMENT Dismissing case Status Conference set for 3/26/99 is Canceled (signed by Judge Jose A. Gonzalez Jr. on 3/4/99) CCAP (ss, Deputy Clerk) (Entered: 03/08/1999)"
75.0,0:93-cv-06076,104,"FINAL JUDGMENT for Jaswant S. Pannu, Jaswant S. Pannu against IOLAB Corporation in the total amount of $670,667.47 ( signed by Judge Wilkie D. Ferguson Jr. on 6/12/97) CCAP/M (kp, Deputy Clerk) (Entered: 06/18/1997)","FINAL JUDGMENT for Jaswant S. Pannu, Jaswant S. Pannu against IOLAB Corporation in the total amount of $670,667.47 ( signed by Judge Wilkie D. Ferguson Jr. on 6/12/97) CCAP/M (kp, Deputy Clerk) (Entered: 06/18/1997)"
75.0,0:93-cv-06076,125,"FINAL JUDGMENT on jury verdict finding for IOLAB Corporation on claim for infringement by the Kelman 4-point lenses and the Small Kelman 3-point lenses. Judgment is entered for the plaintiffs on the claim for infringement by the large kelman 3-point lenses and both Bechart lenses. Judgment is entered for the defendant on the claim for willful infringement as to any of the defendant's intraocular lenses. Judgment is entered for the plaintiffs on the defendant's affirmative defenses of invalidity of the plaintiff's patent. Judgment is entered fthat the plaintiffs receive a reasonable royalty rate of 7% on the defendant's sales of the infringing lenses. Jurisdiction is reserved to (a) calculate the dollar amount of royalties based on the defendant's sales of the infringing lenses; (b) fashion an appropriate decree of injunction; and (c) determine an award reasonable fees and costs. ( signed by Judge Wilkie D. Ferguson Jr. on 2/10/97) CCAP/M (kp, Deputy Clerk) (Entered: 02/12/1997)","FINAL JUDGMENT on jury verdict finding for IOLAB Corporation on claim for infringement by the Kelman 4-point lenses and the Small Kelman 3-point lenses. Judgment is entered for the plaintiffs on the claim for infringement by the large kelman 3-point lenses and both Bechart lenses. Judgment is entered for the defendant on the claim for willful infringement as to any of the defendant's intraocular lenses. Judgment is entered for the plaintiffs on the defendant's affirmative defenses of invalidity of the plaintiff's patent. Judgment is entered fthat the plaintiffs receive a reasonable royalty rate of 7% on the defendant's sales of the infringing lenses. Jurisdiction is reserved to (a) calculate the dollar amount of royalties based on the defendant's sales of the infringing lenses; (b) fashion an appropriate decree of injunction; and (c) determine an award reasonable fees and costs. ( signed by Judge Wilkie D. Ferguson Jr. on 2/10/97) CCAP/M (kp, Deputy Clerk) (Entered: 02/12/1997)"
...,...,...,...,...
104642.0,6:08-cv-00015,117,"FINAL JUDGMENT. ORDERED, ADJUDGED AND DECREED that all claims made by Plaintiff or that could have been made in this action be DISMISSED in their entirety, with prejudice. Signed by Magistrate Judge John D. Love on 6/2/2009. (gsg) (Entered: 06/02/2009)","FINAL JUDGMENT. ORDERED, ADJUDGED AND DECREED that all claims made by Plaintiff or that could have been made in this action be DISMISSED in their entirety, with prejudice. Signed by Magistrate Judge John D. Love on 6/2/2009. (gsg) (Entered: 06/02/2009)"
104644.0,4:05-cv-00772,17,"FINAL JUDGMENT...all claims and causes of action asserted by pltf in the case against defts are dismissed. (Signed by Judge John McBryde on 5/12/06) (wrb, ) (Entered: 05/12/2006)","FINAL JUDGMENT...all claims and causes of action asserted by pltf in the case against defts are dismissed. (Signed by Judge John McBryde on 5/12/06) (wrb, ) (Entered: 05/12/2006)"
104645.0,4:14-cv-00884,38,FINAL JUDGMENT Consistent with the joint stipulation of dismissal filed by plaintiff and defendants. All claims dismissed with prejudice. Orders each party to bear costs of court incurred by such party. The clerk will prepare the final Report to the Patent/Trademark or Copyright Office. (Ordered by Judge John McBryde on 2/17/2015) (ult) Modified on 2/18/2015 (ult). (Entered: 02/18/2015),FINAL JUDGMENT Consistent with the joint stipulation of dismissal filed by plaintiff and defendants. All claims dismissed with prejudice. Orders each party to bear costs of court incurred by such party. The clerk will prepare the final Report to the Patent/Trademark or Copyright Office. (Ordered by Judge John McBryde on 2/17/2015) (ult) Modified on 2/18/2015 (ult). (Entered: 02/18/2015)
104646.0,2:07-cv-00288,226,"FINAL JUDGMENT.(Signed by Chief Judge Hayden Head) Parties notified.(mserpa, ) (Entered: 07/21/2009)","FINAL JUDGMENT.(Signed by Chief Judge Hayden Head) Parties notified.(mserpa, ) (Entered: 07/21/2009)"


Now we have a comprehensive dataframe containing only those files which we know
for sure are final judgments, maybe we can try using legalBERT on this for
classification of outcomes
---


In [7]:
pacer = pd.read_csv('/content/drive/MyDrive/ECO225_Final_Project/PART1/pacer_cases.csv')

In [8]:
pacer

Unnamed: 0,pacer_id,case_number,district_id,court_name,case_name,date_filed,date_closed,nos
0,,2:04-cv-01438,azd,Arizona District Court,APM Design Lab Inc v. Rubicad Corporation,2004-07-14,,
1,,2:04-cv-01438,azd,Arizona District Court,APM Design Lab Inc v. Rubicad Corporation,2004-07-14,,
2,,2:05-at-03203,azd,Arizona District Court (Phoenix),"Test Advantage, Inc. v.",2005-10-25,,
3,,1:03-cv-05889,cacd,California Central District Court,"La Leche League Intl v. Mothers Work, Inc.",2003-08-19,,
4,,2:03-cv-03968,cacd,California Central District Court,"Amini Innovation v. Pacific Century Inc, et al",2003-06-04,,
...,...,...,...,...,...,...,...,...
96994,371335.0,3:20-cv-09482,cand,California Northern District Court,"Tekvoke, LLC v. Brekeke Software, Inc.",2020-12-31,2021-04-28,830.0
96995,371334.0,4:20-cv-09481,cand,California Northern District Court,Topdown Licensing LLC v. Guillemot Inc.,2020-12-31,2022-06-01,830.0
96996,806262.0,2:20-cv-11800,cacd,California Central District Court,"Tsteigen, Inc. et al v. Midwest Motor Supply Co.",2020-12-31,2021-03-18,830.0
96997,1118843.0,6:20-cv-01216,txwd,Texas Western District Court,"Ocean Semiconductor LLC v. Western Digital Technologies, Inc.",2020-12-31,2022-09-19,830.0


In [9]:
#adding pacer_id into clean_judgments for API based labelling of case outcomes
pacer_id = pacer[['case_number', 'pacer_id']]
clean_judgments = clean_judgments.merge(pacer_id, on='case_number', how='left')

In [10]:
clean_judgments

Unnamed: 0,case_number,long_description,combined_description,pacer_id
0,0:83-cv-06860,"FINAL JUDGMENT for Kenneth R. Cornwall against U. S. Construction. Defendant permanently enjoined from manufacturing, advertising, etc. any device infringing patent; Permanent injunction effective as of date of entry of judgment; plaintiff shall recover from principal sum of $13,647.42 with pre-judgment interest from 4/30/85 thru 7/7/87 in the amount of $95.00 FOR WHICH LET EXECUTION ISSUE. ( signed by Judge Jose A. Gonzalez Jr. on 7/21/87) CCAP/M (Former Deputy Clerk) (Entered: 05/13/1991)","FINAL JUDGMENT for Kenneth R. Cornwall against U. S. Construction. Defendant permanently enjoined from manufacturing, advertising, etc. any device infringing patent; Permanent injunction effective as of date of entry of judgment; plaintiff shall recover from principal sum of $13,647.42 with pre-judgment interest from 4/30/85 thru 7/7/87 in the amount of $95.00 FOR WHICH LET EXECUTION ISSUE. ( signed by Judge Jose A. Gonzalez Jr. on 7/21/87) CCAP/M (Former Deputy Clerk) (Entered: 05/13/1991)",27008.0
1,0:91-cv-06658,"FINAL JUDGMENT entered for Clotilde, Inc. and against Solar-Kist Corporation on the complaint and judgment is entered for defendant-counter-plaintiff on the counterclaim. Patents (4,320,699 and 4,597,812) are declared invalid. ( signed by Judge Kenneth L. Ryskamp on 9/8/93) CCAP/M (ea, Deputy Clerk) (Entered: 09/14/1993)","FINAL JUDGMENT entered for Clotilde, Inc. and against Solar-Kist Corporation on the complaint and judgment is entered for defendant-counter-plaintiff on the counterclaim. Patents (4,320,699 and 4,597,812) are declared invalid. ( signed by Judge Kenneth L. Ryskamp on 9/8/93) CCAP/M (ea, Deputy Clerk) (Entered: 09/14/1993)",34235.0
2,0:93-cv-06063,"FINAL JUDGMENT Dismissing case Status Conference set for 3/26/99 is Canceled (signed by Judge Jose A. Gonzalez Jr. on 3/4/99) CCAP (ss, Deputy Clerk) (Entered: 03/08/1999)","FINAL JUDGMENT Dismissing case Status Conference set for 3/26/99 is Canceled (signed by Judge Jose A. Gonzalez Jr. on 3/4/99) CCAP (ss, Deputy Clerk) (Entered: 03/08/1999)",38634.0
3,0:93-cv-06076,"FINAL JUDGMENT for Jaswant S. Pannu, Jaswant S. Pannu against IOLAB Corporation in the total amount of $670,667.47 ( signed by Judge Wilkie D. Ferguson Jr. on 6/12/97) CCAP/M (kp, Deputy Clerk) (Entered: 06/18/1997)","FINAL JUDGMENT for Jaswant S. Pannu, Jaswant S. Pannu against IOLAB Corporation in the total amount of $670,667.47 ( signed by Judge Wilkie D. Ferguson Jr. on 6/12/97) CCAP/M (kp, Deputy Clerk) (Entered: 06/18/1997)",38647.0
4,0:93-cv-06076,"FINAL JUDGMENT on jury verdict finding for IOLAB Corporation on claim for infringement by the Kelman 4-point lenses and the Small Kelman 3-point lenses. Judgment is entered for the plaintiffs on the claim for infringement by the large kelman 3-point lenses and both Bechart lenses. Judgment is entered for the defendant on the claim for willful infringement as to any of the defendant's intraocular lenses. Judgment is entered for the plaintiffs on the defendant's affirmative defenses of invalidity of the plaintiff's patent. Judgment is entered fthat the plaintiffs receive a reasonable royalty rate of 7% on the defendant's sales of the infringing lenses. Jurisdiction is reserved to (a) calculate the dollar amount of royalties based on the defendant's sales of the infringing lenses; (b) fashion an appropriate decree of injunction; and (c) determine an award reasonable fees and costs. ( signed by Judge Wilkie D. Ferguson Jr. on 2/10/97) CCAP/M (kp, Deputy Clerk) (Entered: 02/12/1997)","FINAL JUDGMENT on jury verdict finding for IOLAB Corporation on claim for infringement by the Kelman 4-point lenses and the Small Kelman 3-point lenses. Judgment is entered for the plaintiffs on the claim for infringement by the large kelman 3-point lenses and both Bechart lenses. Judgment is entered for the defendant on the claim for willful infringement as to any of the defendant's intraocular lenses. Judgment is entered for the plaintiffs on the defendant's affirmative defenses of invalidity of the plaintiff's patent. Judgment is entered fthat the plaintiffs receive a reasonable royalty rate of 7% on the defendant's sales of the infringing lenses. Jurisdiction is reserved to (a) calculate the dollar amount of royalties based on the defendant's sales of the infringing lenses; (b) fashion an appropriate decree of injunction; and (c) determine an award reasonable fees and costs. ( signed by Judge Wilkie D. Ferguson Jr. on 2/10/97) CCAP/M (kp, Deputy Clerk) (Entered: 02/12/1997)",38647.0
...,...,...,...,...
5416,2:07-cv-00288,"FINAL JUDGMENT.(Signed by Chief Judge Hayden Head) Parties notified.(mserpa, ) (Entered: 07/21/2009)","FINAL JUDGMENT.(Signed by Chief Judge Hayden Head) Parties notified.(mserpa, ) (Entered: 07/21/2009)",104260.0
5417,2:07-cv-00288,"FINAL JUDGMENT.(Signed by Chief Judge Hayden Head) Parties notified.(mserpa, ) (Entered: 07/21/2009)","FINAL JUDGMENT.(Signed by Chief Judge Hayden Head) Parties notified.(mserpa, ) (Entered: 07/21/2009)",515026.0
5418,1:12-cv-00484,"FINAL JUDGMENT affirming that claims brought by Plaintiff Qcue, Inc. are DISMISSED WITHOUT PREJUDICE. All costs of suit are taxed against the party incurring the same. Signed by Judge Sam Sparks. (klw) (Entered: 09/30/2013)","FINAL JUDGMENT affirming that claims brought by Plaintiff Qcue, Inc. are DISMISSED WITHOUT PREJUDICE. All costs of suit are taxed against the party incurring the same. Signed by Judge Sam Sparks. (klw) (Entered: 09/30/2013)",140172.0
5419,1:12-cv-00484,"FINAL JUDGMENT affirming that claims brought by Plaintiff Qcue, Inc. are DISMISSED WITHOUT PREJUDICE. All costs of suit are taxed against the party incurring the same. Signed by Judge Sam Sparks. (klw) (Entered: 09/30/2013)","FINAL JUDGMENT affirming that claims brought by Plaintiff Qcue, Inc. are DISMISSED WITHOUT PREJUDICE. All costs of suit are taxed against the party incurring the same. Signed by Judge Sam Sparks. (klw) (Entered: 09/30/2013)",48568.0


# Using RECAP API to get Case Outcomes

In [24]:
import requests
import time

# API Setup
API_KEY = "e3327ec94df6a3e5d37beaeb611fce27825c6c84"  # Your API Key
API_URL = "https://www.courtlistener.com/api/rest/v4/fjc-integrated-database/"
HEADERS = {"Authorization": f"Token {API_KEY}"}

# Load case numbers (limiting to first 50 for testing)
case_numbers = clean_judgments["case_number"].tolist()[:10]  # Limit to first 50 cases

# Function to fetch only the "Judgment" field
def get_judgment(case_number):
    params = {
    "docket_number": case_number,
    "pacer_case_id": pacer_id  }
    response = requests.get(API_URL, headers=HEADERS, params=params)

    if response.status_code == 200:
        data = response.json()
        if "results" in data and len(data["results"]) > 0:
            case_data = data["results"][0]

            # Debugging step: Print all keys to find the correct field name
            print(f"Available keys for {case_number}: {case_data.keys()}")

            # Try extracting "judgment" (if available) or test other keys
            judgment_value = case_data.get("judgment", case_data.get("nature_of_judgement", "Not Found"))
            return {"Case Number": case_number, "Judgment": judgment_value}

    return {"Case Number": case_number, "Judgment": "Not Found"}

# Run for only first 50 cases
results = []
for case in case_numbers:
    results.append(get_judgment(case))
    time.sleep(1)  # Prevent API rate limiting

# Convert to DataFrame & Save Results
df_results = pd.DataFrame(results)
df_results.to_csv("case_judgments_sample.csv", index=False)

print("✅ Judgment data retrieval complete for 50 cases. Saved to case_judgments_sample.csv")


Available keys for 0:83-cv-06860: dict_keys(['resource_uri', 'date_created', 'date_modified', 'dataset_source', 'office', 'docket_number', 'origin', 'date_filed', 'jurisdiction', 'nature_of_suit', 'title', 'section', 'subsection', 'diversity_of_residence', 'class_action', 'monetary_demand', 'county_of_residence', 'arbitration_at_filing', 'arbitration_at_termination', 'multidistrict_litigation_docket_number', 'plaintiff', 'defendant', 'date_transfer', 'transfer_office', 'transfer_docket_number', 'transfer_origin', 'date_terminated', 'termination_class_action_status', 'procedural_progress', 'disposition', 'nature_of_judgement', 'amount_received', 'judgment', 'pro_se', 'year_of_tape', 'nature_of_offense', 'version', 'circuit', 'district'])
Available keys for 0:91-cv-06658: dict_keys(['resource_uri', 'date_created', 'date_modified', 'dataset_source', 'office', 'docket_number', 'origin', 'date_filed', 'jurisdiction', 'nature_of_suit', 'title', 'section', 'subsection', 'diversity_of_residenc

In [34]:
print(requests.get("https://www.courtlistener.com/api/rest/v4/fjc-integrated-database/",
    headers={"Authorization": "e3327ec94df6a3e5d37beaeb611fce27825c6c84"},
    params={"docket_number": "0:91-cv-06658"}).json().get("results", [{}])[0].get("defendant", "Not Found"))

MUSIC TRIBE GLOBAL BRANDS LTD.


In [36]:
import requests

API_KEY = "e3327ec94df6a3e5d37beaeb611fce2785c6c84"
API_URL = "https://www.courtlistener.com/api/rest/v4/fjc-integrated-database/"
HEADERS = {"Authorization": f"Token {API_KEY}"}

case_number = "0:91-cv-06658"

response = requests.get(API_URL, headers=HEADERS, params={"docket_number": case_number})

if response.status_code == 200:
    data = response.json()
    print(f"\n🔹 API returned {data['count']} cases for {case_number}")
    for case in data.get("results", []):
        print(f"📌 Docket: {case.get('docket_number')} | Filed: {case.get('date_filed')} | Court: {case.get('district')} | Judgment: {case.get('judgment', 'Not Found')}")
else:
    print(f"⚠️ API request failed, Status Code: {response.status_code}")


⚠️ API request failed, Status Code: 401
