In [7]:
import requests
import os
import json
from dotenv import load_dotenv

# Load the API key from the environment (or a local .env file) and set up
# the CourtListener REST endpoint.
load_dotenv()
API_KEY = os.getenv("COURTLISTENER_API_KEY")
if not API_KEY:
    # os.getenv returns None for a missing variable; without this guard the
    # header below would literally read "Token None" and every request would
    # fail with an authentication error that hides the real cause.
    raise RuntimeError(
        "COURTLISTENER_API_KEY is not set; define it in the environment or in a .env file."
    )
BASE_URL = "https://www.courtlistener.com/api/rest/v4/"
REQUEST_TIMEOUT = 30  # seconds; requests waits indefinitely when no timeout is passed

HEADERS = {"Authorization": f"Token {API_KEY}"}

# Define categories
categories = ["family law", "employment law", "criminal law", "contract law"]
collected_cases = {}  # category -> list of result records returned by the API

for category in categories:
    print(f"🔍 Fetching cases for: {category.title()}")

    # NOTE(review): confirm that the opinions endpoint honours `search` and
    # `date_filed__*`. The sample rows recorded for "contract law" do not look
    # contract-specific, which is what would happen if unrecognised filters
    # were silently ignored — TODO verify against the CourtListener API docs.
    params = {
        "search": category,
        "date_filed__gte": "2024-01-01",
        "date_filed__lte": "2024-12-31",
        "page_size": 20,
    }

    try:
        response = requests.get(
            f"{BASE_URL}opinions/",
            headers=HEADERS,
            params=params,
            timeout=REQUEST_TIMEOUT,
        )
    except requests.RequestException as exc:
        # Connection failures and timeouts are reported like HTTP errors so
        # one bad category does not abort the remaining ones.
        print(f"Error fetching {category.title()} cases:", exc)
        continue

    if response.status_code != 200:
        print(f"Error fetching {category.title()} cases:", response.status_code, response.text)
        continue

    opinion_data = response.json()
    # .get keeps a payload without "results" from raising KeyError; the
    # loader cell reads the same key the same way.
    collected_cases[category] = opinion_data.get("results", [])

    # Persist the full response (not just "results") per category.
    filename = f"{category.replace(' ', '_')}_cases_2024.json"
    with open(filename, "w", encoding="utf-8") as file:
        json.dump(opinion_data, file, indent=4)

    print(f"{category.title()} cases saved to {filename}\n")

# Combined dump of every category that was fetched successfully.
with open("all_cases_2024.json", "w", encoding="utf-8") as file:
    json.dump(collected_cases, file, indent=4)

print("All case categories saved in all_cases_2024.json")


🔍 Fetching cases for: Family Law
Family Law cases saved tofamily_law_cases_2024.json

🔍 Fetching cases for: Employment Law
Employment Law cases saved toemployment_law_cases_2024.json

🔍 Fetching cases for: Criminal Law
Criminal Law cases saved tocriminal_law_cases_2024.json

🔍 Fetching cases for: Contract Law
Contract Law cases saved tocontract_law_cases_2024.json

All case categories saved in all_cases_2024.json


In [5]:
import json
import pandas as pd

# Define file paths (one JSON dump per category, as written by the fetch cell)
file_paths = {
    "contract_law": "contract_law_cases_2024.json",
    "criminal_law": "criminal_law_cases_2024.json",
    "employment_law": "employment_law_cases_2024.json",
    "family_law": "family_law_cases_2024.json"
}

# Fields kept from each record in the "results" list; any other keys in the
# JSON are dropped, and a listed key that is absent yields an all-NaN column.
CASE_COLUMNS = ["id", "absolute_url", "date_created", "date_modified",
                "page_count", "download_url"]


def load_cases_dataframe(file_path, columns=None):
    """Load one saved API response and return its results as a DataFrame.

    Parameters
    ----------
    file_path : str
        Path to a JSON file whose top-level object may hold a "results" list.
    columns : list of str, optional
        Columns to keep, in order. Defaults to CASE_COLUMNS.

    Returns
    -------
    pandas.DataFrame
        One row per entry in "results"; empty (but with the requested
        columns) when the key is missing or the list is empty.

    Raises
    ------
    FileNotFoundError
        If file_path does not exist.
    """
    selected = list(CASE_COLUMNS if columns is None else columns)
    # Close the file as soon as it is parsed; the DataFrame is built afterwards.
    with open(file_path, "r", encoding="utf-8") as file:
        data = json.load(file)
    cases = data.get("results", [])  # Extract the "results" list
    return pd.DataFrame(cases, columns=selected)


# Load and process JSON files
dataframes = {}

for category, file_path in file_paths.items():
    try:
        df = load_cases_dataframe(file_path)
    except FileNotFoundError:
        # The fetch cell skips a category when its request fails, so a file
        # may legitimately be absent; report it instead of aborting the load.
        print(f"Skipping {category}: {file_path} not found (re-run the fetch cell).")
        continue
    dataframes[category] = df

# Display the DataFrames
dataframes


{'contract_law':           id                                       absolute_url  \
 0   10824703  /opinion/10358115/state-of-tennessee-v-bryant-...   
 1   10824702  /opinion/10358114/1448-28th-avenue-llc-v-city-...   
 2   10824701  /opinion/10358113/abualya-v-all-stop-pipes-and...   
 3   10824700        /opinion/10358112/adair-homes-inc-v-brooks/   
 4   10824699                  /opinion/10358111/ayala-v-fhuere/   
 5   10824698   /opinion/10358110/chavez-meza-v-state-of-oregon/   
 6   10824697                    /opinion/10358109/davis-v-deen/   
 7   10824696  /opinion/10358108/dept-of-human-services-v-b-c-b/   
 8   10824695  /opinion/10358107/dept-of-human-services-v-r-w-c/   
 9   10824694    /opinion/10358106/fisk-v-fred-meyer-stores-inc/   
 10  10824693  /opinion/10358105/garrett-sims-v-dept-of-human...   
 11  10824692  /opinion/10358104/griffith-v-property-and-casu...   
 12  10824691                     /opinion/10358103/kim-v-brown/   
 13  10824690        /opinion/10

In [8]:
# Print, for every category, a header line, the list of column names held by
# that category's DataFrame, and an 80-character rule.
separator = "=" * 80
for category, df in dataframes.items():
    header = f"\n📌 **Columns in {category.title()} Cases DataFrame:**"
    column_names = df.columns.tolist()
    # One print call with a newline separator emits the same three lines.
    print(header, column_names, separator, sep="\n")



📌 **Columns in Contract_Law Cases DataFrame:**
['id', 'absolute_url', 'date_created', 'date_modified', 'page_count', 'download_url']

📌 **Columns in Criminal_Law Cases DataFrame:**
['id', 'absolute_url', 'date_created', 'date_modified', 'page_count', 'download_url']

📌 **Columns in Employment_Law Cases DataFrame:**
['id', 'absolute_url', 'date_created', 'date_modified', 'page_count', 'download_url']

📌 **Columns in Family_Law Cases DataFrame:**
['id', 'absolute_url', 'date_created', 'date_modified', 'page_count', 'download_url']
