In [1]:
import os
import time

import pandas as pd
import requests

### Judge ID and Political Affiliation

In [2]:
# Base URL of the CourtListener political-affiliations endpoint
political_affiliations_url = "https://www.courtlistener.com/api/rest/v3/political-affiliations/"

# Read the API token from the environment instead of embedding the secret in
# the notebook, so it cannot leak when the notebook is shared or committed.
api_token = os.environ.get("COURTLISTENER_API_TOKEN")
if not api_token:
    # Fail before any request is made rather than scraping with a bad header
    raise RuntimeError(
        "Set the COURTLISTENER_API_TOKEN environment variable to your "
        "CourtListener API token before running this cell."
    )
headers = {
    'Authorization': f'Token {api_token}'
}

# Initialize variables
next_url = political_affiliations_url  # URL of the next page to request
affiliations = []  # One {'person_id', 'political_party'} record per judge
processed_person_ids = set()  # Set to track processed person IDs
request_count = 0  # Successful requests made since the last pause
max_requests_per_hour = 4500  # Set a safe number of requests before pausing
pause_duration = 3600  # Pause duration in seconds (1 hour)

def handle_pause(message, duration):
    """Announce a pause, sleep through it, and hand back a reset counter.

    Args:
        message: Text printed before sleeping.
        duration: How long to sleep, in seconds (passed to ``time.sleep``).

    Returns:
        Always 0; callers assign it to their request counter to reset it.
    """
    print(message)
    time.sleep(duration)
    reset_count = 0
    return reset_count

# Seconds to wait on a single request. Without a timeout, requests.get can
# block indefinitely on a stalled connection and hang the whole cell.
request_timeout = 30

# Loop through the pages of the API until it reports no further page
while next_url:
    try:
        response = requests.get(next_url, headers=headers, timeout=request_timeout)
    except requests.exceptions.RequestException as exc:
        # Network failure or timeout: stop paging, but still fall through to
        # the save step below so the records collected so far are kept.
        print(f"An error occurred: {exc}")
        break

    if response.status_code == 200:
        data = response.json()
        num_results = len(data['results'])
        print(f"Request {request_count + 1}: Retrieved {num_results} results")
        request_count += 1  # Count this request

        for result in data['results']:
            # The person field is a URL whose last path segment is the ID;
            # rstrip makes the extraction work with or without a trailing slash.
            person_id = result['person'].rstrip('/').split('/')[-1]

            # Skip if this person_id has already been processed, so only the
            # first affiliation seen for each person is recorded
            if person_id in processed_person_ids:
                continue

            affiliations.append({
                'person_id': person_id,
                'political_party': result['political_party']
            })

            # Mark this person_id as processed
            processed_person_ids.add(person_id)

        # Update next_url to the URL for the next page of results
        next_url = data['next']

    # Handle 429 Too Many Requests: next_url is unchanged, so the same page
    # is requested again once the pause is over
    elif response.status_code == 429:
        request_count = handle_pause("Too many requests. Pausing for 1 hour...", pause_duration)

    else:
        print(f"Failed to retrieve data: {response.status_code}")
        break

    # Pause to avoid throttling
    if request_count >= max_requests_per_hour:
        request_count = handle_pause(f"Reached {max_requests_per_hour} requests. Pausing for {pause_duration / 60} minutes to avoid throttling...", pause_duration)

# Create pandas DataFrame from the affiliations list
affiliations_df = pd.DataFrame(affiliations)

# Display the DataFrame
print("Affiliations DataFrame:")
print(affiliations_df)

# Save the DataFrame to a CSV file
affiliations_df.to_csv('political_affiliations.csv', index=False) #Change the directory as needed


Request 1: Retrieved 20 results
Request 2: Retrieved 20 results
Request 3: Retrieved 20 results
Request 4: Retrieved 20 results
Request 5: Retrieved 20 results
Request 6: Retrieved 20 results
Request 7: Retrieved 20 results
Request 8: Retrieved 20 results
Request 9: Retrieved 20 results
Request 10: Retrieved 20 results
Request 11: Retrieved 20 results
Request 12: Retrieved 20 results
Request 13: Retrieved 20 results
Request 14: Retrieved 20 results
Request 15: Retrieved 20 results
Request 16: Retrieved 20 results
Request 17: Retrieved 20 results
Request 18: Retrieved 20 results
Request 19: Retrieved 20 results
Request 20: Retrieved 20 results
Request 21: Retrieved 20 results
Request 22: Retrieved 20 results
Request 23: Retrieved 20 results
Request 24: Retrieved 20 results
Request 25: Retrieved 20 results
Request 26: Retrieved 20 results
Request 27: Retrieved 20 results
Request 28: Retrieved 20 results
Request 29: Retrieved 20 results
Request 30: Retrieved 20 results
Request 31: Retriev

### Use Judge ID to scrape Gifts

In [None]:
# Load the affiliations data from the CSV file
affiliations_df = pd.read_csv('political_affiliations.csv')

# Base URL of the CourtListener financial-disclosures endpoint
financial_disclosures_url = "https://www.courtlistener.com/api/rest/v3/financial-disclosures/"

# Read the API token from the environment instead of embedding the secret in
# the notebook, so it cannot leak when the notebook is shared or committed.
api_token = os.environ.get("COURTLISTENER_API_TOKEN")
if not api_token:
    # Fail before any request is made rather than scraping with a bad header
    raise RuntimeError(
        "Set the COURTLISTENER_API_TOKEN environment variable to your "
        "CourtListener API token before running this cell."
    )
headers = {
    'Authorization': f'Token {api_token}'
}

# Initialize variables
gifts_data = []  # One record per gift found in a judge's disclosures
request_count = 0  # Successful requests made since the last pause
max_requests_per_hour = 4500  # Set a safe number of requests before pausing
pause_duration = 3600  # Pause duration in seconds (1 hour)

def handle_pause(message, duration):
    """Print a notice, wait, and return the value that resets the counter.

    Args:
        message: Text printed before sleeping.
        duration: How long to sleep, in seconds (passed to ``time.sleep``).

    Returns:
        Always 0; callers assign it to their request counter to reset it.
    """
    print(message)
    time.sleep(duration)
    reset_count = 0
    return reset_count

# Seconds to wait on a single request. Without a timeout, requests.get can
# block indefinitely on a stalled connection and hang the whole cell.
request_timeout = 30
stop_scraping = False  # Set when an unrecoverable error should end the run

# Loop through each judge's ID to get their gifts
for index, row in affiliations_df.iterrows():
    person_id = row['person_id']
    political_party = row['political_party']
    print(f"Requesting gifts for Judge with person ID: {person_id}")

    # Start at the first page of this judge's disclosures and keep following
    # the 'next' link when the response provides one, so judges with more
    # disclosures than fit on one page are not truncated.
    page_url = f"{financial_disclosures_url}?person={person_id}&fields=gifts"
    while page_url:
        try:
            financial_response = requests.get(page_url, headers=headers, timeout=request_timeout)
        except requests.exceptions.RequestException as exc:
            # Network failure or timeout: stop, but still save what was collected
            print(f"An error occurred: {exc}")
            stop_scraping = True
            break

        if financial_response.status_code == 200:
            financial_data = financial_response.json()
            for disclosure in financial_data['results']:
                for gift in disclosure.get('gifts', []):
                    gifts_data.append({
                        'person_id': person_id,
                        'political_party': political_party,
                        'source': gift['source'],
                        'description': gift['description'],
                        'value': gift['value']
                    })
            request_count += 1  # Count this request
            page_url = financial_data.get('next')

        # Handle 429 Too Many Requests: page_url is left unchanged, so the same
        # judge is requested again after the pause instead of being skipped
        elif financial_response.status_code == 429:
            request_count = handle_pause("Too many requests. Pausing for 1 hour...", pause_duration)

        else:
            print(f"Failed to retrieve data: {financial_response.status_code}")
            stop_scraping = True
            break

        # Pause to avoid throttling
        if request_count >= max_requests_per_hour:
            request_count = handle_pause(f"Reached {max_requests_per_hour} requests. Pausing for {pause_duration / 60} minutes to avoid throttling...", pause_duration)

    if stop_scraping:
        break

# Create pandas DataFrame from the gifts list
gifts_df = pd.DataFrame(gifts_data)

# Display the DataFrame
print("Gifts DataFrame:")
print(gifts_df)

# Save the DataFrame to a CSV file
gifts_df.to_csv('gifts.csv', index=False)

### Use Judge ID to scrape Reimbursements

In [None]:
# Load the affiliations data from the CSV file
affiliations_df = pd.read_csv('political_affiliations.csv')

# Base URL of the CourtListener financial-disclosures endpoint
financial_disclosures_url = "https://www.courtlistener.com/api/rest/v3/financial-disclosures/"

# Read the API token from the environment instead of embedding the secret in
# the notebook, so it cannot leak when the notebook is shared or committed.
api_token = os.environ.get("COURTLISTENER_API_TOKEN")
if not api_token:
    # Fail before any request is made rather than scraping with a bad header
    raise RuntimeError(
        "Set the COURTLISTENER_API_TOKEN environment variable to your "
        "CourtListener API token before running this cell."
    )
headers = {
    'Authorization': f'Token {api_token}'
}

# Initialize variables
reimbursements_data = []  # One record per reimbursement found in a disclosure
request_count = 0  # Successful requests made since the last pause
max_requests_per_hour = 4500  # Set a safe number of requests before pausing
pause_duration = 3600  # Pause duration in seconds (1 hour)

def handle_pause(message, duration):
    """Report why the scrape is pausing, sleep, and return a zeroed counter.

    Args:
        message: Text printed before sleeping.
        duration: How long to sleep, in seconds (passed to ``time.sleep``).

    Returns:
        Always 0; callers assign it to their request counter to reset it.
    """
    print(message)
    time.sleep(duration)
    reset_count = 0
    return reset_count

# Seconds to wait on a single request. Without a timeout, requests.get can
# block indefinitely on a stalled connection and hang the whole cell.
request_timeout = 30
stop_scraping = False  # Set when an unrecoverable error should end the run

# Loop through each judge's ID to get their reimbursements
for index, row in affiliations_df.iterrows():
    person_id = row['person_id']
    political_party = row['political_party']
    print(f"Requesting reimbursements for Judge with person ID: {person_id}")

    # Start at the first page of this judge's disclosures and keep following
    # the 'next' link when the response provides one, so judges with more
    # disclosures than fit on one page are not truncated.
    page_url = f"{financial_disclosures_url}?person={person_id}&fields=reimbursements"
    while page_url:
        try:
            financial_response = requests.get(page_url, headers=headers, timeout=request_timeout)
        except requests.exceptions.RequestException as exc:
            # Network failure or timeout: stop, but still save what was collected
            print(f"An error occurred: {exc}")
            stop_scraping = True
            break

        if financial_response.status_code == 200:
            financial_data = financial_response.json()
            for disclosure in financial_data['results']:
                for reimbursement in disclosure.get('reimbursements', []):
                    reimbursements_data.append({
                        'person_id': person_id,
                        'political_party': political_party,
                        'source': reimbursement['source'],
                        'location': reimbursement['location'],
                        'purpose': reimbursement['purpose'],
                        'items_paid_or_provided': reimbursement['items_paid_or_provided']
                    })
            request_count += 1  # Count this request
            page_url = financial_data.get('next')

        # Handle 429 Too Many Requests: page_url is left unchanged, so the same
        # judge is requested again after the pause instead of being skipped
        elif financial_response.status_code == 429:
            request_count = handle_pause("Too many requests. Pausing for 1 hour...", pause_duration)

        else:
            print(f"Failed to retrieve data: {financial_response.status_code}")
            stop_scraping = True
            break

        # Pause to avoid throttling
        if request_count >= max_requests_per_hour:
            request_count = handle_pause(f"Reached {max_requests_per_hour} requests. Pausing for {pause_duration / 60} minutes to avoid throttling...", pause_duration)

    if stop_scraping:
        break

# Create pandas DataFrame from the reimbursements list
reimbursements_df = pd.DataFrame(reimbursements_data)

# Display the DataFrame
print("Reimbursements DataFrame:")
print(reimbursements_df)

# Save the DataFrame to a CSV file
reimbursements_df.to_csv('reimbursements.csv', index=False)


### Use Judge ID to scrape Investments

In [None]:
# Load the affiliations data from the CSV file
affiliations_df = pd.read_csv('political_affiliations.csv')

# Base URL of the CourtListener financial-disclosures endpoint
financial_disclosures_url = "https://www.courtlistener.com/api/rest/v3/financial-disclosures/"

# Read the API token from the environment instead of embedding the secret in
# the notebook, so it cannot leak when the notebook is shared or committed.
api_token = os.environ.get("COURTLISTENER_API_TOKEN")
if not api_token:
    # Fail before any request is made rather than scraping with a bad header
    raise RuntimeError(
        "Set the COURTLISTENER_API_TOKEN environment variable to your "
        "CourtListener API token before running this cell."
    )
headers = {
    'Authorization': f'Token {api_token}'
}

# Initialize variables
investments_data = []  # One record per investment found in a disclosure
request_count = 0  # Successful requests made since the last pause
max_requests_per_hour = 4500  # Set a safe number of requests before pausing
pause_duration = 3600  # Pause duration in seconds (1 hour)

def handle_pause(message, duration):
    """Log a pause notice, block for the pause, and return a cleared counter.

    Args:
        message: Text printed before sleeping.
        duration: How long to sleep, in seconds (passed to ``time.sleep``).

    Returns:
        Always 0; callers assign it to their request counter to reset it.
    """
    print(message)
    time.sleep(duration)
    reset_count = 0
    return reset_count

# Seconds to wait on a single request. Without a timeout, requests.get can
# block indefinitely on a stalled connection and hang the whole cell.
request_timeout = 30
stop_scraping = False  # Set when an unrecoverable error should end the run

# Loop through each judge's ID to get their investments
for index, row in affiliations_df.iterrows():
    person_id = row['person_id']
    political_party = row['political_party']
    print(f"Requesting investments for Judge with person ID: {person_id}")

    # Start at the first page of this judge's disclosures and keep following
    # the 'next' link when the response provides one, so judges with more
    # disclosures than fit on one page are not truncated.
    page_url = f"{financial_disclosures_url}?person={person_id}&fields=investments"
    while page_url:
        try:
            financial_response = requests.get(page_url, headers=headers, timeout=request_timeout)
        except requests.exceptions.RequestException as exc:
            # Network failure or timeout: stop, but still save what was collected
            print(f"An error occurred: {exc}")
            stop_scraping = True
            break

        if financial_response.status_code == 200:
            financial_data = financial_response.json()
            for disclosure in financial_data['results']:
                for investment in disclosure.get('investments', []):
                    investments_data.append({
                        'person_id': person_id,
                        'political_party': political_party,
                        'description': investment['description'],
                        'income_during_reporting_period_code': investment['income_during_reporting_period_code'],
                        'income_during_reporting_period_type': investment['income_during_reporting_period_type'],
                        'gross_value_code': investment['gross_value_code'],
                        'gross_value_method': investment['gross_value_method']
                    })
            request_count += 1  # Count this request
            page_url = financial_data.get('next')

        # Handle 429 Too Many Requests: page_url is left unchanged, so the same
        # judge is requested again after the pause instead of being skipped
        elif financial_response.status_code == 429:
            request_count = handle_pause("Too many requests. Pausing for 1 hour...", pause_duration)

        else:
            print(f"Failed to retrieve data: {financial_response.status_code}")
            stop_scraping = True
            break

        # Pause to avoid throttling
        if request_count >= max_requests_per_hour:
            request_count = handle_pause(f"Reached {max_requests_per_hour} requests. Pausing for {pause_duration / 60} minutes to avoid throttling...", pause_duration)

    if stop_scraping:
        break

# Create pandas DataFrame from the investments list
investments_df = pd.DataFrame(investments_data)

# Display the DataFrame
print("Investments DataFrame:")
print(investments_df)

# Save the DataFrame to a CSV file
investments_df.to_csv('investments.csv', index=False)

### Test

In [2]:
# Base URLs of the two CourtListener endpoints used by this cell
political_affiliations_url = "https://www.courtlistener.com/api/rest/v3/political-affiliations/"
financial_disclosures_url = "https://www.courtlistener.com/api/rest/v3/financial-disclosures/"

# Read the API token from the environment instead of embedding the secret in
# the notebook, so it cannot leak when the notebook is shared or committed.
api_token = os.environ.get("COURTLISTENER_API_TOKEN")
if not api_token:
    # Fail before any request is made rather than scraping with a bad header
    raise RuntimeError(
        "Set the COURTLISTENER_API_TOKEN environment variable to your "
        "CourtListener API token before running this cell."
    )
headers = {
    'Authorization': f'Token {api_token}'
}

# Initialize variables
next_url = political_affiliations_url  # URL of the next page to request
affiliations = []  # One {'person_id', 'political_party'} record per judge
gifts_data = []  # One record per gift found in a judge's disclosures
request_count = 0  # Successful requests made since the last pause
max_requests_per_hour = 4500  # Set a safe number of requests before pausing
pause_duration = 3600  # Pause duration in seconds (1 hour)
processed_person_ids = set()  # Set to track processed person IDs

# Seconds to wait on a single request. Without a timeout, requests.get can
# block indefinitely; with one, a stalled connection raises instead of hanging.
request_timeout = 30

# Loop through the required number of pages of the API
while next_url:
    # Request political affiliations
    response = requests.get(next_url, headers=headers, timeout=request_timeout)
    if response.status_code == 200:
        data = response.json()
        num_results = len(data['results'])
        print(f"Request {request_count + 1}: Retrieved {num_results} results")
        request_count += 1  # Count this request

        for result in data['results']:
            person_url = result['person']
            person_id = person_url.split('/')[-2]
            political_party = result['political_party']

            # Skip if this person_id has already been processed
            if person_id in processed_person_ids:
                continue

            affiliations.append({
                'person_id': person_id,
                'political_party': political_party
            })

            # Mark this person_id as processed
            processed_person_ids.add(person_id)

            # Display message when requesting gifts for a judge
            print(f"Requesting gifts for Judge with person ID: {person_id}")

            # Query gifts for the person, following the 'next' link when the
            # response provides one so every page of disclosures is read
            page_url = f"{financial_disclosures_url}?person={person_id}&fields=gifts"
            while page_url:
                financial_response = requests.get(page_url, headers=headers, timeout=request_timeout)
                if financial_response.status_code == 200:
                    financial_data = financial_response.json()
                    for disclosure in financial_data['results']:
                        for gift in disclosure.get('gifts', []):
                            gifts_data.append({
                                'person_id': person_id,
                                'source': gift['source'],
                                'description': gift['description'],
                                'value': gift['value']
                            })
                    request_count += 1  # Count this request
                    page_url = financial_data.get('next')

                # Handle 429 Too Many Requests: page_url is left unchanged, so
                # the same request is repeated after the pause
                elif financial_response.status_code == 429:
                    print("Too many requests. Pausing for 1 hour...")
                    time.sleep(pause_duration)
                    request_count = 0

                else:
                    # Report the failure instead of dropping this judge silently
                    print(f"Failed to retrieve gifts for person ID {person_id}: {financial_response.status_code}")
                    break

                # Pause in place when the request budget is used up. Breaking
                # out of the page here would skip the remaining judges on it,
                # because next_url advances to the following page afterwards.
                if request_count >= max_requests_per_hour:
                    print(f"Reached {max_requests_per_hour} requests. Pausing for {pause_duration / 60} minutes to avoid throttling...")
                    time.sleep(pause_duration)
                    request_count = 0  # Reset the request count after pausing

        # Update next_url to the URL for the next page of results
        next_url = data['next']
    else:
        print(f"Failed to retrieve data: {response.status_code}")
        break

    # Pause to avoid throttling (covers pages that triggered no gift requests)
    if request_count >= max_requests_per_hour:
        print(f"Reached {max_requests_per_hour} requests. Pausing for {pause_duration / 60} minutes to avoid throttling...")
        time.sleep(pause_duration)
        request_count = 0  # Reset the request count after pausing

# Create pandas DataFrames from the affiliations and gifts lists
affiliations_df = pd.DataFrame(affiliations)
gifts_df = pd.DataFrame(gifts_data)

Request 1: Retrieved 20 results
Requesting gifts for Judge with person ID: 16207
Requesting gifts for Judge with person ID: 16204
Requesting gifts for Judge with person ID: 16202
Requesting gifts for Judge with person ID: 16201
Requesting gifts for Judge with person ID: 16200
Requesting gifts for Judge with person ID: 16199
Requesting gifts for Judge with person ID: 16198
Requesting gifts for Judge with person ID: 16197
Requesting gifts for Judge with person ID: 8392
Requesting gifts for Judge with person ID: 16195
Requesting gifts for Judge with person ID: 16193
Requesting gifts for Judge with person ID: 16192
Requesting gifts for Judge with person ID: 16191
Requesting gifts for Judge with person ID: 16190
Requesting gifts for Judge with person ID: 16188
Requesting gifts for Judge with person ID: 8391
Requesting gifts for Judge with person ID: 16186
Requesting gifts for Judge with person ID: 16185
Requesting gifts for Judge with person ID: 16184
Requesting gifts for Judge with person 

### Ignore these

In [None]:
import os
import time

import pandas as pd
import requests

# Base URLs of the two CourtListener endpoints used by this cell
political_affiliations_url = "https://www.courtlistener.com/api/rest/v3/political-affiliations/"
financial_disclosures_url = "https://www.courtlistener.com/api/rest/v3/financial-disclosures/"

# Read the API token from the environment instead of embedding the secret in
# the notebook, so it cannot leak when the notebook is shared or committed.
api_token = os.environ.get("COURTLISTENER_API_TOKEN")
if not api_token:
    # Fail before any request is made rather than scraping with a bad header
    raise RuntimeError(
        "Set the COURTLISTENER_API_TOKEN environment variable to your "
        "CourtListener API token before running this cell."
    )
headers = {
    'Authorization': f'Token {api_token}'
}

# Initialize variables
next_url = political_affiliations_url  # URL of the next page to request
affiliations = []  # One {'person_id', 'political_party'} record per judge
gifts_data = []  # One record per gift found in a judge's disclosures
request_count = 0  # Successful requests made since the last pause
max_requests_per_hour = 4500  # Set a safe number of requests before pausing
pause_duration = 3600  # Pause duration in seconds (1 hour)
processed_person_ids = set()  # Set to track processed person IDs

def handle_pause(message, duration):
    """Print the pause notice, sleep, and return the counter's reset value.

    Args:
        message: Text printed before sleeping.
        duration: How long to sleep, in seconds (passed to ``time.sleep``).

    Returns:
        Always 0; callers assign it to their request counter to reset it.
    """
    print(message)
    time.sleep(duration)
    reset_count = 0
    return reset_count

# Seconds to wait on a single request. Without a timeout, requests.get can
# block indefinitely; with one, a stalled connection raises and is reported
# by the except clause at the bottom of the loop.
request_timeout = 30

# Loop through the required number of pages of the API
while next_url:
    try:
        # Request political affiliations
        response = requests.get(next_url, headers=headers, timeout=request_timeout)
        if response.status_code == 200:
            data = response.json()
            num_results = len(data['results'])
            print(f"Request {request_count + 1}: Retrieved {num_results} results")
            request_count += 1  # Count this request

            for result in data['results']:
                person_url = result['person']
                person_id = person_url.split('/')[-2]
                political_party = result['political_party']

                # Skip if this person_id has already been processed
                if person_id in processed_person_ids:
                    continue

                affiliations.append({
                    'person_id': person_id,
                    'political_party': political_party
                })

                # Mark this person_id as processed
                processed_person_ids.add(person_id)

                # Display message when requesting gifts for a judge
                print(f"Requesting gifts for Judge with person ID: {person_id}")

                # Query gifts for the person, following the 'next' link when
                # the response provides one so every page of disclosures is read
                page_url = f"{financial_disclosures_url}?person={person_id}&fields=gifts"
                while page_url:
                    financial_response = requests.get(page_url, headers=headers, timeout=request_timeout)
                    if financial_response.status_code == 200:
                        financial_data = financial_response.json()
                        for disclosure in financial_data['results']:
                            for gift in disclosure.get('gifts', []):
                                gifts_data.append({
                                    'person_id': person_id,
                                    'source': gift['source'],
                                    'description': gift['description'],
                                    'value': gift['value']
                                })
                        request_count += 1  # Count this request
                        page_url = financial_data.get('next')

                    # Handle 429 Too Many Requests: page_url is left unchanged,
                    # so this judge's gifts are requested again after the pause
                    # instead of being lost
                    elif financial_response.status_code == 429:
                        request_count = handle_pause("Too many requests. Pausing for 1 hour...", pause_duration)

                    else:
                        # Report the failure instead of dropping this judge silently
                        print(f"Failed to retrieve gifts for person ID {person_id}: {financial_response.status_code}")
                        break

                    # Pause to avoid throttling while still inside the page
                    if request_count >= max_requests_per_hour:
                        request_count = handle_pause(f"Reached {max_requests_per_hour} requests. Pausing for {pause_duration / 60} minutes to avoid throttling...", pause_duration)

            # Update next_url to the URL for the next page of results
            next_url = data['next']

        # Handle 429 Too Many Requests: next_url is unchanged, so the same
        # page is requested again once the pause is over
        elif response.status_code == 429:
            request_count = handle_pause("Too many requests. Pausing for 1 hour...", pause_duration)

        else:
            print(f"Failed to retrieve data: {response.status_code}")
            break

        # Pause to avoid throttling
        if request_count >= max_requests_per_hour:
            request_count = handle_pause(f"Reached {max_requests_per_hour} requests. Pausing for {pause_duration / 60} minutes to avoid throttling...", pause_duration)

    except Exception as e:
        # Best-effort run: report the error and fall through to the save step
        # so the records collected so far are still written out
        print(f"An error occurred: {e}")
        break

# Create pandas DataFrames from the affiliations and gifts lists
affiliations_df = pd.DataFrame(affiliations)
gifts_df = pd.DataFrame(gifts_data)

# Display the DataFrames
print("Affiliations DataFrame:")
print(affiliations_df)
print("\nGifts DataFrame:")
print(gifts_df)

# Optionally, save the DataFrames to CSV files
affiliations_df.to_csv('political_affiliations_full.csv', index=False)
gifts_df.to_csv('gifts_full.csv', index=False)
