In [5]:
import csv
import io

import pandas as pd
import requests

In [6]:
# function to scrape fdic api and save it to a csv file 
def fdic_api(url, params, file_path, data_points: int, batch_size: int = 10000, timeout: float = 60):
    """
    Retrieve data from the FDIC API in batches and save it to one CSV file.

    The API is paged by sending an 'offset' and a 'limit' query parameter with
    every request. The header row of the first batch is written to the file;
    the first row of every later batch is skipped so the header appears once.

    Parameters:
    url - base url of the FDIC api
    params - query parameters for the API (e.g. the fields to include in the
             dataset and the response format). The response is parsed as CSV,
             so the caller is expected to request csv format. The dict is
             copied; the caller's object is never modified. Any 'offset' or
             'limit' entries are overridden for each batch.
    file_path - file path where the retrieved data gets saved (overwritten)
    data_points - total number of data points available for retrieval
                  (found in the metadata)
    batch_size - number of data points requested per API call (default 10000)
    timeout - seconds passed to requests.get as its timeout (default 60)

    Returns:
    None. Progress and errors are reported with print().

    Raises:
    ValueError - if batch_size is not positive
    Exception - any error raised while parsing or writing a batch is printed
                and re-raised; errors raised by requests.get propagate as-is
    """
    if batch_size <= 0:
        raise ValueError('batch_size must be a positive integer')

    total_data_pts = data_points
    # ceiling division: a partial final batch still needs its own request
    num_of_batches = max(0, -(-total_data_pts // batch_size))

    # copy so the per-batch 'offset'/'limit' never leak into the caller's dict
    query = dict(params or {})
    batches_saved = 0

    with open(file_path, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)

        for batch in range(num_of_batches):
            query['offset'] = batch * batch_size
            query['limit'] = batch_size  # ensures the api only returns one batch at a time

            # a timeout keeps a stalled connection from hanging the notebook forever
            response = requests.get(url, params=query, timeout=timeout)
            if response.status_code != 200:
                print(f'Error: {response.status_code}')
                print(f'{response.text}')
                break

            try:
                # StringIO with newline='' keeps line endings intact, so quoted
                # fields that contain embedded newlines are parsed correctly
                reader = csv.reader(io.StringIO(response.text, newline=''))
                if batches_saved > 0:
                    # skip the repeated header row; the default avoids
                    # StopIteration when a batch comes back empty
                    next(reader, None)
                writer.writerows(reader)
            except Exception as e:
                print(f'An error occurred while reading batch {batch + 1}: {e}')
                raise
            batches_saved += 1

    # only claim success when every batch was actually written
    if batches_saved == num_of_batches:
        print(f'All data retrieved successfully. {total_data_pts} data points saved to {file_path}')
    else:
        print(f'Retrieval stopped early: {batches_saved} of {num_of_batches} batches saved to {file_path}')
                
    # above function works if api has a limit (needs pagination), otherwise the function below works: 
    # response = requests.get(url, params)
    # if response.status_code == 200:
    #     try: 
    #         response_data = response.text.splitlines()
    #         reader = csv.reader(response_data)
    #         with open(file_path, 'w', newline = '', encoding = 'utf-8') as file: 
    #             writer = csv.writer(file)
    #             writer.writerows(reader)
    #     except Exception as e:
    #         print(f'An error occurred while writing to the file: {e}')
    #         raise
    # else:
    #     print(f'Error: {response.status_code}')
    #     print(f'{response.text}')

In [7]:
# fdic api has a limit of 10000 data points per request
# institutions file - FDIC API

# base_url_institutions = 'https://banks.data.fdic.gov/api/institutions?'
# insti_params = {
#     'fields': 'ACTIVE,ADDRESS,ADDRESS2,ASSET,BKCLASS,CBSA,CBSA_DIV,CBSA_DIV_FLG,CBSA_DIV_NO,CBSA_METRO,CBSA_METRO_FLG,CBSA_METRO_NAME,CBSA_MICRO_FLG,CBSA_NO,CITY,CLCODE,COUNTY,ENDEFYMD,ESTYMD,FED,FED_RSSD,INACTIVE,LATITUDE,LONGITUDE,NAME,NETINC,OFFDOM,OFFICES,OFFOA,STCNTY,STNAME,STNUM,UNINUM,WEBADDR,ZIP',
#     'format': 'csv'
# }
# insti_file_path = 'institutions_data.csv'
# insti_data_pts = 27825

# fdic_api(base_url_institutions, insti_params, insti_file_path, insti_data_pts)

In [8]:
# locations file - FDIC API 

# base_url_locations = 'https://banks.data.fdic.gov/api/locations?'
# loc_params = {
#     'fields': 'ADDRESS,BKCLASS,CBSA,CBSA_DIV,CBSA_DIV_FLG,CBSA_DIV_NO,CBSA_METRO,CBSA_METRO_FLG,CBSA_METRO_NAME,CBSA_MICRO_FLG,CBSA_NO,CITY,COUNTY,ESTYMD,MAINOFF,NAME,OFFNAME,OFFNUM,SERVTYPE,STALP,STCNTY,STNAME,UNINUM,ZIPCODE',
#     'format': 'csv',
#     'limit': 10000,
#     'offset': 0
# }
# loc_file_path = 'locations_data.csv'
# loc_data_pts = 78908

# fdic_api(base_url_locations, loc_params, loc_file_path, loc_data_pts)

In [9]:
# failures (list of bank failures up to date) - FDIC API 

# base_url_failures = 'https://banks.data.fdic.gov/api/failures?'
# fail_params = {
#     'fields': 'NAME,CITYST,FAILDATE,FAILYR,CHCLASS1,RESDATE,RESTYPE,QBFDEP,QBFASSET,COST,PSTALP',
#     'format': 'csv', 
#     'limit': 10000,
#     'offset': 0
# }
# fail_file_path = 'failures_data.csv'
# fail_data_pts = 4111

# fdic_api(base_url_failures, fail_params, fail_file_path, fail_data_pts)

In [10]:
# Demographics endpoint (summary of demographic information) - FDIC API.
# The 'filters' entry restricts records to CALLYM values from Jan 2015 through Jan 2025.
base_url_demographics = 'https://banks.data.fdic.gov/api/demographics?'

# Output location and the record count passed to fdic_api as data_points
# (presumably taken from the API metadata at the time of writing - verify before re-running).
demo_file_path = 'demographics_data.csv'
demo_data_pts = 190714

# Query parameters sent with each request.
demo_params = dict(
    filters='CALLYM:["201501" TO "202501"]',
    fields='ACTEVT,BRANCH,CALLYM,CALLYMD,CBSANAME,CERT,CLCODE,CMSA,CNTRYALP,CNTRYNUM,CNTYNUM,CSA,DIVISION,FDICAREA,METRO,MNRTYCDE,OFFDMULT,OFFTOT,OFFSTATE,WEBADDR',
    format='csv',
    limit=10000,
    offset=0,
)

# Download every batch and write the combined result to demo_file_path.
fdic_api(
    url=base_url_demographics,
    params=demo_params,
    file_path=demo_file_path,
    data_points=demo_data_pts,
)

All data retrieved successfully. 190714 data points saved to demographics_data.csv
