In [1]:
# loading libraries
import requests
import csv
import pandas as pd

In [2]:
# create a function to batch geocode
def geocode(url, params, input_file, output_file, timeout=(30, 1800)):
    """Upload an address file to a batch geocoding endpoint and save the reply.

    The file at `input_file` is POSTed as the multipart field ``addressFile``
    together with `params` as query parameters. On an HTTP 200 response the
    raw response body is written to `output_file`; any other status code is
    reported on stdout and nothing is written.

    Parameters
    ----------
    url : str
        Address of the batch geocoding endpoint.
    params : dict
        Query parameters sent with the request.
    input_file : str
        Path of the address file to upload.
    output_file : str
        Path the response body is written to.
    timeout : float or tuple, optional
        Passed straight to ``requests.post``. Without a timeout a stalled
        request would block forever; the default allows 30 s to connect and
        30 min for the server to start answering, since batch jobs are slow.

    Returns
    -------
    None
        Outcomes are reported with ``print``. Network errors (including
        timeouts) raised by ``requests`` propagate to the caller.
    """
    # The upload handle is only needed while the request is in flight.
    with open(input_file, 'rb') as file:
        files = {'addressFile': file}
        response = requests.post(url=url, params=params, files=files, timeout=timeout)

    if response.status_code != 200:
        print(f'Error: {response.status_code}, {response.text}')
        return

    # Geocoding already succeeded at this point; only the local write can fail.
    try:
        with open(output_file, 'wb') as output:
            output.write(response.content)
    except OSError as e:
        print(f'An error occurred while saving the geocoded results: {e}')
    else:
        print(f'Geocoded results saved to {output_file}')

In [3]:
# Census geocoder endpoint that accepts a whole address file per request
batch_geocode_url = 'https://geocoding.geo.census.gov/geocoder/geographies/addressbatch'

# query parameters sent along with the batch request
geocode_params = dict(
    returntype='geographies',
    benchmark='Public_AR_Current',
    vintage='Current_Current',
)

In [4]:
# import credit union file
# `headers` holds the first row of the file; `credit_list` holds every
# remaining non-empty row as a list of strings.
# newline='' lets the csv module handle line endings itself, so quoted fields
# that contain line breaks are parsed correctly.
with open('../FederallyInsuredCreditUnions_2024q3.csv', 'r', newline='') as csvfile:
    reader = csv.reader(csvfile, delimiter=',')
    headers = next(reader, None)
    if headers is None:
        raise ValueError('../FederallyInsuredCreditUnions_2024q3.csv is empty: no header row found')
    # csv.reader yields [] for blank lines; skip them so later positional
    # lookups (row[0], row[5], ...) do not fail on an empty row.
    credit_list = [row for row in reader if row]

In [5]:
# write a reduced copy of the credit union file that keeps only the columns
# needed for geocoding (header row first, then one line per record)
address_columns = (0, 5, 6, 7, 8)  # column positions copied from the source file
with open('../NCUA Addresses.csv','w', newline='') as address_file:
    address_writer = csv.writer(address_file, delimiter=',')
    address_writer.writerow([headers[position] for position in address_columns])
    address_writer.writerows(
        [record[position] for position in address_columns]
        for record in credit_list
    )

In [7]:
# send the address file through the batch geocoder and store the response
geocode_input_file = '../NCUA Addresses.csv'
geocode_output_file = '../addresses_geocoded.csv'
geocode(
    url=batch_geocode_url,
    params=geocode_params,
    input_file=geocode_input_file,
    output_file=geocode_output_file,
)

Geocoded results saved to ../addresses_geocoded.csv


In [8]:
# create a dataframe and combine the results from the original file and the geocoded addresses

# Names assigned, by position, to the 12 fields of a geocoded line.
# NOTE(review): the labels are this notebook's own; confirm their meaning
# against the Census batch geocoder output description before relying on them.
geocode_columns = ['Charter number', 'Input Address', 'Match Type', 'Match Precision', 'Standardized Address', 'Coordinates', 'Census Geoid', 'Unknown1', 'State Code', 'County Code', 'Tract Code', 'Block Code']

# The join key is read as text on both sides so the two files are compared on
# exactly what was written to them, independent of pandas' type inference.
data = pd.read_csv(r'../FederallyInsuredCreditUnions_2024q3.csv', dtype={'Charter number': str})

# header=None: the geocoder response is data from its first line on. Letting
# pandas treat that line as a header would silently drop one record. Passing
# `names` also fixes the column count up front, so lines with fewer fields
# (presumably unmatched addresses - TODO confirm) are padded with NaN.
geocodes = pd.read_csv(
    r'../addresses_geocoded.csv',
    header=None,
    names=geocode_columns,
    dtype={'Charter number': str},
)

data['Charter number'] = data['Charter number'].astype(str).str.strip()
geocodes['Charter number'] = geocodes['Charter number'].astype(str).str.strip()

# Left join keeps every credit union; rows of `geocodes` whose key does not
# appear in `data` (e.g. the echoed header line of the upload) are ignored.
combined = data.merge(geocodes, on = ['Charter number'], how='left', sort=True)

In [9]:
# create the FIPS code
# 'FIPS 11' = state (2 digits) + county (3 digits) + tract (6 digits), each
# zero-padded on the left. Only rows whose 'Match Type' is 'Match' get a code;
# every other row keeps the empty string.
#
# Built with column operations instead of a row loop. This also avoids reusing
# the f-string's own quote character inside its replacement fields, which is a
# syntax error on Python versions before 3.12.
matched = combined['Match Type'].eq('Match')


def _zero_padded(column, width):
    """Return `column` for the matched rows as integer text left-padded with zeros to `width`."""
    # astype(int) drops the '.0' the codes carry when the column was read as
    # float; it raises if a matched row has a missing code.
    return combined.loc[matched, column].astype(int).astype(str).str.zfill(width)


combined['FIPS 11'] = ''
combined.loc[matched, 'FIPS 11'] = (
    _zero_padded('State Code', 2)
    + _zero_padded('County Code', 3)
    + _zero_padded('Tract Code', 6)
)

In [10]:
# write the merged table to disk as parquet, leaving the row index out
combined.to_parquet(path='../combined_file.parquet', index=False)