In [1]:
# Install Dependencies
!pip install geopy tqdm


Collecting geopy
  Downloading geopy-2.4.1-py3-none-any.whl.metadata (6.8 kB)
Collecting geographiclib<3,>=1.52 (from geopy)
  Downloading geographiclib-2.0-py3-none-any.whl.metadata (1.4 kB)
Downloading geopy-2.4.1-py3-none-any.whl (125 kB)
Downloading geographiclib-2.0-py3-none-any.whl (40 kB)
Installing collected packages: geographiclib, geopy

   -------------------- ------------------- 1/2 [geopy]
   ---------------------------------------- 2/2 [geopy]

Successfully installed geographiclib-2.0 geopy-2.4.1


In [2]:
# Import necessary libraries
import json
import os
import time
from pathlib import Path

import pandas as pd
from geopy.extra.rate_limiter import RateLimiter
from geopy.geocoders import Nominatim
from tqdm.notebook import tqdm


In [3]:
# Load Cleaned DataFrame

# Read the cleaned bank-deposits CSV and preview its first rows
cleaned_csv_path = "../data/cleaned/cleaned_us_bank_deposits.csv"
df = pd.read_csv(cleaned_csv_path)
df.head()


Unnamed: 0,rank,total_deposits,bank_name
0,1,2601221000000.0,JPMorgan Chase Bank
1,2,2071624000000.0,Bank of America
2,3,1419560000000.0,Wells Fargo Bank
3,4,1361654000000.0,Citibank
4,5,523102100000.0,U.S. Bank


In [4]:
# Initialize Nominatim Geocoder

# geopy's default request timeout is 1 second, which the public Nominatim
# server often exceeds; a longer timeout avoids spurious failures and retries.
geolocator = Nominatim(user_agent="geo_bank_locator", timeout=10)

# Space calls at least 1 second apart (Nominatim's usage policy caps clients
# at one request per second) to avoid being blocked.
geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)


In [5]:
# Geocode Bank Names (Safe with Caching)

# Successful lookups are persisted here so that an interrupted or repeated run
# only queries names that have not been resolved yet (a full pass over a few
# thousand names at >= 1 second per request takes over an hour).
geocode_cache_path = Path("../data/enriched/geocode_cache.json")

# Only geocode unique bank names
unique_banks = df["bank_name"].dropna().unique()

geo_results = {}
if geocode_cache_path.exists():
    with geocode_cache_path.open(encoding="utf-8") as cache_file:
        # JSON stores the coordinate pairs as lists; restore them as tuples
        geo_results = {name: tuple(coords) for name, coords in json.load(cache_file).items()}

pending_banks = [bank for bank in unique_banks if bank not in geo_results]
geocode_errors = {}  # bank name -> repr of the exception, kept for inspection

try:
    for bank in tqdm(pending_banks):
        try:
            # The dataset covers US banks, so restrict matches to the United States;
            # otherwise a generic name can resolve to a place in another country.
            location = geocode(bank, country_codes="us")
        except Exception as e:
            # Best effort: keep going, but remember what failed instead of hiding it
            geocode_errors[bank] = repr(e)
            location = None
        if location:
            geo_results[bank] = (location.latitude, location.longitude)
        else:
            geo_results[bank] = (None, None)
finally:
    # Runs even when the loop is interrupted. Only resolved names are written:
    # a (None, None) entry may stem from a transient network error, so those
    # names are queried again on the next run.
    resolved = {name: coords for name, coords in geo_results.items() if coords[0] is not None}
    geocode_cache_path.parent.mkdir(parents=True, exist_ok=True)
    with geocode_cache_path.open("w", encoding="utf-8") as cache_file:
        json.dump(resolved, cache_file)

if geocode_errors:
    print(f"{len(geocode_errors)} bank names raised errors while geocoding; see geocode_errors")


  0%|          | 0/3877 [00:00<?, ?it/s]

RateLimiter caught an error, retrying (0/2 tries). Called with (*('Santander Bank, N.A.',), **{}).
Traceback (most recent call last):
  File "e:\Apps\MiniConda\Lib\site-packages\urllib3\connectionpool.py", line 534, in _make_request
    response = conn.getresponse()
  File "e:\Apps\MiniConda\Lib\site-packages\urllib3\connection.py", line 516, in getresponse
    httplib_response = super().getresponse()
  File "e:\Apps\MiniConda\Lib\http\client.py", line 1430, in getresponse
    response.begin()
    ~~~~~~~~~~~~~~^^
  File "e:\Apps\MiniConda\Lib\http\client.py", line 331, in begin
    version, status, reason = self._read_status()
                              ~~~~~~~~~~~~~~~~~^^
  File "e:\Apps\MiniConda\Lib\http\client.py", line 292, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
               ~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^
  File "e:\Apps\MiniConda\Lib\socket.py", line 719, in readinto
    return self._sock.recv_into(b)
           ~~~~~~~~~~~~~~~~~~~~^^^


In [6]:
# Add Latitude and Longitude to DataFrame

# geo_results maps bank name -> (latitude, longitude); a name that is absent
# from the mapping falls back to (None, None).
for position, column in enumerate(("latitude", "longitude")):
    df[column] = df["bank_name"].map(
        lambda name, position=position: geo_results.get(name, (None, None))[position]
    )


In [8]:
# Save to enriched dataset
enriched_csv_path = Path("../data/enriched/enriched_us_bank_locations.csv")

# Create enriched folder if it doesn't exist
enriched_csv_path.parent.mkdir(parents=True, exist_ok=True)

# index=False keeps the DataFrame's row index out of the written file
df.to_csv(enriched_csv_path, index=False)
