# Wulf's Fish Customer Geocoding Analysis

This notebook processes and analyzes customer location data for Wulf's Fish. It performs the following key operations:

- Loads customer data from `wulfs_customers.csv`
- Implements geocoding and reverse geocoding using a local Nominatim server
- Converts addresses to coordinates (geocoding)
- Retrieves address information from coordinates (reverse geocoding)
- Validates location data accuracy
- Processes ~500 customer records

Key components:
- Base depot location: 8 Seafood Way, Boston
- Customer data includes addresses across Massachusetts
- Uses OpenStreetMap data through Nominatim for geocoding services

In [1]:
import pandas as pd
wulfs_cust_df = pd.read_csv("../data/wulfs_customer_master.csv")
# 8 Seafood Way, Boston, MA 02210‑2603
depot = (42.34902, -71.031180)

wulfs_cust_df.head()

Unnamed: 0,Customer Name,Street Address,Unit,City,State,Latitude,Longitude
0,1928 Beacon Hill LLC,97 Mt. Vernon St,,Boston,MA,42.358085,-71.069732
1,AC Kitchen,161 Merrimac St,,Woburn,MA,42.514958,-71.148602
2,Bar Volpe,170 W Broadway,,Boston,MA,42.340412,-71.053037
3,Fox & The Knife,28 West Broadway,,South Boston,MA,42.342732,-71.056494
4,Alba Restaurant,1486 Hancock Street,,Quincy,MA,42.248658,-71.001808


In [10]:
import requests
from urllib.parse import urlencode


def call_nominatim(call_type,params):
    """
    Fetches a address related to this code
    """
    NOMINATIM_URL = "http://192.168.1.101:8080"

    url = f"{NOMINATIM_URL}/{call_type}"
    
    try:
        response = requests.get(url, params=params)
        response.raise_for_status()  # Raise an exception for HTTP errors (4xx or 5xx)
        route_data = response.json()
        return route_data
    except requests.exceptions.RequestException as e:
        print(f"Error fetching route: {e}")
        return None

def geocode(query):
    try:
        params = { 
            "q": query,
            "format": "json",
            "limit": 1
            }
            
        response = call_nominatim("search",params)

        # Expecting a list of dicts
        if isinstance(response, list) and len(response) > 0:
            first = response[0]

            lat = first.get("lat")
            lon = first.get("lon")

            # Ensure both values exist
            if lat is not None and lon is not None:
                return (float(lat), float(lon))
    except Exception as e:
        print(f"⚠️ Geocoding failed for '{query}': {e}")

    return (None, None)


def reverse_geocode(lat,lon):
    response = None
    try:
        params = { 
        "lat": lat,
        "lon": lon,
        "format": "json",
        "addressdetails": 1}
            
        response = call_nominatim("reverse", params)
        return response
    except Exception as e:
        print(f"⚠️ Reverse Geocoding failed for '[{lat},{lon}]': {e}")


def address_for_coordinates(lat,lon):   
    response  = reverse_geocode(lat,lon)    
    if not response:
        return (None, None, None) 
    if isinstance(response, dict):
        addr_str=None
        address_type = response.get("addresstype",None)
        address = response.get("address",None)
        if address:
            house_number = address.get("house_number","")
            road = address.get("road","")
            town = address.get("town","")
            state = address.get("state","")
            postcode = address.get("postcode","")
            amenity = address.get("amenity","")
            addr_str = f"{house_number} {road}, {town} MA {postcode}"
        return(addr_str, address_type, amenity)
            
    return response

def zip_for_coordinates(lat,lon):
    response  = reverse_geocode(lat,lon)  
    # Expecting a list of dicts
    if isinstance(response, dict):
        address = response.get("address",None)
        if address:
            postcode = address.get("postcode","")
        return postcode
    return response
            


In [27]:
# Run a single address
address = "11 Father Jacobbe Rd East Boston MA"
response = geocode(address)
print(response)
address_details = zip_for_coordinates(response[0],response[1])
address_details

(None, None)
Error fetching route: 400 Client Error: Bad Request for url: http://192.168.1.101:8080/reverse?format=json&addressdetails=1


In [32]:
address_details = reverse_geocode(42.244272,-71.830101)
address_details

{'place_id': 2041402,
 'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. http://osm.org/copyright',
 'osm_type': 'node',
 'osm_id': 13115003114,
 'lat': '42.2443396',
 'lon': '-71.8301219',
 'class': 'shop',
 'type': 'hairdresser',
 'place_rank': 30,
 'importance': 9.99999999995449e-06,
 'addresstype': 'shop',
 'name': 'PheChanT Beauty Pro',
 'display_name': 'PheChanT Beauty Pro, 26, Cambridge Street, Main South, Worcester, Worcester County, Massachusetts, 01603, United States',
 'address': {'shop': 'PheChanT Beauty Pro',
  'house_number': '26',
  'road': 'Cambridge Street',
  'neighbourhood': 'Main South',
  'city': 'Worcester',
  'county': 'Worcester County',
  'state': 'Massachusetts',
  'ISO3166-2-lvl4': 'US-MA',
  'postcode': '01603',
  'country': 'United States',
  'country_code': 'us'},
 'boundingbox': ['42.2442896', '42.2443896', '-71.8301719', '-71.8300719']}

In [17]:
# Run Reverse Geocode against the customer dataframe
# Get the zip at the coordinates
# Approximately 20 sec. to run 500

cust_zips = []
cust_geocodes_match=0
errors=0
geocodes = [(row['Latitude'], row['Longitude']) for _, row in wulfs_cust_df.iterrows()]
for index, coordinates in enumerate(geocodes):
    response = zip_for_coordinates(coordinates[0], coordinates[1])
    if not response:
        print(f"{index}: {coordinates[0]}, {coordinates[1]} No response")
        errors +=1
        response =('','','')
    cust_zips.append(response)

285: 41.382133, -70.598025 No response
461: 41.901827, -71.2586384 No response


In [22]:
#wulfs_cust_df["ZIP"]=cust_zips
#wulfs_cust_df = wulfs_cust_df[["Customer Name","Street Address","Unit","City","State","ZIP","Latitude","Longitude"]]
wulfs_cust_df.to_csv("../data/wulfs_customer_master.csv",index=False)

In [None]:
# Run Geocode against the customer dataframe
# Approximately 30 sec. to run 500

cust_geocodes = []
cust_geocodes_match=0
errors=0
addresses = [(row['Street Address'], row['City'], row['State']) for _, row in wulfs_cust_df.iterrows()]
for index, address in enumerate(addresses):
    address = " ".join(address)
    #print(f"{index} {address}")
    response = geocode(address)
    if response and all(response):
        areSame=compare_lat_lon(response,locations[index])
        cust_geocodes_match += 1 if areSame else 0
        #print(f"{index} {response}=={locations[index]} {areSame}")
    else:
        errors +=1
        print(f"{index} No response {errors}")
    cust_geocodes.append(response)

22 No response 1
41 No response 2
68 No response 3
82 No response 4
142 No response 5
197 No response 6
217 No response 7
220 No response 8
221 No response 9
224 No response 10
253 No response 11
285 No response 12
344 No response 13
356 No response 14
431 No response 15
473 No response 16


In [24]:
wulfs_cust_df.to_excel("../data/wulfs_customer_master.xlsx", index=False, engine="openpyxl")