# Address Segmentation
Conversion of address points into segmented address ranges along a road network.

**Notes:** The following guide assumes data has already been preprocessed including data scrubbing and filtering.

In [45]:
import contextily as ctx
import geopandas as gpd
import ipympl
import matplotlib.pyplot as plt
import re
from operator import itemgetter

## Load and preview dataframes

In [46]:
addresses = gpd.read_file("C:/scratch/City_Of_Yellowknife.gpkg", layer="addresses")
roads = gpd.read_file("C:/scratch/City_Of_Yellowknife.gpkg", layer="roads")

In [47]:
addresses.head()

Unnamed: 0,number,street,geometry
0,43A,OTTO DRIVE,POINT Z (-12728491.168 8972072.805 0.000)
1,48,TAYLOR ROAD,POINT Z (-12733829.550 8964622.287 0.000)
2,136B,DE WEERDT DRIVE,POINT Z (-12731620.348 8969989.275 0.000)
3,129B,HAENER DRIVE,POINT Z (-12732186.396 8969861.224 0.000)
4,2B,STIRLING COURT,POINT Z (-12731633.895 8970568.762 0.000)


In [48]:
roads.head()

Unnamed: 0,featurecod,community,routename,surface,date,roadname,geometry
0,RSTRLIN,Northwest Territories,,Unpaved,2015-09-28,VEE LAKE ROAD,"LINESTRING Z (-12731561.088 8988790.623 0.000,..."
1,RRESLIN,Northwest Territories,,Unpaved,2015-09-28,,"LINESTRING Z (-12731215.603 8990017.829 0.000,..."
2,RHWYLIN,Northwest Territories,Highway 4,Paved,2015-09-28,INGRAHAM TRAIL,"LINESTRING Z (-12723960.099 8980833.256 0.000,..."
3,RARTLIN,Northwest Territories,,Unpaved,2015-09-28,DETTAH ROAD,"LINESTRING Z (-12723909.403 8972227.140 0.000,..."
4,RSTRLIN,Northwest Territories,,Unpaved,2015-09-28,DETTAH,"LINESTRING Z (-12724863.590 8957290.788 0.000,..."


## Preview data

In [51]:
%matplotlib widget

# Fetch basemap.
# Note: basemaps are retrieved in EPSG:3857 and, therefore, dataframes should also use this crs.
basemap, extent = ctx.bounds2img(*roads.total_bounds, ll=False, source=ctx.providers.Esri.WorldImagery)

# Configure plot with basemap.
fig, ax = plt.subplots()
plt.imshow(basemap, extent=extent)
roads.plot(ax=ax, color="cyan", label="roads", linewidth=0.5)
addresses.plot(ax=ax, color="red", label="addresses", markersize=2)
ax.ticklabel_format(style="plain")
ax.set_xticks(itemgetter(0, 1)(extent))
plt.xlabel("Longitude (m)")
plt.ylabel("Latitude (m)")
plt.title("City of Yellowknife")
plt.legend(loc="center left", bbox_to_anchor=(1.0, 0.5))
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

## Generate / fix address attributes - number and suffix

In [50]:
addresses["suffix"] = addresses["number"].map(lambda val: re.sub(pattern="\\d+", repl="", string=val, flags=re.I))
addresses["number"] = addresses["number"].map(lambda val: re.sub(pattern="[^\\d]", repl="", string=val, flags=re.I))
addresses.head()

Unnamed: 0,number,street,geometry,suffix
0,43,OTTO DRIVE,POINT Z (-12728491.168 8972072.805 0.000),A
1,48,TAYLOR ROAD,POINT Z (-12733829.550 8964622.287 0.000),
2,136,DE WEERDT DRIVE,POINT Z (-12731620.348 8969989.275 0.000),B
3,129,HAENER DRIVE,POINT Z (-12732186.396 8969861.224 0.000),B
4,2,STIRLING COURT,POINT Z (-12731633.895 8970568.762 0.000),B


## Configure address to roads linkages
Links addresses to the nearest, matching road segment.

In [62]:
# Link addresses and roads on join fields.
addresses["road_index"] = addresses["street"].map(lambda val: tuple(set(roads[roads["roadname"] == val].index)))
addresses.head()

Unnamed: 0,number,street,geometry,suffix,road_index
0,43,OTTO DRIVE,POINT Z (-12728491.168 8972072.805 0.000),A,"(389, 390, 391, 361, 911, 370)"
1,48,TAYLOR ROAD,POINT Z (-12733829.550 8964622.287 0.000),,"(1382, 1383, 168, 170, 651, 650, 173, 652, 654..."
2,136,DE WEERDT DRIVE,POINT Z (-12731620.348 8969989.275 0.000),B,"(804, 807)"
3,129,HAENER DRIVE,POINT Z (-12732186.396 8969861.224 0.000),B,"(800, 801, 802, 803, 806, 338)"
4,2,STIRLING COURT,POINT Z (-12731633.895 8970568.762 0.000),B,"(342,)"


In [63]:
# Filter plural linkages to the road with the lowest (nearest) geometric distance.

def get_nearest_linkage(pt, road_indexes):
    
    # Get road geometries.
    road_geometries = tuple(map(lambda index: roads["geometry"].iloc[index], road_indexes))
    
    # Get road distances from address point.
    road_distances = tuple(map(lambda road: pt.distance(road), road_geometries))
    
    # Get the road index associated with the smallest distance.
    road_index = road_indexes[road_distances.index(min(road_distances))]
    
    return road_index

# Flag and reduce plural linkages.
flag_plural = addresses["road_index"].map(len) > 1
addresses.loc[flag_plural, "road_index"] = addresses[flag_plural][["geometry", "road_index"]].apply(
    lambda row: get_nearest_linkage(*row), axis=1)

# Unpack first tuple element for singular linkages.
addresses.loc[~flag_plural, "road_index"] = addresses[~flag_plural]["road_index"].map(itemgetter(0))

addresses.head()

Unnamed: 0,number,street,geometry,suffix,road_index
0,43,OTTO DRIVE,POINT Z (-12728491.168 8972072.805 0.000),A,390
1,48,TAYLOR ROAD,POINT Z (-12733829.550 8964622.287 0.000),,168
2,136,DE WEERDT DRIVE,POINT Z (-12731620.348 8969989.275 0.000),B,807
3,129,HAENER DRIVE,POINT Z (-12732186.396 8969861.224 0.000),B,338
4,2,STIRLING COURT,POINT Z (-12731633.895 8970568.762 0.000),B,342


In [65]:
# Compile linked road geometry for each address.
addresses["road_geometry"] = addresses.merge(
    roads["geometry"], how="left", left_on="road_index", right_index=True)["geometry_y"]
addresses.head()

Unnamed: 0,number,street,geometry,suffix,road_index,road_geometry
0,43,OTTO DRIVE,POINT Z (-12728491.168 8972072.805 0.000),A,390,"LINESTRING Z (-12728785.212 8972077.288 0.000,..."
1,48,TAYLOR ROAD,POINT Z (-12733829.550 8964622.287 0.000),,168,"LINESTRING Z (-12733834.171 8964493.879 0.000,..."
2,136,DE WEERDT DRIVE,POINT Z (-12731620.348 8969989.275 0.000),B,807,"LINESTRING Z (-12731747.194 8969855.676 0.000,..."
3,129,HAENER DRIVE,POINT Z (-12732186.396 8969861.224 0.000),B,338,"LINESTRING Z (-12732196.969 8969779.397 0.000,..."
4,2,STIRLING COURT,POINT Z (-12731633.895 8970568.762 0.000),B,342,"LINESTRING Z (-12731536.519 8970569.562 0.000,..."


## Configure address parity
Computes and groups addresses by road parity (left / right side) and linkage.