# Address Segmentation
Conversion of address points into segmented address ranges along a road network.

**Note:** This guide assumes the input data has already been cleaned and filtered of any undesired records.

In [35]:
import contextily as ctx
import geopandas as gpd
import ipympl
import matplotlib.pyplot as plt
import re
from operator import itemgetter

## Load and preview dataframes

In [36]:
# Single source of truth for the input GeoPackage; change this one path to use a different dataset.
GPKG_PATH = "C:/scratch/City_Of_Yellowknife.gpkg"

# Address points and the road network are stored as separate layers of the same GeoPackage.
addresses = gpd.read_file(GPKG_PATH, layer="addresses")
roads = gpd.read_file(GPKG_PATH, layer="roads")

In [37]:
# Preview the first rows of the address points to check the loaded columns.
addresses.head()

Unnamed: 0,number,street,geometry
0,43A,OTTO DRIVE,POINT Z (-12728491.168 8972072.805 0.000)
1,48,TAYLOR ROAD,POINT Z (-12733829.550 8964622.287 0.000)
2,136B,DE WEERDT DRIVE,POINT Z (-12731620.348 8969989.275 0.000)
3,129B,HAENER DRIVE,POINT Z (-12732186.396 8969861.224 0.000)
4,2B,STIRLING COURT,POINT Z (-12731633.895 8970568.762 0.000)


In [38]:
# Preview the first rows of the road network to check the loaded columns.
roads.head()

Unnamed: 0,featurecod,community,routename,surface,date,roadname,geometry
0,RSTRLIN,Northwest Territories,,Unpaved,2015-09-28,VEE LAKE ROAD,"LINESTRING Z (-12731561.088 8988790.623 0.000,..."
1,RRESLIN,Northwest Territories,,Unpaved,2015-09-28,,"LINESTRING Z (-12731215.603 8990017.829 0.000,..."
2,RHWYLIN,Northwest Territories,Highway 4,Paved,2015-09-28,INGRAHAM TRAIL,"LINESTRING Z (-12723960.099 8980833.256 0.000,..."
3,RARTLIN,Northwest Territories,,Unpaved,2015-09-28,DETTAH ROAD,"LINESTRING Z (-12723909.403 8972227.140 0.000,..."
4,RSTRLIN,Northwest Territories,,Unpaved,2015-09-28,DETTAH,"LINESTRING Z (-12724863.590 8957290.788 0.000,..."


## Preview data

In [44]:
%matplotlib widget

# Fetch basemap.
# Note: basemaps are retrieved in EPSG:3857 and, therefore, dataframes should also use this crs.
basemap, extent = ctx.bounds2img(*roads.total_bounds, ll=False, source=ctx.providers.Esri.WorldImagery)

# Configure plot with basemap.
fig, ax = plt.subplots()
plt.imshow(basemap, extent=extent)
roads.plot(ax=ax, color="blue", label="roads", linewidth=0.5)
addresses.plot(ax=ax, color="red", label="addresses", markersize=2)
ax.ticklabel_format(style="plain")
ax.set_xticks(itemgetter(0, 1)(extent))
plt.xlabel("Longitude (m)")
plt.ylabel("Latitude (m)")
plt.title("City of Yellowknife")
plt.legend(loc="center left", bbox_to_anchor=(1.0, 0.5))
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

## Generate / fix address attributes - number and suffix

In [39]:
# Split each raw civic number (e.g. "43A") into its digits ("43") and the remaining
# non-digit characters ("A"). "number" stays a string column.
#
# Both pieces are derived from the same untouched series, and the guard keeps this cell
# idempotent: once "number" has been reduced to digits, splitting it a second time would
# overwrite every suffix with an empty string.
if "suffix" not in addresses.columns:
    raw_number = addresses["number"]
    # Vectorised string ops; missing values propagate as missing instead of raising.
    addresses["suffix"] = raw_number.str.replace(r"\d+", "", regex=True)
    addresses["number"] = raw_number.str.replace(r"\D", "", regex=True)
addresses.head()

Unnamed: 0,number,street,geometry,suffix
0,43,OTTO DRIVE,POINT Z (-12728491.168 8972072.805 0.000),A
1,48,TAYLOR ROAD,POINT Z (-12733829.550 8964622.287 0.000),
2,136,DE WEERDT DRIVE,POINT Z (-12731620.348 8969989.275 0.000),B
3,129,HAENER DRIVE,POINT Z (-12732186.396 8969861.224 0.000),B
4,2,STIRLING COURT,POINT Z (-12731633.895 8970568.762 0.000),B


## Configure address to roads linkages