# San Francisco street network data

https://data.sfgov.org/Geographic-Locations-and-Boundaries/San-Francisco-Basemap-Street-Centerlines/7hfy-8sz8

- Select "Export" -> "Shapefile"

In [14]:
%matplotlib inline
import matplotlib.pyplot as plt
import geopandas as gpd
import os, collections

# The data directory sits four levels above the current working directory.
datadir = os.path.join(*([os.pardir] * 4), "Data")

In [7]:
# Location of the downloaded street-centerline shapefile export.
filename = os.path.join(datadir, "San Francisco Basemap Street Centerlines")
# `gpd.read_file` is the reader that `GeoDataFrame.from_file` delegates to.
frame = gpd.read_file(filename)

In [16]:
# Preview the first few rows to get a feel for the available fields.
frame.head()

Unnamed: 0,accepted,classcode,cnn,cnntext,district,f_node_cnn,geometry,jurisdicti,layer,lf_fadd,...,nhood,oneway,rt_fadd,rt_toadd,st_type,street,street_gc,streetname,t_node_cnn,zip_code
0,N,0,15145000.0,15145000,,26132000.0,LINESTRING (-122.4469424451799 37.757228921523...,,PRIVATE,0.0,...,Twin Peaks,B,0.0,0.0,CT,CROWN,CROWN,CROWN CT,54223000.0,94114
1,Y,0,11305001.0,11305001,,26255000.0,LINESTRING (-122.4479152656158 37.757589498018...,DPW,UPROW,1.0,...,Clarendon Heights,B,2.0,36.0,AVE,SAINT GERMAIN,SAINT GERMAIN,SAINT GERMAIN AVE,54224000.0,94114
2,Y,5,11305002.0,11305002,,54224000.0,LINESTRING (-122.4488770135246 37.757536327477...,DPW,STREETS,39.0,...,Clarendon Heights,B,38.0,98.0,AVE,SAINT GERMAIN,SAINT GERMAIN,SAINT GERMAIN AVE,26247000.0,94114
3,Y,5,13798000.0,13798000,,35015000.0,LINESTRING (-122.4665977274102 37.725924475312...,DPW,STREETS,221.0,...,Ingleside Terrace,F,222.0,298.0,ST,CORONA,CORONA,CORONA ST,35015000.0,94127
4,N,5,5389001.0,5389001,,35016000.0,LINESTRING (-122.3808508035504 37.736279384703...,DPW,STREETS,0.0,...,Hunters Point,B,0.0,0.0,AVE,FAIRFAX,FAIRFAX,FAIRFAX AVE,35017000.0,94124


In [9]:
# Show the complete list of column names (the `head()` preview elides some).
frame.columns

Index(['accepted', 'classcode', 'cnn', 'cnntext', 'district', 'f_node_cnn',
       'geometry', 'jurisdicti', 'layer', 'lf_fadd', 'lf_toadd', 'multigeom',
       'nhood', 'oneway', 'rt_fadd', 'rt_toadd', 'st_type', 'street',
       'street_gc', 'streetname', 't_node_cnn', 'zip_code'],
      dtype='object')

## Column/Field meanings

- `accepted` : Boolean Y/N ???
- `classcode` : Single digit between 0 and 6, stored as a string (see the counts below)
- `cnn` : Unique number id code
- `cnntext` : As `cnn` but as a string
- `district` : Always missing
- `f_node_cnn` : Believe a code into the underlying graph structure
- `geometry` : Obvious
- `jurisdicti` : Missing or DPW
- `layer` : See below
- `lf_fadd` : Address information for left side; 0 means "empty"
- `lf_toadd` : Ditto
- `multigeom` : Boolean, but always F
- `nhood` : One of a large number of neighbourhood names
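- `oneway` : One of "B", "F" or "T".  Guess that "T" means "yes" and "F" or "B" means "no".  (Alternative reading, not verified: "B" means both directions, while "F" and "T" mean one-way in the from-node to to-node direction and the reverse direction respectively.)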
- `rt_fadd` : Address information for right side; 0 means "empty"
- `rt_toadd` : Ditto
- `st_type` : See "names" section below
- `street` : Ditto
- `street_gc` : Ditto
- `streetname` : Ditto
- `t_node_cnn` : Believe a code into the underlying graph structure
- `zip_code` : Obvious

In [55]:
# Validate the four address-range columns in one vectorized pass instead of
# visiting each row with `iterrows`.
address_fields = ["lf_fadd", "lf_toadd", "rt_fadd", "rt_toadd"]
address_numbers = frame[address_fields]

# Every address number must be a whole number.  A missing value fails this
# comparison as well, so it is reported as an assertion failure.
assert (address_numbers % 1 == 0).all().all(), "address fields must hold whole numbers"

# On each side of the street ("lf" = left, "rt" = right) the range must be
# empty at both ends or at neither end; 0 is the "empty" marker.
for side in ("lf", "rt"):
    from_is_empty = frame[side + "_fadd"] == 0
    to_is_empty = frame[side + "_toadd"] == 0
    assert (from_is_empty == to_is_empty).all(), (
        "'" + side + "' address range is empty at only one end"
    )

In [17]:
# Tally how many rows carry each `classcode` value.
collections.Counter(frame["classcode"])

Counter({'0': 1141,
         '1': 102,
         '2': 203,
         '3': 1533,
         '4': 2379,
         '5': 10727,
         '6': 109})

In [26]:
# `cnn` should identify a row uniquely, so no value may occur twice.
assert len(frame) == len(set(frame.cnn))

# `cnn` must be a whole number, and `cnntext` must be that number written out
# as a string.
for cnn, cnntext in zip(frame.cnn, frame.cnntext):
    cnn_as_int = int(cnn)
    assert cnn_as_int == cnn
    assert str(cnn_as_int) == cnntext

In [27]:
# Tally the values found in the `district` column.
collections.Counter(frame["district"])

Counter({None: 16194})

In [29]:
# Tally the values found in the `jurisdicti` column.
collections.Counter(frame.jurisdicti)

Counter({None: 3015, 'DPW': 13179})

In [30]:
# Tally the values found in the `multigeom` column.
collections.Counter(frame.multigeom)

Counter({'F': 16194})

In [31]:
# Count the rows belonging to each neighbourhood name.
collections.Counter(frame.nhood)

Counter({'Twin Peaks': 90,
         'Clarendon Heights': 47,
         'Ingleside Terrace': 103,
         'Hunters Point': 236,
         'Golden Gate Park': 127,
         'Visitacion Valley': 420,
         'Bayview Heights': 201,
         'Potrero Hill': 476,
         'South of Market': 359,
         'Financial District South': 276,
         'Telegraph Hill': 187,
         'Miraloma Park': 164,
         'Mission Bay': 279,
         None: 1,
         'Crocker Amazon': 212,
         'Forest  Hill': 90,
         'Golden Gate Heights': 129,
         'West Portal': 93,
         'Bernal Heights South': 453,
         'Forest Hills Extension': 75,
         'Oceanview': 86,
         'Presidio': 439,
         'Portola': 294,
         'Russian Hill': 242,
         'Inner Mission': 675,
         'Bernal Heights North': 241,
         'Eureka Valley - Dolores Height': 254,
         'Financial District North': 360,
         'Lakeshore': 192,
         'Inner Richmond': 181,
         'Inner Sunset': 249

In [32]:
# Tally how often each `oneway` code occurs.
collections.Counter(frame.oneway)

Counter({'B': 12279, 'F': 2083, 'T': 1832})

## Names

- `street` and `street_gc` always agree, except in cases like "08TH" / "8TH"
- `st_type` can be None; if so then `street` and `streetname` agree
- Otherwise `streetname` is `street` combined with `st_type`, and _maybe_ with "NORTH" etc. added

In [49]:
# Check how the four name fields relate to each other, one row at a time.
for _, row in frame.iterrows():
    street = row.street
    full_name = row.streetname

    # `street` and `street_gc` may differ only by a leading "0" on `street`.
    if street != row.street_gc:
        assert street[0] == "0"
        assert street[1:] == row.street_gc

    # With no street type, the full name is just the street itself.
    if row.st_type is None:
        assert street == full_name
        continue

    # Otherwise the full name starts with "<street> <st_type>" ...
    base_name = street + " " + row.st_type
    if base_name == full_name:
        continue

    # ... and anything after that must be a space plus a compass direction.
    assert full_name.startswith(base_name)
    suffix = full_name[len(base_name):]
    assert suffix[0] == " "
    assert suffix[1:] in {"NORTH", "SOUTH", "EAST", "WEST"}

## Layer

Somewhat guessing based on QGIS and base maps etc.

- PAPER, Paper_fwys, Paper_water: Paper, i.e. fictitious streets (e.g. where a street would logically exist were it not for something like a freeway being in the way)
- PARKS etc.: Roads in parks.  As far as crime modelling goes, I don't see that these are different.
- FREEWAYS: As you might guess
- PRIVATE etc.: I think this is a legal thing; do _not_ appear to be inaccessible
- PSEUDO: Only 2 (!) but like "PAPER" I think
- UPROW: I believe these are staircases (semi-famous in SF)
- STREETS etc.: Normal roads

In [56]:
# Count how many rows fall into each `layer` category.
collections.Counter(frame["layer"])

Counter({'FREEWAYS': 210,
         'PAPER': 110,
         'PARKS': 178,
         'PRIVATE': 260,
         'PSEUDO': 2,
         'Paper_fwys': 213,
         'Paper_water': 137,
         'Parks_NPS_FtMaso': 23,
         'Parks_NPS_Presid': 418,
         'Private': 4,
         'Private_parking': 9,
         'STREETS': 14005,
         'Streets': 25,
         'Streets_HuntersP': 5,
         'Streets_Pedestri': 163,
         'UPROW': 245,
         'streets_ti': 158,
         'streets_ybi': 29})