In [12]:
import geopandas
import pandas

In [13]:
# Show every column when a DataFrame is displayed (None removes the column cap).
pandas.set_option('display.max_columns', None)

In [14]:
# Read the zipped Austin, TX "neighborhood ways" dataset into a GeoDataFrame.
raw_gdf = geopandas.read_file("../data/United States-TX-Austin-neighborhood_ways.zip")

In [15]:
# Inspect column names, non-null counts and dtypes of the raw data.
raw_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 96141 entries, 0 to 96140
Data columns (total 30 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   ROAD_ID     96141 non-null  float64 
 1   NAME        78964 non-null  object  
 2   INTERSECTI  96141 non-null  float64 
 3   INTERSE_01  96141 non-null  float64 
 4   OSM_ID      96141 non-null  float64 
 5   TDG_ID      96141 non-null  object  
 6   FUNCTIONAL  96141 non-null  object  
 7   PATH_ID     9752 non-null   float64 
 8   SPEED_LIMI  12106 non-null  float64 
 9   ONE_WAY_CA  20771 non-null  object  
 10  ONE_WAY     20641 non-null  object  
 11  WIDTH_FT    639 non-null    float64 
 12  FT_BIKE_IN  8830 non-null   object  
 13  FT_BIKE_01  0 non-null      object  
 14  TF_BIKE_IN  5958 non-null   object  
 15  TF_BIKE_01  0 non-null      object  
 16  FT_LANES    17686 non-null  float64 
 17  TF_LANES    9105 non-null   float64 
 18  FT_CROSS_L  17648 non-null  float64 
 

In [16]:
# Drop useless columns
# gdf.drop(["ROAD_ID", "INTERSECTI", "INTERSE_01", "OSM_ID", "PATH_ID", "SPEED_LIMI", "WIDTH_FT", "FT_LANES", "TF_LANES", "FT_CROSS_L", "TF_CROSS_L", "TWLTL_CROS", "FT_PARK", "TF_PARK", "FT_SEG_STR", "TF_SEG_STR", "TF_INT_STR", "XWALK", "FT_INT_STR", "ONE_WAY_CA", "ONE_WAY"],  axis = 1, inplace=True)

In [17]:
def tweak_nw(df):
    """Return a trimmed copy of the neighborhood-ways frame with a ``distance`` column.

    Parameters
    ----------
    df
        Frame to trim; expected to be the GeoDataFrame loaded from the
        neighborhood-ways dataset. It must contain every column listed in
        ``unwanted`` below, otherwise ``drop`` raises ``KeyError``.

    Returns
    -------
    A new frame (``df`` itself is not modified) without the unwanted
    attribute columns and with an extra ``distance`` column holding
    ``df.length``.
    """
    unwanted = [
        "FT_BIKE_01",
        "FT_CROSS_L",
        "FT_INT_STR",
        "FT_LANES",
        "FT_PARK",
        "FT_SEG_STR",
        "FUNCTIONAL",
        "INTERSECTI",
        "INTERSE_01",
        "JOB_ID",
        "ONE_WAY",
        "ONE_WAY_CA",
        "OSM_ID",
        "PATH_ID",
        "ROAD_ID",
        "SPEED_LIMI",
        "TDG_ID",
        "TF_BIKE_01",
        "TF_CROSS_L",
        "TF_INT_STR",
        "TF_LANES",
        "TF_PARK",
        "TF_SEG_STR",
        "TWLTL_CROS",
        "WIDTH_FT",
        "XWALK",
    ]
    trimmed = df.drop(columns=unwanted)
    # The length is taken from the original frame, not from the trimmed copy.
    return trimmed.assign(distance=df.length)


# Build the working frame; raw_gdf is left untouched because tweak_nw uses
# non-in-place drop/assign, which return new objects.
gdf = tweak_nw(raw_gdf)


In [18]:
# Check the bike lane categories.
# gdf["FT_BIKE_IN"].unique()

In [19]:
# Compute their distance.
# gdf["distance"] = gdf.length

In [20]:
# Preview the first two rows of the trimmed frame.
gdf.head(2)

Unnamed: 0,NAME,FT_BIKE_IN,TF_BIKE_IN,geometry,distance
0,State Highway 45 North,,,"LINESTRING (615518.201 3371356.885, 615467.915...",136.979923
1,,,,"LINESTRING (615527.803 3371236.027, 615482.016...",134.169096


In [21]:
# This does not work
# gdf.to_file("./simplified.csv", driver='CSV')

In [22]:
# Check the coordinate reference system; geometry lengths are measured in CRS units.
# NOTE(review): the CRS displayed below uses metres, which the later /1000 km conversion relies on.
gdf.crs

<Derived Projected CRS: EPSG:32614>
Name: WGS 84 / UTM zone 14N
Axis Info [cartesian]:
- E[east]: Easting (metre)
- N[north]: Northing (metre)
Area of Use:
- name: Between 102°W and 96°W, northern hemisphere between equator and 84°N, onshore and offshore. Canada - Manitoba; Nunavut; Saskatchewan. Mexico. United States (USA).
- bounds: (-102.0, 0.0, -96.0, 84.0)
Coordinate Operation:
- name: UTM zone 14N
- method: Transverse Mercator
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [23]:
# Map it!
# gdf.explore(column="FT_BIKE_IN")

In [24]:
# lane_df = gdf[gdf["FT_BIKE_IN"] == "lane"]
# buffered_lane_df = gdf[gdf["FT_BIKE_IN"] == "buffered_lane"]
# sharrow_df = gdf[gdf["FT_BIKE_IN"] == "sharrow"]
# track_df = gdf[gdf["FT_BIKE_IN"] == "track"]
# none_df = gdf[gdf["FT_BIKE_IN"].isna()]

In [25]:
# Loses the geometry, though: the result is a plain per-category table.
# Sum only the "distance" column. Summing every column would also try to
# aggregate the text and geometry columns, which raises TypeError on
# pandas >= 2.0 (numeric_only now defaults to False).
grouped = gdf.groupby("FT_BIKE_IN", dropna=False)[["distance"]].sum()

In [26]:
# Convert the summed distances to km: truncate to whole units, then divide by 1000.
# NOTE: this overwrites grouped["distance"] in place, so re-running this cell
# rescales the already-converted values again.
grouped["distance"] = grouped["distance"].astype(int).div(1000)

In [27]:
# Display the per-category totals after the km conversion above.
grouped

Unnamed: 0_level_0,distance
FT_BIKE_IN,Unnamed: 1_level_1
buffered_lane,91.83
lane,430.763
sharrow,74.032
track,14.939
,8673.986


In [28]:
# Total length of all ways in km; serves as a cross-check against the sum of the grouped values.
gdf["distance"].sum() / 1000

9285.552048446249

In [29]:
# Default orientation: {column -> {index label -> value}}.
grouped.to_dict()

{'distance': {'buffered_lane': 91.83,
  'lane': 430.763,
  'sharrow': 74.032,
  'track': 14.939,
  nan: 8673.986}}

In [30]:
# 'split' orientation: separate 'index', 'columns' and 'data' lists.
grouped.to_dict('split')

{'index': ['buffered_lane', 'lane', 'sharrow', 'track', nan],
 'columns': ['distance'],
 'data': [[91.83], [430.763], [74.032], [14.939], [8673.986]]}

In [31]:
# 'records' orientation: one dict per row. The index (the bike-lane category)
# is not included, so the records cannot be told apart by category.
grouped.to_dict('records')

[{'distance': 91.83},
 {'distance': 430.763},
 {'distance': 74.032},
 {'distance': 14.939},
 {'distance': 8673.986}]

In [32]:
# Keyed by index label: {bike-lane category -> {column -> value}}.
grouped_dict = grouped.to_dict(orient="index")
grouped_dict

{'buffered_lane': {'distance': 91.83},
 'lane': {'distance': 430.763},
 'sharrow': {'distance': 74.032},
 'track': {'distance': 14.939},
 nan: {'distance': 8673.986}}

In [33]:
# Same aggregation, but with the category kept as a regular column (as_index=False).
# Only "distance" is summed: a bare .sum() would also try to aggregate the text
# and geometry columns, which raises TypeError on pandas >= 2.0.
# Distances are left in CRS units here (no km conversion).
regrouped = gdf.groupby("FT_BIKE_IN", dropna=False, as_index=False)[["distance"]].sum()
regrouped

Unnamed: 0,FT_BIKE_IN,distance
0,buffered_lane,91830.23
1,lane,430763.3
2,sharrow,74032.3
3,track,14939.62
4,,8673987.0


In [34]:
# Row-wise list of dicts; FT_BIKE_IN is a regular column here, so each record keeps its category.
regrouped_d = regrouped.to_dict(orient="records")
regrouped_d

[{'FT_BIKE_IN': 'buffered_lane', 'distance': 91830.23366920228},
 {'FT_BIKE_IN': 'lane', 'distance': 430763.2828821904},
 {'FT_BIKE_IN': 'sharrow', 'distance': 74032.30287448216},
 {'FT_BIKE_IN': 'track', 'distance': 14939.622807880232},
 {'FT_BIKE_IN': nan, 'distance': 8673986.606212495}]