In [55]:
import pandas as pd
# import os
import pyrosm

## Get Data

In [118]:
# Load the Berlin extract via pyrosm
osm = pyrosm.OSM(pyrosm.get_data("berlin"))

network = osm.get_network(
    network_type="cycling",
    extra_attributes=["surface", "smoothness"],
)

# Keep only the relevant columns and the records where both surface and
# smoothness are set. The explicit .copy() makes `network` an independent
# frame, so the category columns that later cells add to it do not raise
# pandas' SettingWithCopyWarning.
network = (
    network[["id", "geometry", "surface", "smoothness"]]
    .dropna(subset=["surface", "smoothness"])
    .copy()
)

pd.set_option('display.max_rows', 100)
print(network)

               id                                           geometry  \
2         4045220  MULTILINESTRING ((13.52783 52.49043, 13.52639 ...   
3         4045223  MULTILINESTRING ((13.52390 52.48281, 13.52450 ...   
5         4045247  MULTILINESTRING ((13.63700 52.36926, 13.63698 ...   
6         4045248  MULTILINESTRING ((13.63196 52.35216, 13.63190 ...   
7         4045249  MULTILINESTRING ((13.61671 52.34132, 13.61652 ...   
11        4049219  MULTILINESTRING ((13.63305 52.31641, 13.63299 ...   
12        4054007  MULTILINESTRING ((13.61692 52.37729, 13.61698 ...   
15        4054042  MULTILINESTRING ((13.61579 52.34082, 13.61568 ...   
16        4054061  MULTILINESTRING ((13.79352 52.28311, 13.79346 ...   
17        4054062  MULTILINESTRING ((13.78999 52.30059, 13.78995 ...   
18        4057539  MULTILINESTRING ((13.53951 52.26091, 13.54015 ...   
20        4057588  MULTILINESTRING ((13.62893 52.29524, 13.62891 ...   
23        4067879  MULTILINESTRING ((13.78817 52.42336, 13.78819

In [75]:
# Filter relevant columns and calculate counts
# df = network.value_counts(["smoothness", "surface"])

In [82]:
# df = df.reset_index()

In [109]:
# df = df.rename(columns={0 : 'no_streets'})

# df.columns

Index(['smoothness', 'surface', 'no_streets', 'surface_category',
       'smoothness_category'],
      dtype='object')

## Define Surface Categories

In [122]:
def surface_categories(df: pd.DataFrame) -> pd.DataFrame:
    """Add a coarse ``surface_category`` column derived from ``surface``.

    Every row starts as ``'undefined'``. The keyword rules below are then
    applied top to bottom as substring matches, and a later match overwrites
    an earlier one. The order therefore matters: ``'unpaved'`` also contains
    ``'paved'``, so it is first marked ``'paved'`` and then corrected by the
    later ``'unpaved'`` rule.

    Args:
        df: Frame with a string column ``surface``. Missing values are
            allowed; those rows stay ``'undefined'``.

    Returns:
        The same ``df`` object with the new column. The frame is modified in
        place on purpose: other cells call this with ``network`` and expect
        ``network`` itself to carry the column afterwards.
    """
    # (keyword, resulting category, case-sensitive match?)
    rules = (
        ('asp', 'paved', False),
        ('concrete', 'paved', True),
        ('paved', 'paved', True),

        ('stone', 'cobblestone', True),
        ('sett', 'cobblestone', True),

        ('unpaved', 'unpaved', True),
        ('dirt', 'unpaved', True),
        ('grass', 'unpaved', True),
        ('earth', 'unpaved', True),
        ('gravel', 'unpaved', True),
        ('sand', 'unpaved', True),
        ('wood', 'unpaved', True),
        ('bricks', 'unpaved', True),
        ('ground', 'unpaved', True),
        ('tartan', 'unpaved', True),
        ('compacted', 'unpaved', True),
        ('mud', 'unpaved', True),
        ('mulch', 'unpaved', True),
    )

    df['surface_category'] = 'undefined'
    for keyword, category, case_sensitive in rules:
        # na=False turns missing surfaces into "no match" directly, replacing
        # the former `== True` comparison on a mask that may contain NaN.
        matches = df['surface'].str.contains(keyword, case=case_sensitive, na=False)
        df.loc[matches, 'surface_category'] = category

    return df

# Categorise every street by surface (the column is added to `network` itself)
df = surface_categories(network)

# Full frame ordered by category, then the number of streets per category
print(df.sort_values(by='surface_category'))
print(df.groupby('surface_category')[['id']].count())

# Open questions:
# - Should paving stones be counted as cobblestone?
# - What does "excellent" fine gravel look like?

               id                                           geometry  \
2         4045220  MULTILINESTRING ((13.52783 52.49043, 13.52639 ...   
162900  590860866  MULTILINESTRING ((13.39910 52.54134, 13.39832 ...   
162901  590860867  MULTILINESTRING ((13.39805 52.54225, 13.39804 ...   
44264    34307335  MULTILINESTRING ((13.35592 52.62056, 13.35569 ...   
44263    34307330  MULTILINESTRING ((13.35608 52.62036, 13.35646 ...   
...           ...                                                ...   
31365    26516767  MULTILINESTRING ((13.51835 52.67937, 13.51794 ...   
31377    26516783  MULTILINESTRING ((13.50008 52.67909, 13.50014 ...   
31388    26524257  MULTILINESTRING ((13.56247 52.45790, 13.56267 ...   
30483    26176039  MULTILINESTRING ((13.57859 52.47187, 13.57870 ...   
98201   200523467  MULTILINESTRING ((13.50480 52.65945, 13.50340 ...   

              surface     smoothness surface_category  
2                sett            bad      cobblestone  
162900  paving_stones  

## Define Smoothness Categories

In [126]:
def smoothness_categories(df: pd.DataFrame) -> pd.DataFrame:
    """Add a two-level ``smoothness_category`` column derived from ``smoothness``.

    Every row starts as ``'undefined'``. The keyword rules below are then
    applied top to bottom as substring matches, and a later match overwrites
    an earlier one, so a value containing both a "good" and a "bad" keyword
    ends up as ``'bad'``.

    Args:
        df: Frame with a string column ``smoothness``. Missing values are
            allowed; those rows stay ``'undefined'``.

    Returns:
        The same ``df`` object with the new column. The frame is modified in
        place on purpose: other cells call this with ``network`` and expect
        ``network`` itself to carry the column afterwards.
    """
    # (keyword, resulting category)
    rules = (
        ('excellent', 'good'),
        ('perfect', 'good'),
        ('good', 'good'),

        ('intermediate', 'bad'),
        ('bad', 'bad'),
        ('horrible', 'bad'),
        ('impassable', 'bad'),
    )

    df['smoothness_category'] = 'undefined'
    for keyword, category in rules:
        # na=False turns missing values into "no match" directly, replacing
        # the former `== True` comparison on a mask that may contain NaN.
        matches = df['smoothness'].str.contains(keyword, na=False)
        df.loc[matches, 'smoothness_category'] = category

    return df

# Categorise every street by smoothness. `network` already carries the
# surface_category column added by the surface step.
df = smoothness_categories(network)

# Show every column when printing wide frames
pd.set_option('display.max_columns', None)

dfs = df[['smoothness', 'smoothness_category', 'surface_category', 'id']]

# Disabled experiment:
# print(dfs.groupby('smoothness').sum('count'))

# Streets per (smoothness, smoothness_category, surface_category) combination
print(
    dfs.groupby(by=['smoothness', 'smoothness_category', 'surface_category'])
    .count()
    .sort_values(by=['smoothness', 'smoothness_category', 'surface_category'])
)
# Streets per smoothness category
print(df.groupby('smoothness_category')[['id']].count())

                                                      id
smoothness    smoothness_category surface_category      
bad           bad                 cobblestone       1403
                                  paved              421
                                  unpaved           3446
excellent     good                cobblestone        107
                                  paved             4072
                                  unpaved              5
good          good                cobblestone       1033
                                  paved             6678
                                  undefined            1
                                  unpaved             94
horrible      bad                 cobblestone         26
                                  paved                8
                                  unpaved            272
impassable    bad                 unpaved             34
intermediate  bad                 cobblestone       1665
                               

In [128]:
def define_categories(df):
    """Attach both category columns to ``df`` and return the result.

    Runs ``surface_categories`` first and passes its result on to
    ``smoothness_categories``.
    """
    return smoothness_categories(surface_categories(df))

# Apply both categorisations in one step
df = define_categories(network)

# Number of streets per smoothness category and per surface category
print(df.groupby('smoothness_category')[['id']].count())
print(df.groupby('surface_category')[['id']].count())

                        id
smoothness_category       
bad                  11460
good                 12000
undefined                1
                     id
surface_category       
cobblestone        4585
paved             13029
undefined             3
unpaved            5844
