In [3]:
import geopandas as gpd
import pandas as pd
import numpy as np
import re


## Go back to the original dataset with road types

#### 1. Apply the function add_space to make the road names the same
#### 2. Classify the road_names inorder to get the different road types classified under female, male, object and/or plant.

In [5]:
ams_roads = gpd.read_file('/Users/sharaishuro/Documents/Shemaps/8%_project/ams_roads/amsterdam_roads1.shp')
ams_roads.head()

Unnamed: 0,ROAD_NAME,ROAD_TYPE,OSM_ID,geometry
0,Beneluxbaan,Secondary,7362525.0,"LINESTRING (4.8693 52.32187, 4.8693 52.32191)"
1,Floraweg,Unclassified,978257100.0,"LINESTRING (4.91147 52.40009, 4.91142 52.40009..."
2,Asterweg,Tertiary,1011127000.0,"LINESTRING (4.90246 52.39359, 4.9025 52.39354,..."
3,Th. K. Van Lohuizenlaan,Tertiary,931233800.0,"LINESTRING (4.94618 52.3673, 4.94618 52.36742)"
4,Th. K. Van Lohuizenlaan,Tertiary,931233800.0,"LINESTRING (4.9462 52.36663, 4.94619 52.36686)"


In [4]:
# Define a regex pattern that ensures the street type is not already separated
STREET_TYPES_PATTERN = re.compile(r'(?<!\s)(straat|brug|singel|burg|hof|plein|gracht|laan|weg|boulevard|steeg|gouw|post|pad|park|baan|plantsoen|toren|berg|veld|dreef|dorp|dijk|kade)\b', re.IGNORECASE)

def add_space(road_name):
    if isinstance(road_name, str):
        return STREET_TYPES_PATTERN.sub(r' \1', road_name)
    return road_name

In [5]:
ams_roads['ROAD_NAME'] =ams_roads['ROAD_NAME'].apply(add_space)

In [6]:
ams_roads=ams_roads.rename(columns={'ROAD_NAME':'road_name'})

In [6]:
df = pd.read_csv('classified.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,road_name,gender,classification
0,0,'S-Gravelandse Veer,male,human
1,1,A. Moen straat,male,human
2,2,Akkerwinde weg,male,human
3,3,Akoleien straat,unknown,other
4,4,Alexander straat,male,human


In [None]:
# Merge the gender csv with the roads
updated_ams= ams_roads.merge(df[['road_name','gender','classification']], how= 'inner',on='road_name')
updated_ams.head()

In [None]:
# Save the updated shp
updated_ams.to_file('updated_ams.shp')

In [8]:
# Re-open it
updated_ams= gpd.read_file('updated_ams.shp')
updated_ams.head()

Unnamed: 0,road_name,ROAD_TYPE,OSM_ID,gender,classifica,geometry
0,Benelux baan,Secondary,7362525.0,male,human,"LINESTRING (4.8693 52.32187, 4.8693 52.32191)"
1,Flora weg,Unclassified,978257100.0,female,human,"LINESTRING (4.91147 52.40009, 4.91142 52.40009..."
2,Aster weg,Tertiary,1011127000.0,unknown,other,"LINESTRING (4.90246 52.39359, 4.9025 52.39354,..."
3,Th. K. Van Lohuizen laan,Tertiary,931233800.0,male,human,"LINESTRING (4.94618 52.3673, 4.94618 52.36742)"
4,Th. K. Van Lohuizen laan,Tertiary,931233800.0,male,human,"LINESTRING (4.9462 52.36663, 4.94619 52.36686)"


### Calculate length

#### To calculate the length of geometry we have to change the coordinate system to Amersfoort / RD New - Netherlands - Holland - Dutch EPSG:28992 from epsg:4326

In [None]:
updated_ams= updated_ams.to_crs(epsg=28992) 

In [92]:
updated_ams['length_km']= (updated_ams['geometry'].length/1000)
updated_ams.head()

Unnamed: 0,road_name,ROAD_TYPE,OSM_ID,gender,classifica,geometry,length,length_km
0,Benelux baan,Secondary,7362525.0,male,human,"LINESTRING (119690.329 481673.206, 119690.361 ...",4.450584,0.004451
1,Flora weg,Unclassified,978257100.0,female,human,"LINESTRING (122622.491 490356.49, 122619.088 4...",36.970572,0.036971
2,Aster weg,Tertiary,1011127000.0,unknown,other,"LINESTRING (122004.457 489637.34, 122007.142 4...",39.223737,0.039224
3,Th. K. Van Lohuizen laan,Tertiary,931233800.0,male,human,"LINESTRING (124962.558 486693.21, 124962.639 4...",13.351844,0.013352
4,Th. K. Van Lohuizen laan,Tertiary,931233800.0,male,human,"LINESTRING (124963.467 486618.656, 124962.941 ...",25.600094,0.0256


## Calculate the length according to gender, plants, objects, other against the different road types

In [None]:
#Total length of roads named after women
female=updated_ams[updated_ams['gender']=='female']
updated_ams[updated_ams['gender']=='female'].groupby('classifica').agg({'length_km': 'sum'})
pd.pivot_table(female, index='ROAD_TYPE', values='length', aggfunc={'sum','count'}).head()

Unnamed: 0_level_0,length_km
classifica,Unnamed: 1_level_1
other,601.679532


In [None]:
#Total length of roads named after men
male=updated_ams[updated_ams['gender']=='male']
pd.pivot_table(male, index='ROAD_TYPE', values='length', aggfunc={'sum','count'}).head()
updated_ams[updated_ams['gender']=='male'].groupby('classifica').agg({'length_km': 'sum'})

Unnamed: 0_level_0,count,sum
ROAD_TYPE,Unnamed: 1_level_1,Unnamed: 2_level_1
Busway,332,23950.324185
Construction,87,11798.402715
Corridor,3,357.402031
Cycleway,1393,149919.262175
Footway,878,55600.084555


In [None]:
#Total length of roads named after plants
plants=updated_ams[updated_ams['classifica']=='Plant']
pd.pivot_table(plants, index='ROAD_TYPE', values='length', aggfunc={'sum','count'}).head()
updated_ams[updated_ams['classifica']=='Plant'].groupby('classifica').agg({'length_km': 'sum'})

Unnamed: 0_level_0,count,sum
ROAD_TYPE,Unnamed: 1_level_1,Unnamed: 2_level_1
Busway,26,1204.666751
Construction,9,666.628168
Cycleway,63,4086.127614
Footway,100,6178.481863
Living street,7,184.988659


In [None]:
#Total length of roads named after animals
animals=updated_ams[updated_ams['classifica']=='Animal']
pd.pivot_table(other, index='ROAD_TYPE', values='length', aggfunc={'sum','count'}).head()
updated_ams[updated_ams['classifica']=='Animal'].groupby('classifica').agg({'length_km': 'sum'})

Unnamed: 0_level_0,count,sum
ROAD_TYPE,Unnamed: 1_level_1,Unnamed: 2_level_1
Busway,12,439.29988
Cycleway,6,1107.039164
Footway,9,525.994043
Living street,2,287.822136
Path,1,110.369891


In [None]:
#Total length of roads named after other
other=updated_ams[updated_ams['classifica']=='other']
pd.pivot_table(other, index='ROAD_TYPE', values='length', aggfunc={'sum','count'}).head()
updated_ams[updated_ams['classifica']=='other'].groupby('classifica').agg({'length_km': 'sum'})

Unnamed: 0_level_0,count,sum
ROAD_TYPE,Unnamed: 1_level_1,Unnamed: 2_level_1
Busway,181,10301.81037
Construction,5,636.346437
Corridor,4,281.9019
Cycleway,722,74744.59836
Footway,858,52656.63527


In [None]:
#Total length of roads named after places
places=updated_ams[updated_ams['classifica']=='Place']
pd.pivot_table(places, index='ROAD_TYPE', values='length', aggfunc={'sum','count'}).head()
updated_ams[updated_ams['classifica']=='Place'].groupby('classifica').agg({'length_km': 'sum'})

Unnamed: 0_level_0,count,sum
ROAD_TYPE,Unnamed: 1_level_1,Unnamed: 2_level_1
Busway,102,7860.428942
Construction,18,1475.895471
Cycleway,449,57308.035171
Footway,144,12310.053475
Living street,31,2039.088863


In [None]:
#Total length of roads named after objects
objects=updated_ams[updated_ams['classifica']=='Object']
pd.pivot_table(objects, index='ROAD_TYPE', values='length', aggfunc={'sum','count'}).head()
updated_ams[updated_ams['classifica']=='Object'].groupby('classifica').agg({'length_km': 'sum'})

Unnamed: 0_level_0,count,sum
ROAD_TYPE,Unnamed: 1_level_1,Unnamed: 2_level_1
Busway,20,2238.652023
Construction,1,12.248811
Cycleway,112,6861.583012
Footway,81,5089.597667
Living street,18,1065.418035


### Calculate the percentage of streets names after plants, objects, plants, other and gender

In [29]:
# Calculate the percentage of roads according to gender
total_female_rd= (updated_ams['gender']=='female').sum()
total_male_rd = (updated_ams['gender']== 'male').sum()
total_plant_rd = (updated_ams['classifica']=='Plant').sum()
total_place_rd = (updated_ams['classifica']=='Place').sum()
total_object_rd = (updated_ams['classifica']=='Object').sum()
total_animals_rd = (updated_ams['classifica']=='Animal').sum()
total_other_rd = (updated_ams['classifica']=='other').sum()
total_roads = 27731 


In [23]:
# Percentage of roads names after females
percent_women= (total_female_rd/total_roads)*100
percent_women

np.float64(11.734160325988965)

In [24]:
# Percentage of roads names after males
percent_males= (total_male_rd/total_roads)*100
percent_males

np.float64(44.827088817568786)

In [25]:
# Percentage of roads names after plants
percent_plant= (total_plant_rd/total_roads)*100
percent_plant

np.float64(2.848797374779128)

In [26]:
# Percentage of roads names after places
percent_place= (total_place_rd/total_roads)*100
percent_place

np.float64(10.861490750423714)

In [27]:
# Percentage of roads names after objects
percent_object= (total_object_rd/total_roads)*100
percent_object

np.float64(2.8199487937687064)

In [30]:
# Percentage of roads names after animals
percent_animals= (total_animals_rd/total_roads)*100
percent_animals

np.float64(0.8690635029389491)