In [1]:
import pandas as pd
import os
import json
from shapely.geometry import Point
import difflib
from geopy.distance import vincenty
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Copy of the current matplotlib rcParams taken right after enabling the inline
# backend. It is not referenced again in the visible cells.
inline_rc = dict(mpl.rcParams)

In [2]:
# Folder containing the input CSV files (OSM stop points, OSM routepoints and
# GTFS routepoints). The commented-out line is an alternative location.
#in_folder = "audits/2018_04/data/"
in_folder = "../data/"
# Folder of the current audit. In this notebook it is also the folder from
# which "mapping_des_routepoints.csv" is read for the qualitative study.
#out_folder = "/tmp/"
out_folder = "audits/2018_07/images/"

In [3]:
# --- OSM stop points -------------------------------------------------------
# Only the stop identifier and the wheelchair tag are kept; both are renamed
# so that they carry the same "osm_" prefix as the routepoint columns.
stoppoint_column_names = {
    "stop_point_id": "osm_stop_id",
    "osm:wheelchair": "osm_wheelchair",
}
osm_stoppoints = pd.read_csv(os.path.join(in_folder, "osm-transit-extractor_stop_points.csv"), dtype=str)
osm_stoppoints = osm_stoppoints[
    [name for name in osm_stoppoints.columns if name in stoppoint_column_names]
].rename(columns=stoppoint_column_names)

# --- OSM routepoints -------------------------------------------------------
# Every column gets an "osm_" prefix, then the wheelchair tag of the stop is
# attached through a left join; routepoints without a tag get "".
osm_routepoints = (
    pd.read_csv(os.path.join(in_folder, "osm_routepoints_for_matching.csv"), dtype=str)
    .add_prefix("osm_")
    .merge(osm_stoppoints, how="left", on="osm_stop_id")
)
osm_routepoints["osm_wheelchair"] = osm_routepoints["osm_wheelchair"].fillna("")

# --- GTFS routepoints ------------------------------------------------------
# Every column gets a "gtfs_" prefix.
gtfs_routepoints = pd.read_csv(
    os.path.join(in_folder, "opendata_routepoints.csv"), dtype=str
).add_prefix("gtfs_")

print("On a {:d} routepoints dans OSM et {:d} routepoints dans le GTFS".format(len(osm_routepoints), len(gtfs_routepoints)))


On a 22873 routepoints dans OSM et 101504 routepoints dans le GTFS


## Etat de l'accessibilité sur les deux jeux de données
À noter que l'accessibilité, dans OSM comme dans le GTFS, indique la possibilité de monter dans les véhicules à cet arrêt (et pas seulement que « l'arrêt est accessible »).

### Accessibilité dans les routepoints OSM

In [4]:
# Count OSM routepoints per wheelchair value, after folding the OSM vocabulary
# into the three categories available on the GTFS side:
#   - "designated" is counted with "" (no usable information),
#   - "limited" is counted with "yes", to be compliant with the GTFS groups.
# NOTE(review): "designated" is folded into the unknown group here, as in the
# qualitative study below -- confirm this is the intended reading of the tag.
#
# The values are harmonised *before* grouping, so the cell no longer raises a
# KeyError when one of the categories ("designated", "limited", "yes", "") is
# absent from the data, and the grouped Series is not mutated afterwards.
osm_access_group = (
    osm_routepoints
    .assign(
        osm_wheelchair=osm_routepoints["osm_wheelchair"].replace(
            {"designated": "", "limited": "yes"}
        )
    )
    .groupby("osm_wheelchair")
    .size()
)

# One row per category: number of routepoints, category label, and share of
# all OSM routepoints (in percent).
osm_access_count = pd.DataFrame(osm_access_group.values, columns=["count"])
osm_access_count["access"] = osm_access_group.index
osm_access_count["percent"] = osm_access_count["count"] / osm_access_group.sum() * 100

osm_access_count.head()

Unnamed: 0,count,access,percent
0,21367,,93.415818
1,91,no,0.397849
2,1415,yes,6.186333


### Accessibilité dans les routepoints GTFS

In [5]:
# Missing values become "" so that routepoints without any accessibility
# information form their own group instead of being dropped by groupby.
gtfs_routepoints = gtfs_routepoints.fillna("")
nb_gtfs_routepoints = len(gtfs_routepoints)

# Number of GTFS routepoints per wheelchair_boarding value.
gtfs_access_group = gtfs_routepoints.groupby('gtfs_wheelchair_boarding').size()

# One row per value: number of routepoints, value label, and share of all
# GTFS routepoints (in percent).
gtfs_access_count = (
    gtfs_access_group
    .rename_axis("access")
    .reset_index(name="count")
    [["count", "access"]]
)
gtfs_access_count["percent"] = gtfs_access_count["count"].div(nb_gtfs_routepoints).mul(100)
gtfs_access_count

Unnamed: 0,count,access,percent
0,68503,,67.487981
1,2819,0.0,2.77723
2,30182,1.0,29.734789


On constate que, dans le GTFS, plus de 30 % des routepoints ont une indication d'accessibilité.
Dans OSM, moins de 7 % des routepoints ont une indication d'accessibilité.
En volume, OSM renseigne environ 1 500 routepoints contre environ 33 000 dans le GTFS, soit un ratio de 1500/33000 ≈ 4,5 %.

## Etude qualitative

In [6]:
# Preview of the OSM stop points table (stop id and wheelchair tag) that is
# joined onto the routepoint mapping in the next cell.
osm_stoppoints.head()

Unnamed: 0,osm_stop_id,osm_wheelchair
0,StopPoint:Node:17924573,yes
1,StopPoint:Node:29414560,yes
2,StopPoint:Node:29441475,
3,StopPoint:Node:95617756,
4,StopPoint:Node:125272923,


In [7]:
# Load the OSM <-> GTFS routepoint mapping; a missing GTFS wheelchair value is
# represented by "".
quali_routepoints = pd.read_csv(os.path.join(out_folder, "mapping_des_routepoints.csv"), dtype=str)
quali_routepoints["gtfs_wheelchair_boarding"] = quali_routepoints["gtfs_wheelchair_boarding"].fillna("")

# The OSM accessibility has to be added: it is attached from the stop points.
quali_routepoints = quali_routepoints.merge(osm_stoppoints, how="left", on="osm_stop_id")

# Clean the OSM values the same way as for the global counts above:
# "designated" is treated as unknown and "limited" as "yes".
# The trailing fillna("") covers stops without a tag AND any OSM value that is
# not listed in the mapping: without it such values become NaN, are counted as
# "different" below, and then silently vanish from the later groupby cells.
quali_routepoints['osm_wheelchair'] = (
    quali_routepoints['osm_wheelchair']
    .map(
        {
            'designated': "",
            'limited': "yes",
            "yes": "yes",
            "no": "no",
            "": ""
        }
    )
    .fillna("")
)

print("nombre total de routepoints : {}".format(quali_routepoints.shape[0]))

# NOTE(review): the two columns use different vocabularies ("0"/"1" on the GTFS
# side, "no"/"yes" on the OSM side), so this comparison is only False when both
# values are "". The selection therefore keeps every routepoint for which at
# least one source gives an accessibility value, including rows where the two
# sources agree ("1" with "yes", "0" with "no"). The following cells rely on
# this selection, so it is left unchanged here.
quali_routepoints_different = quali_routepoints[quali_routepoints["gtfs_wheelchair_boarding"] != quali_routepoints["osm_wheelchair"]]
print("nombre total de routepoints avec accessibilité différente: {}".format(quali_routepoints_different.shape[0]))


nombre total de routepoints : 15137
nombre total de routepoints avec accessibilité différente: 8400


In [8]:
# Number of selected routepoints per GTFS wheelchair_boarding value
# ("" = not specified in the GTFS).
quali_routepoints_different.groupby("gtfs_wheelchair_boarding").size()

gtfs_wheelchair_boarding
       78
0     568
1    7754
dtype: int64

In [9]:
# Selected routepoints for which the GTFS gives no wheelchair value (""),
# broken down by the value found in OSM.
unknown_access_in_gtfs = quali_routepoints_different.loc[
    quali_routepoints_different["gtfs_wheelchair_boarding"].eq("")
]
unknown_access_in_gtfs_group = unknown_access_in_gtfs.groupby('osm_wheelchair').size()

unknown_access_in_gtfs_group

osm_wheelchair
no     15
yes    63
dtype: int64

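Sur les **78 routepoints** dont l'accessibilité n'est pas renseignée dans le GTFS, OSM permet **de tous les renseigner**. (C'est vrai par construction : les routepoints sans information dans les deux sources ont été écartés par le filtre précédent.)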

In [10]:
# Selected routepoints flagged "0" in the GTFS wheelchair_boarding column,
# broken down by the value found in OSM ("" = no OSM information).
no_access_in_gtfs = quali_routepoints_different.loc[
    quali_routepoints_different["gtfs_wheelchair_boarding"].eq("0")
]
no_access_in_gtfs_group = no_access_in_gtfs.groupby('osm_wheelchair').size()

no_access_in_gtfs_group

osm_wheelchair
       560
no       5
yes      3
dtype: int64

Sur les **568** routepoints notés comme non accessibles dans le GTFS, OSM indique que **3 routepoints sont accessibles**.

In [11]:
# Selected routepoints flagged "1" in the GTFS wheelchair_boarding column,
# broken down by the value found in OSM ("" = no OSM information).
access_yes_in_gtfs = quali_routepoints_different.loc[
    quali_routepoints_different["gtfs_wheelchair_boarding"].eq("1")
]
access_yes_in_gtfs_group = access_yes_in_gtfs.groupby('osm_wheelchair').size()

access_yes_in_gtfs_group

osm_wheelchair
       6649
no       54
yes    1051
dtype: int64

Sur les **7754** routepoints notés comme accessibles dans le GTFS, OSM **met en doute 54** routepoints.

## Conclusion

OSM n'est pas une source d'amélioration notable de l'information d'accessibilité. Cependant, cette information n'est encore que très peu collectée sur le terrain, et elle permet déjà de mettre en doute l'accessibilité de plusieurs arrêts par rapport au référentiel officiel.
Un guide expliquant comment indiquer qu'un arrêt est accessible, ainsi qu'une animation de la communauté sur ce thème, pourraient permettre une nette amélioration de la qualité de cette information.