In [6]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, Polygon, LineString

In [7]:
#reading in the routes and poi data
routes = pd.read_csv('cleaned_routes_data/cleaned_all_routes_data_long.csv')
poi = pd.read_csv('all_nodes_uncleaned.csv').iloc[0:5000, :] #using smaller sample of uncleaned data for experimentation

In [8]:
#converting routes df into a GeoDataFrame
gdf_routes = gpd.GeoDataFrame(routes, geometry=gpd.points_from_xy(routes.latitude, routes.longitude))
gdf_routes.drop(['Unnamed: 0', 'lat_lgt'], axis=1, inplace=True)
gdf_routes

Unnamed: 0,route_id,num_of_waypoint,latitude,longitude,geometry
0,1005019,0,52.50607,13.33208,POINT (52.50607 13.33208)
1,1005019,1,52.50553,13.33163,POINT (52.50553 13.33163)
2,1005019,2,52.50525,13.33148,POINT (52.50525 13.33148)
3,1005019,3,52.50515,13.33337,POINT (52.50515 13.33337)
4,1005019,4,52.50520,13.33366,POINT (52.50520 13.33366)
...,...,...,...,...,...
119839,933359,151,52.50444,13.38246,POINT (52.50444 13.38246)
119840,933359,152,52.50525,13.38633,POINT (52.50525 13.38633)
119841,933359,153,52.50643,13.38615,POINT (52.50643 13.38615)
119842,933359,154,52.50648,13.39023,POINT (52.50648 13.39023)


In [9]:
#converting geometry from routes data to lines and creating a new gdf from it
routes_linestring_series = gdf_routes.groupby(['route_id'])['geometry'].apply(lambda x: LineString(x.tolist()))
gdf_lined_routes = routes_linestring_series.to_frame()
gdf_lined_routes

Unnamed: 0_level_0,geometry
route_id,Unnamed: 1_level_1
113043,"LINESTRING (52.45147 13.69072, 52.45147 13.690..."
113104,"LINESTRING (52.50054 13.44143, 52.50090 13.442..."
113232,"LINESTRING (52.42580 13.62998, 52.42403 13.629..."
128939,"LINESTRING (52.54556 12.99893, 52.54447 13.004..."
148436,"LINESTRING (52.50736 13.52251, 52.50712 13.522..."
...,...
3673084,"LINESTRING (52.53555 13.20110, 52.53553 13.201..."
3674009,"LINESTRING (52.53883 13.21284, 52.53883 13.212..."
3674010,"LINESTRING (52.53883 13.21284, 52.53883 13.212..."
3674128,"LINESTRING (52.48880 13.26176, 52.48877 13.261..."


In [10]:
#converting poi df into a GeoDataFrame
gdf_poi = gpd.GeoDataFrame(poi, geometry=gpd.points_from_xy(poi.lat, poi.lon))
gdf_poi

Unnamed: 0.1,Unnamed: 0,type,id,lat,lon,tags,geometry
0,0,node,78252154,52.523744,13.398627,"{'addr:city': 'Berlin', 'addr:country': 'DE', ...",POINT (52.52374 13.39863)
1,1,node,87036263,52.532985,13.384282,"{'amenity': 'atm', 'brand': 'Sparda-Bank', 'br...",POINT (52.53299 13.38428)
2,2,node,89275133,52.518025,13.406956,"{'amenity': 'atm', 'name': 'Bankhaus August Le...",POINT (52.51802 13.40696)
3,3,node,213106623,52.542170,13.441137,"{'addr:country': 'DE', 'addr:housenumber': '87...",POINT (52.54217 13.44114)
4,4,node,213113204,52.542750,13.392862,"{'amenity': 'atm', 'name': 'Berliner Sparkasse...",POINT (52.54275 13.39286)
...,...,...,...,...,...,...,...
4995,4995,node,2687646399,52.407078,13.562105,"{'amenity': 'bench', 'backrest': 'yes', 'check...",POINT (52.40708 13.56210)
4996,4996,node,2687646403,52.407081,13.562048,"{'amenity': 'bench', 'backrest': 'yes', 'check...",POINT (52.40708 13.56205)
4997,4997,node,2687646405,52.407066,13.564322,"{'amenity': 'bench', 'backrest': 'no', 'check_...",POINT (52.40707 13.56432)
4998,4998,node,2687646415,52.407187,13.563088,"{'amenity': 'bench', 'backrest': 'yes', 'check...",POINT (52.40719 13.56309)


In [11]:
#spacial joining both datasets on nearest (with a distance)
poi_routes = gpd.sjoin_nearest(gdf_lined_routes, gdf_poi, how='left', max_distance=1000, distance_col=True)
poi_routes

#to do:
#using openstreetmap or a plotter to find out what it does
#change name of distance col
#probably only joins one poi (certainly overwrites)
#distance is in CRS units, find out what makes sense

Unnamed: 0_level_0,geometry,index_right,Unnamed: 0,type,id,lat,lon,tags,True
route_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
113043,"LINESTRING (52.45147 13.69072, 52.45147 13.690...",661,661,node,5611563550,52.454944,13.625469,"{'amenity': 'atm', 'brand': 'Berliner Volksban...",0.000191
113104,"LINESTRING (52.50054 13.44143, 52.50090 13.442...",726,726,node,6222931160,52.517469,13.399069,"{'amenity': 'atm', 'brand': 'Reisebank', 'bran...",0.000184
113232,"LINESTRING (52.42580 13.62998, 52.42403 13.629...",4121,4121,node,1913837266,52.396408,13.636151,{'amenity': 'bench'},0.019583
128939,"LINESTRING (52.54556 12.99893, 52.54447 13.004...",3629,3629,node,1461085197,52.524875,12.996633,"{'amenity': 'bench', 'backrest': 'yes', 'bin':...",0.004069
148436,"LINESTRING (52.50736 13.52251, 52.50712 13.522...",5,5,node,239694634,52.497501,13.521643,"{'amenity': 'atm', 'atm': 'yes', 'bic': 'BEVOD...",0.004569
...,...,...,...,...,...,...,...,...,...
3673084,"LINESTRING (52.53555 13.20110, 52.53553 13.201...",3839,3839,node,1718146344,52.526746,13.201534,"{'amenity': 'bench', 'backrest': 'yes', 'check...",0.000013
3674009,"LINESTRING (52.53883 13.21284, 52.53883 13.212...",3797,3797,node,1680374621,52.545732,13.211210,"{'amenity': 'bench', 'backrest': 'yes', 'colou...",0.000053
3674010,"LINESTRING (52.53883 13.21284, 52.53883 13.212...",3797,3797,node,1680374621,52.545732,13.211210,"{'amenity': 'bench', 'backrest': 'yes', 'colou...",0.000053
3674128,"LINESTRING (52.48880 13.26176, 52.48877 13.261...",4432,4432,node,2295133277,52.495679,13.199486,{'amenity': 'bench'},0.000020


GeoDataFrame-methods: 
crosses(other[, align]) Returns a Series of dtype('bool') with value True for each aligned geometry that cross other.
distance(other[, align]) Returns a Series containing the distance to aligned other.
intersection(other[, align]) Returns a GeoSeries of the intersection of points in each aligned geometry with other.
intersects(other[, align]) Returns a Series of dtype('bool') with value True for each aligned geometry that intersects other.
join(other[, on, how, lsuffix, rsuffix, sort]) Join columns of another DataFrame.
mad([axis, skipna, level]) Return the mean absolute deviation of the values over the requested axis.
sjoin(df, *args, **kwargs) Spatial join of two GeoDataFrames. 
symmetric_difference(other[, align]) Returns a GeoSeries of the symmetric difference of points in each aligned geometry with other.
sjoin_nearest(right[, how, max_distance, ...]) Spatial join of two GeoDataFrames based on the distance between their geometries.

more: https://geopandas.org/en/stable/docs/reference/api/geopandas.GeoDataFrame.html