In [1]:
import pandas as pd

In [2]:
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 100)

In [3]:
df = pd.read_csv(r'parsed_routes_data\all_routes_data.csv') #entire routes dataset
#df = pd.read_csv(r'parsed_routes_data\first_bulk_of_routes_data.csv') #small sample dataset for testing
df.rename(columns={'Unnamed: 0': 'route_id', 'Unnamed: 1': 'num_of_waypoint'}, inplace=True)
df

Unnamed: 0,route_id,num_of_waypoint,lat_lgt,latitude,longitude
0,1000303.gpx,0,"('38.29842', '-2.65105')",38.29842,-2.65105
1,1000303.gpx,1,"('38.29647', '-2.65079')",38.29647,-2.65079
2,1000303.gpx,2,"('38.29418', '-2.65011')",38.29418,-2.65011
3,1000303.gpx,3,"('38.29499', '-2.65174')",38.29499,-2.65174
4,1000303.gpx,4,"('38.29674', '-2.65174')",38.29674,-2.65174
...,...,...,...,...,...
14486915,99997.gpx,60,"('48.27417', '16.33581')",48.27417,16.33581
14486916,99997.gpx,61,"('48.27445', '16.33493')",48.27445,16.33493
14486917,99997.gpx,62,"('48.27461', '16.33503')",48.27461,16.33503
14486918,99997.gpx,63,"('48.27464', '16.33575')",48.27464,16.33575


In [4]:
#cutting away the file extension from 'routes_id' str (it's going to become an index) 
df['route_id'] = df['route_id'].str.split('.').str[0]

#stripping str values from 'num_of_waypoint' (it's going to become column names)
#df['num_of_waypoint'] = df['num_of_waypoint'].str.strip() #seems to be unnecessary
df

Unnamed: 0,route_id,num_of_waypoint,lat_lgt,latitude,longitude
0,1000303,0,"('38.29842', '-2.65105')",38.29842,-2.65105
1,1000303,1,"('38.29647', '-2.65079')",38.29647,-2.65079
2,1000303,2,"('38.29418', '-2.65011')",38.29418,-2.65011
3,1000303,3,"('38.29499', '-2.65174')",38.29499,-2.65174
4,1000303,4,"('38.29674', '-2.65174')",38.29674,-2.65174
...,...,...,...,...,...
14486915,99997,60,"('48.27417', '16.33581')",48.27417,16.33581
14486916,99997,61,"('48.27445', '16.33493')",48.27445,16.33493
14486917,99997,62,"('48.27461', '16.33503')",48.27461,16.33503
14486918,99997,63,"('48.27464', '16.33575')",48.27464,16.33575


In [5]:
#finding out about the max number of waypoints
df.sort_values(by = 'num_of_waypoint', ascending = False) #--> 3669799.gpx has 118544 waypoints (that hypercorrect documentation of a file should be excluded)

Unnamed: 0,route_id,num_of_waypoint,lat_lgt,latitude,longitude
13823180,3669799,118544,"('28.91742', '32.34855')",28.91742,32.34855
13823179,3669799,118543,"('28.96985', '32.35542')",28.96985,32.35542
13823178,3669799,118542,"('29.0281', '32.35473')",29.02810,32.35473
13823177,3669799,118541,"('29.05151', '32.35748')",29.05151,32.35748
13823176,3669799,118540,"('29.04461', '32.34752')",29.04461,32.34752
...,...,...,...,...,...
7471459,3463627,0,"('50.45409', '6.37825')",50.45409,6.37825
7471520,3463633,0,"('47.34538', '13.39151')",47.34538,13.39151
7471533,3463653,0,"('61.11827', '14.61835')",61.11827,14.61835
7471653,3463664,0,"('50.99278', '12.43801')",50.99278,12.43801


In [7]:
#exploring the distribution of the total number of waypoints per route a bit more
routes_grp = df.groupby(['route_id'])
routes_grp['num_of_waypoint'].max().median() #--> median of total waypoints per route is 305
routes_grp['num_of_waypoint'].max().describe() #--> 75% is at 509 total waypoints: cutting at 1000 columns will cover most of the routes

count     28032.000000
mean        515.799372
std        1570.527132
min           1.000000
25%         156.000000
50%         305.000000
75%         509.000000
max      118544.000000
Name: num_of_waypoint, dtype: float64

In [8]:
#cutting down the size of the df before pivoting (costly operation)
#would other wise later return "ValueError: Unstacked DataFrame is too big, causing int32 overflow" (been there)

#filtering out any route with more than 1000 waypoints
df = routes_grp.filter(lambda x: x['num_of_waypoint'].max() < 1000) 

In [9]:
df

Unnamed: 0,route_id,num_of_waypoint,lat_lgt,latitude,longitude
0,1000303,0,"('38.29842', '-2.65105')",38.29842,-2.65105
1,1000303,1,"('38.29647', '-2.65079')",38.29647,-2.65079
2,1000303,2,"('38.29418', '-2.65011')",38.29418,-2.65011
3,1000303,3,"('38.29499', '-2.65174')",38.29499,-2.65174
4,1000303,4,"('38.29674', '-2.65174')",38.29674,-2.65174
...,...,...,...,...,...
14486915,99997,60,"('48.27417', '16.33581')",48.27417,16.33581
14486916,99997,61,"('48.27445', '16.33493')",48.27445,16.33493
14486917,99997,62,"('48.27461', '16.33503')",48.27461,16.33503
14486918,99997,63,"('48.27464', '16.33575')",48.27464,16.33575


In [10]:
#filtering out all routes, where one of the latitudes is too far from Berlin or are wrong data
routes_grp = df.groupby(['route_id']) #new grouping on new df
df = routes_grp.filter(lambda x: x['latitude'].max() > 52.3)
routes_grp = df.groupby(['route_id']) #new grouping on new df
df = routes_grp.filter(lambda x: x['latitude'].min() < 52.7)
df

Unnamed: 0,route_id,num_of_waypoint,lat_lgt,latitude,longitude
10726,1005019,0,"('52.50607', '13.33208')",52.50607,13.33208
10727,1005019,1,"('52.50553', '13.33163')",52.50553,13.33163
10728,1005019,2,"('52.50525', '13.33148')",52.50525,13.33148
10729,1005019,3,"('52.50515', '13.33337')",52.50515,13.33337
10730,1005019,4,"('52.5052', '13.33366')",52.50520,13.33366
...,...,...,...,...,...
14428518,933359,151,"('52.50444', '13.38246')",52.50444,13.38246
14428519,933359,152,"('52.50525', '13.38633')",52.50525,13.38633
14428520,933359,153,"('52.50643', '13.38615')",52.50643,13.38615
14428521,933359,154,"('52.50648', '13.39023')",52.50648,13.39023


In [11]:
#filtering out all routes, where one of the longitudes is too far from Berlin or are wrong data
routes_grp = df.groupby(['route_id']) #new grouping on new df
df = routes_grp.filter(lambda x: x['longitude'].max() > 12.9)
routes_grp = df.groupby(['route_id']) #new grouping on new df
df = routes_grp.filter(lambda x: x['longitude'].min() < 13.7)
df

Unnamed: 0,route_id,num_of_waypoint,lat_lgt,latitude,longitude
10726,1005019,0,"('52.50607', '13.33208')",52.50607,13.33208
10727,1005019,1,"('52.50553', '13.33163')",52.50553,13.33163
10728,1005019,2,"('52.50525', '13.33148')",52.50525,13.33148
10729,1005019,3,"('52.50515', '13.33337')",52.50515,13.33337
10730,1005019,4,"('52.5052', '13.33366')",52.50520,13.33366
...,...,...,...,...,...
14428518,933359,151,"('52.50444', '13.38246')",52.50444,13.38246
14428519,933359,152,"('52.50525', '13.38633')",52.50525,13.38633
14428520,933359,153,"('52.50643', '13.38615')",52.50643,13.38615
14428521,933359,154,"('52.50648', '13.39023')",52.50648,13.39023


In [12]:
#reshaping df from long format to wide format
df_reshaped = df.pivot(index='route_id', columns='num_of_waypoint', values='lat_lgt')
df_reshaped

num_of_waypoint,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,...,950,951,952,953,954,955,956,957,958,959,960,961,962,963,964,965,966,967,968,969,970,971,972,973,974,975,976,977,978,979,980,981,982,983,984,985,986,987,988,989,990,991,992,993,994,995,996,997,998,999
route_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1
1005019,"('52.50607', '13.33208')","('52.50553', '13.33163')","('52.50525', '13.33148')","('52.50515', '13.33337')","('52.5052', '13.33366')","('52.50538', '13.33391')","('52.5053', '13.3342')","('52.50505', '13.33667')","('52.50499', '13.33755')","('52.50504', '13.33822')","('52.50528', '13.3395')","('52.50527', '13.34009')","('52.50518', '13.3407')","('52.50427', '13.34433')","('52.50233', '13.34427')","('52.4999', '13.34294')","('52.49924', '13.34234')","('52.49731', '13.34191')","('52.49525', '13.34169')","('52.48884', '13.34238')","('52.48767', '13.34255')","('52.4873', '13.34182')","('52.48558', '13.34448')","('52.48424', '13.34444')","('52.48349', '13.34423')","('52.48291', '13.34247')","('52.48098', '13.3374')","('52.481', '13.33333')","('52.47988', '13.33277')","('52.47609', '13.33255')","('52.4718', '13.33277')","('52.47149', '13.34204')","('52.47013', '13.34028')","('52.46927', '13.34208')","('52.4694', '13.34397')","('52.465', '13.34431')","('52.46514', '13.34968')","('52.46531', '13.35159')","('52.46178', '13.35322')","('52.46119', '13.35491')","('52.46297', '13.35502')","('52.46297', '13.35442')","('52.46371', '13.35448')","('52.46371', '13.35577')","('52.46426', '13.35577')","('52.46419', '13.35819')","('52.47172', '13.36231')","('52.47371', '13.36352')","('52.47293', '13.36521')","('52.47334', '13.36555')",...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
113043,"('52.45147', '13.69072')","('52.45147', '13.69075')","('52.45146', '13.69076')","('52.45149', '13.69088')","('52.4515', '13.69103')","('52.45149', '13.6912')","('52.45147', '13.69135')","('52.45146', '13.69141')","('52.45146', '13.69143')","('52.45145', '13.69147')","('52.45136', '13.69152')","('52.45122', '13.69146')","('52.45115', '13.69154')","('52.45093', '13.69141')","('52.45066', '13.69141')","('52.45065', '13.69141')","('52.45064', '13.69141')","('52.45063', '13.69141')","('52.45047', '13.6914')","('52.45043', '13.6914')","('52.45032', '13.69141')","('52.45009', '13.69143')","('52.45008', '13.69143')","('52.45003', '13.69144')","('52.44993', '13.69145')","('52.44992', '13.69145')","('52.44988', '13.69145')","('52.44972', '13.69146')","('52.44974', '13.69145')","('52.44959', '13.6914')","('52.44921', '13.69147')","('52.4491', '13.69148')","('52.44887', '13.69148')","('52.44862', '13.69154')","('52.44844', '13.69151')","('52.44821', '13.69155')","('52.44799', '13.69156')","('52.44769', '13.69153')","('52.44748', '13.69155')","('52.44744', '13.69154')","('52.44723', '13.69155')","('52.447', '13.69158')","('52.44677', '13.69159')","('52.44658', '13.69162')","('52.4464', '13.69153')","('52.44626', '13.69161')","('52.44611', '13.6915')","('52.44612', '13.69143')","('52.44617', '13.69113')","('52.44622', '13.69095')",...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
113104,"('52.50054', '13.44143')","('52.5009', '13.44212')","('52.50085', '13.44373')","('52.50081', '13.44434')","('52.50087', '13.44462')","('52.50104', '13.44476')","('52.50258', '13.44669')","('52.50499', '13.44003')","('52.50703', '13.43671')","('52.50849', '13.43465')","('52.50991', '13.43066')","('52.51144', '13.4274')","('52.51327', '13.42426')","('52.51431', '13.42139')","('52.51536', '13.41808')","('52.51546', '13.418')","('52.51559', '13.41735')","('52.51695', '13.41635')","('52.51774', '13.41564')","('52.51908', '13.4148')","('52.51998', '13.41386')","('52.52086', '13.41257')","('52.52061', '13.41205')","('52.5216', '13.41032')","('52.52203', '13.40926')","('52.52215', '13.40875')","('52.52116', '13.4065')","('52.51871', '13.40248')","('52.51823', '13.40083')","('52.5193', '13.39978')","('52.51887', '13.39869')","('52.51766', '13.39918')","('52.51729', '13.39311')","('52.51634', '13.37892')","('52.51627', '13.37731')","('52.51775', '13.37688')","('52.51785', '13.37536')","('52.51858', '13.37525')","('52.51785', '13.37502')","('52.5175', '13.37654')","('52.51629', '13.37695')","('52.51356', '13.37706')","('52.51044', '13.37654')","('52.50952', '13.37669')",,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
113232,"('52.4258', '13.62998')","('52.42403', '13.62913')","('52.421', '13.62858')","('52.42035', '13.62914')","('52.41988', '13.62897')","('52.41907', '13.63047')","('52.41878', '13.63')","('52.41852', '13.63073')","('52.41797', '13.63146')","('52.41726', '13.63163')","('52.41658', '13.63279')","('52.41653', '13.6342')","('52.4164', '13.63562')","('52.41601', '13.63472')","('52.41577', '13.63322')","('52.41593', '13.63206')","('52.41637', '13.63047')","('52.41632', '13.62927')","('52.41632', '13.62841')","('52.4165', '13.62704')","('52.41669', '13.62584')","('52.41697', '13.62528')","('52.41525', '13.62485')","('52.4119', '13.62343')","('52.41127', '13.62257')","('52.41074', '13.62034')","('52.41088', '13.61781')","('52.41056', '13.61532')","('52.41088', '13.61279')","('52.41153', '13.61086')","('52.41205', '13.61026')","('52.41559', '13.61232')","('52.41902', '13.61382')","('52.42004', '13.61464')","('52.42098', '13.61558')","('52.42213', '13.61562')","('52.42315', '13.61622')","('52.42619', '13.61764')","('52.42532', '13.62232')","('52.42545', '13.62292')","('52.42511', '13.62386')","('52.42409', '13.6288')","('52.42422', '13.63004')",,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1265167,"('52.52567', '13.24127')","('52.52532', '13.24098')","('52.5258', '13.23981')","('52.52507', '13.23951')","('52.52541', '13.2384')","('52.52539', '13.23784')","('52.52528', '13.23754')","('52.52407', '13.23729')","('52.52253', '13.2363')","('52.52226', '13.23632')","('52.52174', '13.23662')","('52.52137', '13.2354')","('52.52112', '13.2349')","('52.51999', '13.2337')","('52.52039', '13.23255')","('52.51965', '13.23207')","('52.5189', '13.23136')","('52.51811', '13.23087')","('52.51886', '13.23081')","('52.51848', '13.2306')","('52.517', '13.22894')","('52.51668', '13.22821')","('52.51625', '13.22766')","('52.51615', '13.22725')","('52.51628', '13.22569')","('52.51566', '13.22238')","('52.51584', '13.22108')","('52.51617', '13.22102')","('52.51648', '13.22064')","('52.5157', '13.22079')","('52.51524', '13.22122')","('52.51503', '13.22185')","('52.5152', '13.22314')","('52.51557', '13.22466')","('52.51563', '13.22597')","('52.51519', '13.22583')","('52.51493', '13.22494')","('52.51446', '13.22517')","('52.51335', '13.22593')","('52.51353', '13.22679')","('52.51373', '13.22881')","('52.51386', '13.23078')","('52.51181', '13.2308')","('52.50995', '13.23137')","('52.51121', '13.23937')","('52.51193', '13.24025')","('52.51307', '13.23951')","('52.51313', '13.24011')","('52.51143', '13.24071')","('52.51147', '13.24096')",...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
830840,"('52.44499', '13.6029')","('52.44461', '13.60457')","('52.44247', '13.60365')","('52.4425', '13.60496')","('52.44203', '13.60545')","('52.44266', '13.61451')","('52.43841', '13.61453')","('52.43832', '13.61704')","('52.43802', '13.61937')","('52.43739', '13.61973')","('52.43239', '13.61963')","('52.43158', '13.61988')","('52.43116', '13.62011')","('52.43079', '13.62075')","('52.42973', '13.62163')","('52.42838', '13.62403')","('52.42755', '13.62478')","('52.42699', '13.62708')","('52.42656', '13.62752')","('52.42594', '13.62761')","('52.42586', '13.62827')","('52.42574', '13.62831')","('52.42556', '13.62997')","('52.42609', '13.63076')","('52.42461', '13.64246')","('52.42013', '13.63999')","('52.41847', '13.63919')","('52.41749', '13.6388')","('52.41619', '13.63768')","('52.41616', '13.6368')","('52.41599', '13.63579')","('52.41577', '13.63337')","('52.41639', '13.63016')","('52.4167', '13.62612')","('52.41697', '13.62514')","('52.41641', '13.62481')","('52.41721', '13.62262')","('52.41764', '13.62026')","('52.41765', '13.6191')","('52.41061', '13.61537')","('52.41099', '13.61247')","('52.41148', '13.61131')","('52.41192', '13.61')","('52.41255', '13.60794')","('52.4125', '13.60695')","('52.41276', '13.60623')","('52.41377', '13.60524')","('52.41455', '13.60356')","('52.41535', '13.60107')","('52.41764', '13.60217')",...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
853031,"('52.51661', '13.38203')","('52.51674', '13.38186')","('52.51671', '13.38065')","('52.51663', '13.37971')","('52.51639', '13.37819')","('52.51633', '13.37757')","('52.51629', '13.37691')","('52.51657', '13.37706')","('52.51691', '13.37722')","('52.51775', '13.37693')","('52.51776', '13.37494')","('52.51856', '13.37491')","('52.51937', '13.37488')","('52.5194', '13.37349')","('52.52031', '13.37346')","('52.52074', '13.37358')","('52.52078', '13.37206')","('52.52078', '13.37082')","('52.51958', '13.3709')","('52.51939', '13.37085')","('52.51942', '13.37021')","('52.51939', '13.36781')","('52.51903', '13.36634')","('52.5183', '13.36651')","('52.51841', '13.36569')","('52.51839', '13.36397')","('52.51871', '13.36411')","('52.51897', '13.36411')","('52.51894', '13.36347')","('52.51853', '13.36212')","('52.51795', '13.36152')","('52.51765', '13.3613')","('52.51755', '13.36057')","('52.51716', '13.35901')","('52.51695', '13.35765')","('52.51689', '13.35619')","('52.51713', '13.35462')","('52.51687', '13.35427')","('52.51646', '13.35337')","('52.51616', '13.35264')","('52.51569', '13.35148')","('52.51515', '13.35066')","('52.51502', '13.35083')","('52.51459', '13.35114')","('52.51414', '13.35094')","('52.51397', '13.3515')","('52.51296', '13.35384')","('52.51245', '13.35495')","('52.51275', '13.35596')","('52.5131', '13.35693')",...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
857465,"('52.42119', '13.39242')","('52.42115', '13.39248')","('52.42257', '13.39486')","('52.42295', '13.39555')","('52.42295', '13.39564')","('52.42334', '13.39637')","('52.43057', '13.39228')","('52.43057', '13.39228')","('52.43648', '13.38895')","('52.4378', '13.38814')","('52.43904', '13.38782')","('52.43919', '13.38859')","('52.45474', '13.31959')","('52.4548', '13.3194')","('52.45501', '13.31922')","('52.45542', '13.31948')","('52.45608', '13.32')","('52.46088', '13.32471')","('52.46088', '13.32471')","('52.46327', '13.32708')","('52.46491', '13.32863')","('52.4673', '13.33108')","('52.46899', '13.33272')","('52.47228', '13.33622')","('52.47685', '13.34199')","('52.47812', '13.34379')","('52.5043', '13.33319')","('52.50302', '13.3271')","('52.52189', '13.41053')","('52.52165', '13.41107')","('52.52097', '13.41244')","('52.52257', '13.41454')","('52.52372', '13.4119')","('52.52195', '13.4104')","('52.51045', '13.39006')","('52.43924', '13.3889')","('52.43983', '13.39207')","('52.4408', '13.39175')","('52.44067', '13.39029')","('52.44018', '13.38726')","('52.43901', '13.38764')","('52.43778', '13.38794')","('52.4364', '13.3886')","('52.42331', '13.39603')","('52.42301', '13.39558')","('52.42295', '13.39555')","('52.42089', '13.39205')",,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
909007,"('52.32383', '13.13961')","('52.32396', '13.13403')","('52.32249', '13.13145')","('52.32233', '13.13085')","('52.32007', '13.1336')","('52.31698', '13.1363')","('52.3153', '13.13785')","('52.31367', '13.13888')","('52.30963', '13.14154')","('52.3089', '13.14197')","('52.31126', '13.15201')","('52.31373', '13.15836')","('52.3121', '13.15956')","('52.30995', '13.16222')","('52.30837', '13.16505')","('52.30522', '13.17046')","('52.30208', '13.17518')","('52.29992', '13.18042')","('52.30057', '13.18945')","('52.30008', '13.18977')","('52.29462', '13.18514')","('52.28853', '13.17535')","('52.28507', '13.169')","('52.27856', '13.16093')","('52.27782', '13.15716')","('52.2773', '13.1563')","('52.27606', '13.15544')","('52.27173', '13.14445')","('52.26695', '13.12978')","('52.26729', '13.12875')","('52.27386', '13.13531')","('52.27556', '13.13634')","('52.2841', '13.13948')","('52.2844', '13.13494')","('52.28456', '13.13241')","('52.28509', '13.12484')","('52.28541', '13.11823')","('52.28622', '13.11356')","('52.28654', '13.1115')","('52.28622', '13.10257')","('52.28606', '13.09047')","('52.28522', '13.07613')","('52.28465', '13.06824')","('52.28533', '13.06249')","('52.29352', '13.04944')","('52.29467', '13.0385')","('52.28903', '13.03403')","('52.28911', '13.02747')","('52.28872', '13.01816')","('52.28604', '13.01498')",...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [13]:
#exploring the new shape
df_reshaped.shape # --> (463, 1000), so it's 463 routes with a max of 1000 waypoints

(463, 1000)

Conclusion after Data Cleaning

We had about 29.000 routes parsed from wandermap.net that had showed up with "Berlin" as a search string. After cleaning out all the routes that were too far from Berlin geographically and all the routes with exceptionally detailed waypoints description, we find that only 463 routes are useful enough to be included in our models.


In [14]:
#writing the clean df into a csv file
df_reshaped.to_csv('cleaned_all_routes_data.csv')