In [110]:
import pandas as pd
import numpy as np
import git
from datetime import datetime as dt


### Raw Data Analysis

In [111]:
# Ask git for the top-level directory of the enclosing repository so the data
# path below does not depend on the directory the notebook was started from.
repo = (
    git.Repo(".", search_parent_directories=True)
    .git
    .rev_parse("--show-toplevel")
)

# Load the combined rides export; the cell output lists its column names.
orig_rides_df = pd.read_csv(filepath_or_buffer=f"{repo}/data/rides_combined.csv")
orig_rides_df.columns

  exec(code_obj, self.user_global_ns, self.user_ns)


Index(['Unnamed: 0', 'id', 'user_id', 'distance', 'number_of_passenger',
       'price_operations', 'price_offer', 'price_payed', 'free_ride',
       'payment_type', 'pickup_address', 'dropoff_address', 'state',
       'created_from_offer', 'created_at', 'scheduled_to', 'dispatched_at',
       'pickup_arrival_time', 'arriving_push', 'vehicle_arrived_at',
       'earliest_pickup_expectation', 'pickup_first_eta', 'pickup_eta',
       'pickup_at', 'dropoff_first_eta', 'dropoff_eta', 'dropoff_at',
       'updated_at', 'arrival_deviation', 'waiting_time', 'boarding_time',
       'ride_time', 'trip_time', 'shortest_ridetime', 'delay',
       'longer_route_factor', 'arrival_indicator', 'rating',
       'rating_puenktlichkeit', 'rating_sauberkeit', 'rating_fahrer',
       'rating_find_modstop', 'rating_other_comments', 'cancellation_reason',
       'cancellation_comment', 'bahn_card_number', 'year_card_type',
       'year_card_number', 'canceled_at', 'rating_question_one',
       'rating_quest

In [112]:
# Read the three Excel exports from data/vehicle_data in one pass:
# Autofleet rides (with external ids), the MoD raw data and the MoD vehicle usage log.
orig_external_df, orig_raw_df, orig_vehicle_usage_df = (
    pd.read_excel(excel_path)
    for excel_path in (
        f"{repo}/data/vehicle_data/Autofleet_Rides with External ID_2021+2022-05-15.xlsx",
        f"{repo}/data/vehicle_data/MoD_Raw Data_2021+2022-05-15.xlsx",
        f"{repo}/data/vehicle_data/MoD_Vehicle Usage_2021+2022-05-15.xlsx",
    )
)


In [113]:
# Report (rows, columns) of each freshly loaded table, one line per table.
shape_report = [
    f'Shape orig_rides_df: {orig_rides_df.shape}',
    f'Shape orig_external_df: {orig_external_df.shape}',
    f'Shape orig_raw_df: {orig_raw_df.shape}',
    f'Shape orig_vehicle_usage_df: {orig_vehicle_usage_df.shape}',
]
print(*shape_report, sep="\n")

Shape orig_rides_df: (18980, 51)
Shape orig_external_df: (18148, 32)
Shape orig_raw_df: (36393, 36)
Shape orig_vehicle_usage_df: (173041, 12)


In [114]:
# Work on independent copies so the `orig_*` frames stay exactly as loaded.
# A plain assignment would only create a second name for the same object, and
# any in-place change to a working frame would then also change the "original".
rides_df = orig_rides_df.copy()
external_df = orig_external_df.copy()
raw_df = orig_raw_df.copy()
vehicle_usage_df = orig_vehicle_usage_df.copy()

In [115]:
# Each line prints the distinct outcomes of a key-overlap test between two tables:
# [False] means no key matches, a result containing True means at least one key matches.
# Caveat: isin() treats NaN as equal to NaN, so a missing key on both sides counts as a match.
print(rides_df['id'].isin(external_df['External Id']).unique())
print(rides_df['id'].isin(external_df['Id']).unique())
# In the recorded run the only value matching here is NaN (see the match values printed in the next cell).
print(rides_df['id'].isin(vehicle_usage_df['Ride Id']).unique())
# Same here: in the recorded run the only matching value is NaN.
print(vehicle_usage_df['Ride Id'].isin(external_df['External Id']).unique())
print(vehicle_usage_df['Ride Id'].isin(external_df['Id']).unique())
# Series.isin(DataFrame) iterates the frame's column labels, so the vehicle ids were
# compared against column names and the result was always [False]. Compare against
# every cell value of external_df instead.
# NOTE(review): external_df has no vehicle-id column; confirm an "anywhere in the table" check is what was intended.
print(vehicle_usage_df['Vehicle Id'].isin(external_df.to_numpy().ravel()).unique())

[ True False]
[False]
[False  True]
[False  True]
[False  True]
[False]


In [116]:
# Rows of the combined rides whose id occurs in one of the other exports.
ride_ids = rides_df['id']
filt_rides = rides_df.loc[ride_ids.isin(external_df['External Id'])]
filt_rides_2 = rides_df.loc[ride_ids.isin(vehicle_usage_df['Ride Id'])]
filt_rides_3 = rides_df.loc[ride_ids.isin(raw_df['Ride External Id'])]

# Rows of the vehicle usage log whose Ride Id occurs in one of the other exports.
usage_ride_ids = vehicle_usage_df['Ride Id']
filt_vehicle_usage = vehicle_usage_df.loc[usage_ride_ids.isin(external_df['Id'])]
filt_vehicle_usage_2 = vehicle_usage_df.loc[usage_ride_ids.isin(external_df['External Id'])]
filt_vehicle_usage_3 = vehicle_usage_df.loc[usage_ride_ids.isin(raw_df['Ride Id'])]

# count() only counts non-missing ids; unique() shows which values actually matched.
print(
    f"Matches between combined rides and autofleet_external id: {filt_rides['id'].count()}",
    f"Matches between combined rides and raw_id: {filt_rides_3['id'].count()}",
    f"Match values between combined_rides and vehicle_usage: {filt_rides_2['id'].unique()}",
    f"Matches between vehicle_usage and autofleet_id: {filt_vehicle_usage['Ride Id'].count()}",
    f"Matches between vehicle_usage and raw_id: {filt_vehicle_usage_3['Ride Id'].count()}",
    f"Match values between vehicle_usage_id and autofleet_external id: {filt_vehicle_usage_2['Ride Id'].unique()}",
    sep="\n",
)

Matches between combined rides and autofleet_external id: 9496
Matches between combined rides and raw_id: 9496
Match values between combined_rides and vehicle_usage: [nan]
Matches between vehicle_usage and autofleet_id: 103910
Matches between vehicle_usage and raw_id: 103910
Match values between vehicle_usage_id and autofleet_external id: [nan]


In [117]:
# Column overview of the Autofleet rides and the vehicle usage log
# (raw_df is intentionally not listed here).
print(external_df.columns, vehicle_usage_df.columns, sep="\n")

Index(['Id', 'External Id', 'Ride Type', 'Matching Type', 'Dispatch Type',
       'Schedule Time (UTC)', 'Status', 'Rejection reason',
       'Contact person/passenger name', 'Contact person/passenger phone',
       'Demand Source name', 'Driver name', 'Driver external ID',
       'Vehicle plate', 'Price', 'Currency', 'Pooling', 'Rating',
       'Arrived to Pickup (UTC)', 'Pickup Completed (UTC)', 'Pickup address',
       'Pickup coordinates', 'Arrived to Dropoff (UTC)',
       'Dropoff Completed (UTC)', 'Dropoff address', 'Dropoff coordinates',
       'Actual Duration of Ride (min)', 'Planned Distance (m)',
       'Number Of Passengers', 'Number Of Items', 'Created by',
       'Created at (UTC)'],
      dtype='object')
Index(['Vehicle Id', 'Ride Type', 'Ride Id', 'Stop Point Type',
       'Stop Point Id', 'Stop Point status', 'Stop point completed (UTC)',
       'Vehicle plate', 'Lat', 'Lng', 'Actual Distance of Ride (m)',
       'Odometer Reading (m)'],
      dtype='object')


### Merge Vehicle_Usage, Autofleet_external and combined_rides

In [118]:
# vehicle_usage_df preprocessing - filtering on Stop Point type and status + drop remaining duplicates
#
# Built as a chain of non-inplace operations: calling dropna/sort_values/drop_duplicates
# with inplace=True on a boolean-indexed slice of vehicle_usage_df raises pandas'
# SettingWithCopyWarning, and the chain gives the same frames without it.

is_completed_dropoff = (
    (vehicle_usage_df["Stop Point Type"] == "dropoff")
    & (vehicle_usage_df["Stop Point status"] == "completed")
)

# Completed drop-offs that belong to a ride, ordered by vehicle.
completed_dropoffs_df = (
    vehicle_usage_df.loc[is_completed_dropoff]
    .dropna(subset=["Ride Id"])
    .sort_values(by="Vehicle Id")
)

# Every row whose Ride Id occurs more than once (all occurrences), kept for inspection.
dupl_vehicle_df = completed_dropoffs_df[completed_dropoffs_df.duplicated(subset=["Ride Id"], keep=False)]

# One row per Ride Id: the first occurrence in the sorted order is kept.
# NOTE(review): in the recorded output the duplicate rows differ in "Odometer Reading (m)"
# (one filled, one missing), so which reading survives depends on the sort order.
merge_vehicle_df = completed_dropoffs_df.drop_duplicates(subset=["Ride Id"])

print(f"shape merge_vehicle_df: {merge_vehicle_df.shape}")
dupl_vehicle_df

shape merge_vehicle_df: (14909, 12)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Unnamed: 0,Vehicle Id,Ride Type,Ride Id,Stop Point Type,Stop Point Id,Stop Point status,Stop point completed (UTC),Vehicle plate,Lat,Lng,Actual Distance of Ride (m),Odometer Reading (m)
39918,54fc7c8b-940f-4ba6-abda-9237be36e57a,delivery,41ca68f3-af07-494a-9d78-81fdf875713c,dropoff,28fb0baf-36ba-4ca3-a9e4-ced8b19ee3f3,completed,2021-01-19 05:54:51,NW-MD-3E,4.935084e+08,81662671,1023.0,74344970.0
39917,54fc7c8b-940f-4ba6-abda-9237be36e57a,delivery,41ca68f3-af07-494a-9d78-81fdf875713c,dropoff,28fb0baf-36ba-4ca3-a9e4-ced8b19ee3f3,completed,2021-01-19 05:54:51,NW-MD-3E,4.935084e+08,81662671,1023.0,
39749,54fc7c8b-940f-4ba6-abda-9237be36e57a,delivery,e0c76835-0b1b-4f60-8026-55c151632024,dropoff,a2717fba-cb62-463c-b8b7-5a1c8858de1f,completed,2021-01-20 14:17:10,NW-MD-3E,4.934722e+15,8129687894255340,43.0,74419030.0
39750,54fc7c8b-940f-4ba6-abda-9237be36e57a,delivery,e0c76835-0b1b-4f60-8026-55c151632024,dropoff,a2717fba-cb62-463c-b8b7-5a1c8858de1f,completed,2021-01-20 14:17:10,NW-MD-3E,4.934722e+15,8129687894255340,43.0,
39751,54fc7c8b-940f-4ba6-abda-9237be36e57a,passenger,8e459ada-9770-431e-b5b3-0e59e096344a,dropoff,1d9161a0-a878-4860-b9f5-f55ba33500fe,completed,2021-01-20 14:13:26,NW-MD-3E,4.934720e+15,812962087642664,544.0,74417420.0
...,...,...,...,...,...,...,...,...,...,...,...,...
39520,e04d5c44-b18b-4775-923f-22fa3f877399,passenger,819646ce-6240-4032-b82a-d016be723ae5,dropoff,50c2dd72-6eef-4288-abbe-80b87c25200a,completed,2021-01-23 08:53:01,NW-MD-16E,4.933326e+08,81207048,4355.0,
39521,e04d5c44-b18b-4775-923f-22fa3f877399,passenger,819646ce-6240-4032-b82a-d016be723ae5,dropoff,50c2dd72-6eef-4288-abbe-80b87c25200a,completed,2021-01-23 08:53:01,NW-MD-16E,4.933326e+08,81207048,4355.0,
39822,e04d5c44-b18b-4775-923f-22fa3f877399,passenger,03c16ce6-3403-4dba-8c12-af746ed75465,dropoff,3a036b0e-7312-44f4-a815-6f7c4187d64c,completed,2021-01-19 15:12:03,NW-MD-16E,4.934261e+08,81545194,197.0,
39823,e04d5c44-b18b-4775-923f-22fa3f877399,passenger,03c16ce6-3403-4dba-8c12-af746ed75465,dropoff,3a036b0e-7312-44f4-a815-6f7c4187d64c,completed,2021-01-19 15:12:03,NW-MD-16E,4.934261e+08,81545194,197.0,1089970.0


In [119]:
# (rows, columns) of the de-duplicated completed drop-offs that belong to passenger rides.
is_passenger_ride = merge_vehicle_df["Ride Type"] == "passenger"
merge_vehicle_df.loc[is_passenger_ride].shape

(10211, 12)

In [120]:
# external_df preprocessing
duplicated_external_df = external_df[external_df.duplicated( subset=["Id"] , keep= False)]
nan_external_df = external_df[external_df["Id"].isna()]
merge_external_df = external_df[external_df["Id"].isin(vehicle_usage_df['Ride Id'])]
merge_external_df
print(f"shape external_df: {external_df.shape}")
print(f"shape merge_external_df: {merge_external_df.shape}")



shape external_df: (18148, 32)
shape merge_external_df: (18104, 32)


In [121]:
# Left join: keep every de-duplicated drop-off row and attach the Autofleet ride with the same id.
vehicle_external_merge = pd.merge(
    merge_vehicle_df,
    merge_external_df,
    how='left',
    left_on="Ride Id",
    right_on="Id",
)
print(f"Shape vehicle_external_merge before filtering: {vehicle_external_merge.shape}")

# Step 1: drop rows that have no External Id.
has_external_id = vehicle_external_merge["External Id"].notna()
vehicle_external_merge = vehicle_external_merge[has_external_id]
print(f"Shape vehicle_external_merge after External ID isna filtering: {vehicle_external_merge.shape}")

# Step 2: keep only rides that also exist in the combined rides table.
known_in_rides = vehicle_external_merge["External Id"].isin(rides_df['id'])
vehicle_external_merge = vehicle_external_merge[known_in_rides]
print(f"Shape vehicle_external_merge after isin rides_df filtering: {vehicle_external_merge.shape}")

# Step 3: make the External Id unique (first occurrence wins).
vehicle_external_merge = vehicle_external_merge.drop_duplicates(subset=["External Id"])
print(f"Shape vehicle_external_merge after duplicates filtering: {vehicle_external_merge.shape}")


Shape vehicle_external_merge before filtering: (14909, 44)
Shape vehicle_external_merge after External ID isna filtering: (14618, 44)
Shape vehicle_external_merge after isin rides_df filtering: (8031, 44)
Shape vehicle_external_merge after duplicates filtering: (8031, 44)


In [122]:
print(f"Shape orig_rides_df: {rides_df.shape}")

has_ride_id = rides_df["id"].notna()

# Rows with an id that is repeated later in the table (every occurrence except the last).
repeated_later = rides_df.duplicated(subset=["id"], keep="last")
duplicated_rides_df = rides_df[repeated_later & has_ride_id]
print(f"Shape duplicated_rides_df: {duplicated_rides_df.shape}")

# Keep the first occurrence of each id. Rows without an id are all kept:
# duplicated() would otherwise flag the missing ids as duplicates of each other.
first_occurrence = ~rides_df.duplicated(subset=["id"])
rides_merge = rides_df[first_occurrence | ~has_ride_id]
print(f"Shape rides_df dropped duplicates in id: {rides_merge.shape}")

Shape orig_rides_df: (18980, 51)
Shape duplicated_rides_df: (4, 51)
Shape rides_df dropped duplicates in id: (18976, 51)


In [123]:
# Left join: every de-duplicated ride keeps its row; vehicle / Autofleet columns
# are attached where the ride id equals the External Id of the filtered merge.
rides_vehicle_merge_df = pd.merge(
    rides_merge,
    vehicle_external_merge,
    how='left',
    left_on="id",
    right_on="External Id",
)
print(f"Shape rides_vehicle_merge_df: {rides_vehicle_merge_df.shape}")
rides_vehicle_merge_df

Shape rides_vehicle_merge_df: (18976, 95)


Unnamed: 0.1,Unnamed: 0,id,user_id,distance,number_of_passenger,price_operations,price_offer,price_payed,free_ride,payment_type,...,Arrived to Dropoff (UTC),Dropoff Completed (UTC),Dropoff address,Dropoff coordinates,Actual Duration of Ride (min),Planned Distance (m),Number Of Passengers,Number Of Items,Created by,Created at (UTC)
0,0,5727475e-8224-4302-9228-c92b9d4a5220,f8ff0526-887a-4e48-ad96-977e12fd70c1,5483,1.0,4.65,4.65,0.00,0.0,STANDARD,...,2021-07-01 05:44:43,2021-07-01 05:44:44,Globus,"[49.339, 8.16]",11.0,5399,1.0,,Locomotion Service Account,2021-06-30 21:12:47
1,1,18fec0a6-b7ba-442b-8472-04bdb6ba1b86,51e1a1a8-995c-488c-84ce-3789e46f0417,3575,1.0,0.00,2.77,0.00,0.0,BAHN_CARD,...,NaT,NaT,,,,,,,,NaT
2,2,bb916271-0627-4196-8ec1-5324e0e1f71d,f07028da-ca7e-4713-9e45-743c71712e80,3040,1.0,3.45,1.55,1.55,0.0,VRN,...,2021-07-01 07:42:27,2021-07-01 07:42:29,Globus,"[49.339, 8.16]",6.0,3346,1.0,,Locomotion Service Account,2021-07-01 07:21:39
3,3,3cffa0f3-e278-4828-b0a1-f55cb35c1adb,44f61d06-8e79-42c6-9abd-0e85fcaf9d6d,7233,1.0,0.00,5.55,0.00,1.0,STANDARD,...,NaT,NaT,,,,,,,,NaT
4,4,,1a6d2ec4-7e85-4e5b-aed0-1c3693268986,3998,,,,,,STANDARD,...,NaT,NaT,,,,,,,,NaT
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18971,18975,bf4e209a-8325-4e93-acd7-dec31f8864a6,61568918-262c-4360-91e0-1e71f4d2af4d,1901,1.0,3.10,1.45,1.45,0.0,VRN,...,NaT,NaT,,,,,,,,NaT
18972,18976,d529e378-3924-411b-8cf7-d09881d008fb,44f61d06-8e79-42c6-9abd-0e85fcaf9d6d,4152,1.0,4.31,2.16,2.16,0.0,VRN,...,NaT,NaT,,,,,,,,NaT
18973,18977,,6a2ade0c-d0a4-4173-a214-9ebe57758ae3,4227,,4.37,,,,BAHN_CARD,...,NaT,NaT,,,,,,,,NaT
18974,18978,,817aaaf0-d5e5-4838-b246-452fad2490ef,3799,,4.09,,,,VRN,...,NaT,NaT,,,,,,,,NaT


In [124]:
# Rides that received a vehicle through the merge; count() skips missing values by itself.
final_matches = rides_vehicle_merge_df["Vehicle Id"].count()

print(
    f"Matches between combined rides and autofleet_external id: {filt_rides['id'].count()}",
    f"Matches between combined rides and autofleet_external id after vehicle usage match: {final_matches}",
    sep="\n",
)

Matches between combined rides and autofleet_external id: 9496
Matches between combined rides and autofleet_external id after vehicle usage match: 8031


In [125]:
# List the column labels of the merged rides / vehicle usage / Autofleet table.
rides_vehicle_merge_df.columns

Index(['Unnamed: 0', 'id', 'user_id', 'distance', 'number_of_passenger',
       'price_operations', 'price_offer', 'price_payed', 'free_ride',
       'payment_type', 'pickup_address', 'dropoff_address', 'state',
       'created_from_offer', 'created_at', 'scheduled_to', 'dispatched_at',
       'pickup_arrival_time', 'arriving_push', 'vehicle_arrived_at',
       'earliest_pickup_expectation', 'pickup_first_eta', 'pickup_eta',
       'pickup_at', 'dropoff_first_eta', 'dropoff_eta', 'dropoff_at',
       'updated_at', 'arrival_deviation', 'waiting_time', 'boarding_time',
       'ride_time', 'trip_time', 'shortest_ridetime', 'delay',
       'longer_route_factor', 'arrival_indicator', 'rating',
       'rating_puenktlichkeit', 'rating_sauberkeit', 'rating_fahrer',
       'rating_find_modstop', 'rating_other_comments', 'cancellation_reason',
       'cancellation_comment', 'bahn_card_number', 'year_card_type',
       'year_card_number', 'canceled_at', 'rating_question_one',
       'rating_quest

### Find combined rides: rides that overlap in time on the same vehicle

In [126]:
# `df` is a second name for rides_vehicle_merge_df, not a copy: columns added to `df`
# later also appear on rides_vehicle_merge_df.
df = rides_vehicle_merge_df
# Disabled alternatives: restrict to rides with a vehicle / to the four columns
# that the overlap detection below reads.
# df.dropna(subset=["Vehicle Id"],inplace=True)
# df = df[["id","Vehicle Id","pickup_at","dropoff_at"]]
# The frame returned by reset_index() is only displayed; it is not assigned, so `df` is unchanged.
df.reset_index()

Unnamed: 0.1,index,Unnamed: 0,id,user_id,distance,number_of_passenger,price_operations,price_offer,price_payed,free_ride,...,Arrived to Dropoff (UTC),Dropoff Completed (UTC),Dropoff address,Dropoff coordinates,Actual Duration of Ride (min),Planned Distance (m),Number Of Passengers,Number Of Items,Created by,Created at (UTC)
0,0,0,5727475e-8224-4302-9228-c92b9d4a5220,f8ff0526-887a-4e48-ad96-977e12fd70c1,5483,1.0,4.65,4.65,0.00,0.0,...,2021-07-01 05:44:43,2021-07-01 05:44:44,Globus,"[49.339, 8.16]",11.0,5399,1.0,,Locomotion Service Account,2021-06-30 21:12:47
1,1,1,18fec0a6-b7ba-442b-8472-04bdb6ba1b86,51e1a1a8-995c-488c-84ce-3789e46f0417,3575,1.0,0.00,2.77,0.00,0.0,...,NaT,NaT,,,,,,,,NaT
2,2,2,bb916271-0627-4196-8ec1-5324e0e1f71d,f07028da-ca7e-4713-9e45-743c71712e80,3040,1.0,3.45,1.55,1.55,0.0,...,2021-07-01 07:42:27,2021-07-01 07:42:29,Globus,"[49.339, 8.16]",6.0,3346,1.0,,Locomotion Service Account,2021-07-01 07:21:39
3,3,3,3cffa0f3-e278-4828-b0a1-f55cb35c1adb,44f61d06-8e79-42c6-9abd-0e85fcaf9d6d,7233,1.0,0.00,5.55,0.00,1.0,...,NaT,NaT,,,,,,,,NaT
4,4,4,,1a6d2ec4-7e85-4e5b-aed0-1c3693268986,3998,,,,,,...,NaT,NaT,,,,,,,,NaT
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18971,18971,18975,bf4e209a-8325-4e93-acd7-dec31f8864a6,61568918-262c-4360-91e0-1e71f4d2af4d,1901,1.0,3.10,1.45,1.45,0.0,...,NaT,NaT,,,,,,,,NaT
18972,18972,18976,d529e378-3924-411b-8cf7-d09881d008fb,44f61d06-8e79-42c6-9abd-0e85fcaf9d6d,4152,1.0,4.31,2.16,2.16,0.0,...,NaT,NaT,,,,,,,,NaT
18973,18973,18977,,6a2ade0c-d0a4-4173-a214-9ebe57758ae3,4227,,4.37,,,,...,NaT,NaT,,,,,,,,NaT
18974,18974,18978,,817aaaf0-d5e5-4838-b246-452fad2490ef,3799,,4.09,,,,...,NaT,NaT,,,,,,,,NaT


In [127]:
# `from numpy import NaN` fails on NumPy 2.0, where the `NaN` alias was removed.
# Bind the same name to `np.nan` instead (numpy is imported as `np` at the top of the notebook).
NaN = np.nan

# Columns that receive the ids of the rides sharing a vehicle with the current ride.
# Extend this list if more than 3 rides can be combined.
combined_columns = ["combined_rides_1", "combined_rides_2", "combined_rides_3"]
for combined_column in combined_columns:
    # object dtype: the columns are filled with ride ids (strings), not numbers.
    df[combined_column] = pd.Series(NaN, index=df.index, dtype=object)

# Rides with more overlaps than there are columns. They are left empty and are
# reported after the loop instead of being skipped without notice.
overflow_ride_ids = []

# Skip offers and rides w/o vehicle Id. `value == NaN` is always False, so the
# missing vehicle ids are filtered with notna() before iterating.
rides_with_vehicle = df[df["Vehicle Id"].notna()]

for index, row in rides_with_vehicle.iterrows():
    vehicle_id = row["Vehicle Id"]
    pickup = row["pickup_at"]
    dropoff = row["dropoff_at"]

    same_vehicle = df["Vehicle Id"] == vehicle_id

    # Smaller time means earlier. All comparisons are strict, so the ride never
    # matches itself and rides sharing an exact pickup or dropoff time do not match.
    # NOTE(review): pickup_at/dropoff_at are read from CSV without date parsing;
    # presumably ISO-formatted strings that sort chronologically. Confirm.
    other_inside = (df["pickup_at"] > pickup) & (df["dropoff_at"] < dropoff)
    other_ends_during = (
        (df["pickup_at"] < pickup) & (df["dropoff_at"] < dropoff) & (df["dropoff_at"] > pickup)
    )
    other_encloses = (df["pickup_at"] < pickup) & (df["dropoff_at"] > dropoff)
    other_starts_during = (
        (df["pickup_at"] > pickup) & (df["dropoff_at"] > dropoff) & (df["pickup_at"] < dropoff)
    )

    overlaps = same_vehicle & (other_inside | other_ends_during | other_encloses | other_starts_during)
    overlapping_ids = df.loc[overlaps, "id"].to_list()

    if len(overlapping_ids) > len(combined_columns):
        overflow_ride_ids.append(row["id"])
        continue

    # Write into the current row by index label; no id mask has to be recomputed.
    for combined_column, other_ride_id in zip(combined_columns, overlapping_ids):
        df.at[index, combined_column] = other_ride_id

if overflow_ride_ids:
    print(
        f"{len(overflow_ride_ids)} rides overlap with more than {len(combined_columns)} "
        f"other rides and were left empty: {overflow_ride_ids}"
    )
        
    


In [128]:
# Rides that share their vehicle with at least one other ride
df.loc[df["combined_rides_1"].notna()]

Unnamed: 0.1,Unnamed: 0,id,user_id,distance,number_of_passenger,price_operations,price_offer,price_payed,free_ride,payment_type,...,Dropoff coordinates,Actual Duration of Ride (min),Planned Distance (m),Number Of Passengers,Number Of Items,Created by,Created at (UTC),combined_rides_1,combined_rides_2,combined_rides_3
31,31,fcdb3897-46e6-44a1-8378-9b60e0074a89,181628b6-2ecd-4368-bcaa-c9e38162dc57,6025,2.0,8.00,8.00,8.00,0.0,STANDARD,...,"[49.323, 8.123]",26.0,5444,2.0,,Locomotion Service Account,2021-07-02 06:42:22,13c5ae77-be50-48b2-a480-d72dae10a56f,,
32,32,13c5ae77-be50-48b2-a480-d72dae10a56f,ff86a402-44e6-4369-9ef1-208ed6b4b10f,5126,2.0,7.10,6.60,6.60,0.0,BAHN_CARD,...,"[49.319, 8.135]",12.0,5141,2.0,,Locomotion Service Account,2021-07-03 13:05:45,fcdb3897-46e6-44a1-8378-9b60e0074a89,,
42,42,46091514-02f6-42a2-91df-7bfd906f45d8,b6537f08-519c-4c67-ad43-6996aca5fb9a,5774,1.0,4.80,4.80,4.80,0.0,STANDARD,...,"[49.349, 8.118]",12.0,6124,1.0,,Locomotion Service Account,2021-07-03 20:33:24,94f26780-5a77-4964-8ab6-1d8509519fbc,,
43,43,94f26780-5a77-4964-8ab6-1d8509519fbc,c42b0079-ac0c-4bd1-9a8b-35b49009bdcc,4347,2.0,6.30,4.40,4.40,0.0,VRN,...,"[49.329, 8.13]",16.0,4347,2.0,,Locomotion Service Account,2021-07-03 20:33:53,46091514-02f6-42a2-91df-7bfd906f45d8,,
52,52,7334fcd1-2f01-4e99-8561-bb0ee77825c9,da176cc3-fc5c-4870-b130-74cd26a3b5d7,4989,1.0,4.40,4.40,4.40,False,STANDARD,...,"[49.356, 8.137]",10.0,4721,1.0,,Locomotion Service Account,2021-07-06 05:51:44,510c75fb-2df2-4f8d-9414-dda841a9889b,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16908,16912,f6ad390e-3635-4d20-aae9-501b8ff785cd,637449e9-bfeb-4ab9-b77e-41f8124706ed,6659,2.0,9.92,9.92,9.92,0.0,STANDARD,...,"[49.32, 8.145]",20.0,0,2.0,,mod-service-account Service Account,2022-05-14 11:58:33,3923896a-e5d1-4733-b41e-26f29b4cd5bc,,
16913,16917,efc1b554-6ec9-47db-af7f-994ad84eaf0d,b3751ac8-245a-4059-afb1-b56032c37f9d,3145,2.0,6.45,6.45,6.45,0.0,STANDARD,...,"[49.376, 8.153]",8.0,0,2.0,,mod-service-account Service Account,2022-05-14 08:06:05,619a40d8-d92a-4dd6-8917-80403b8f084e,,
16956,16960,03c2d558-c595-49aa-94ff-51e51dda2fef,7190235c-135a-4171-a4ff-618fd3f0aedb,3284,1.0,3.82,3.82,3.82,0.0,STANDARD,...,"[49.374, 8.159]",40.0,0,1.0,,mod-service-account Service Account,2022-05-14 15:13:51,b94af3aa-ea3c-493d-b608-338e9afdf603,e4118a1d-d5a2-4e31-994e-b03e0dbad724,
16987,16991,b94af3aa-ea3c-493d-b608-338e9afdf603,3984f9ca-d47d-49b2-a3fa-bfa3f43f857a,3711,3.0,10.37,10.37,0.00,1.0,STANDARD,...,"[49.361, 8.139]",44.0,0,3.0,,mod-service-account Service Account,2022-05-14 10:47:39,03c2d558-c595-49aa-94ff-51e51dda2fef,,


In [129]:
# Rides that share their vehicle with at least two other rides
df.loc[df["combined_rides_2"].notna()]

Unnamed: 0.1,Unnamed: 0,id,user_id,distance,number_of_passenger,price_operations,price_offer,price_payed,free_ride,payment_type,...,Dropoff coordinates,Actual Duration of Ride (min),Planned Distance (m),Number Of Passengers,Number Of Items,Created by,Created at (UTC),combined_rides_1,combined_rides_2,combined_rides_3
120,120,f8cb3a2d-b8da-4895-83f4-8872811dc35a,817aaaf0-d5e5-4838-b246-452fad2490ef,4585,1.0,4.20,2.30,2.30,0.0,VRN,...,"[49.329, 8.13]",16.0,3887,1.0,,Locomotion Service Account,2021-07-09 07:28:45,41816331-a7db-4fd9-9dc9-cfa9995bb58b,71594697-5cee-45db-aa01-b641a2d81676,
124,124,41816331-a7db-4fd9-9dc9-cfa9995bb58b,1a6b4e7f-6766-4d39-beef-9df24e641128,4470,1.0,4.15,2.25,0.00,1.0,VRN,...,"[49.329, 8.13]",10.0,3893,1.0,,Locomotion Service Account,2021-07-09 07:37:52,f8cb3a2d-b8da-4895-83f4-8872811dc35a,71594697-5cee-45db-aa01-b641a2d81676,
126,126,71594697-5cee-45db-aa01-b641a2d81676,5722147a-0d54-422f-8d47-ddca1023509d,2193,1.0,3.00,3.00,0.00,1.0,STANDARD,...,"[49.33, 8.131]",4.0,1987,1.0,,Locomotion Service Account,2021-07-09 07:43:49,f8cb3a2d-b8da-4895-83f4-8872811dc35a,41816331-a7db-4fd9-9dc9-cfa9995bb58b,
205,205,1a98cf8a-b1a0-4f9c-a9df-91684fd8f1fb,fb2abb7d-6bbd-4bab-8e21-5bf6d4af2a0f,2972,1.0,3.40,3.40,3.40,False,STANDARD,...,"[49.354, 8.132]",22.0,2694,1.0,,mod-service-account Service Account,2021-07-13 12:23:55,fcf7b42b-6a8d-42f2-9d87-89845d82b007,a1a34725-7428-438f-bcec-2319eee34bca,
212,212,271e1f66-d94b-41f0-8f5b-7c4080322962,c42b0079-ac0c-4bd1-9a8b-35b49009bdcc,4081,1.0,3.95,2.05,2.05,False,VRN,...,"[49.352, 8.133]",19.0,4087,1.0,,mod-service-account Service Account,2021-07-13 13:56:52,c53d7b06-433a-4df4-bcaf-980a40f59f56,dddf4c6b-22e1-4586-96fa-7770497ea4f4,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16862,16866,e73bd718-da62-4c43-b12f-8fc467a68c42,5c6e13b7-e493-4044-bd76-462acfbeba1b,4149,2.0,7.55,7.55,7.55,0.0,STANDARD,...,"[49.374, 8.155]",9.0,0,2.0,,mod-service-account Service Account,2022-05-14 22:14:54,dfdcbfe2-2e95-4bac-87eb-113b624295f6,de62f237-7bbc-40ce-ae8a-57d338de6865,
16863,16867,de62f237-7bbc-40ce-ae8a-57d338de6865,4949c43c-db28-4ecd-941b-d91249b518ce,4733,1.0,4.64,4.64,4.64,0.0,STANDARD,...,"[49.341, 8.141]",14.0,0,1.0,,mod-service-account Service Account,2022-05-14 21:30:15,dfdcbfe2-2e95-4bac-87eb-113b624295f6,e73bd718-da62-4c43-b12f-8fc467a68c42,
16882,16886,3c99fecb-33cc-40c3-a61b-d24b17c43d58,227d25b5-9f23-4c15-8b96-ec9541a0fd87,4006,1.0,4.26,4.26,4.26,0.0,STANDARD,...,"[49.352, 8.137]",23.0,0,1.0,,mod-service-account Service Account,2022-05-14 19:30:47,37823b9e-6f1d-4e9c-9635-d16b3139038c,2f4af866-21eb-4648-a280-0ea4d04258d6,
16887,16891,950f5aa8-32c0-433d-b09b-d77daa560111,6113d91a-a401-48e4-9e71-2d2d8996926f,4877,1.0,4.70,4.70,4.70,0.0,STANDARD,...,"[49.357, 8.151]",0.0,0,1.0,,mod-service-account Service Account,2022-05-14 22:23:47,7a48cd80-bf76-49ca-80a8-1a0a5403f5e9,d390db23-430b-41d0-ba5a-a03e1660c57e,


In [130]:
# Rides that share their vehicle with three other rides
df.loc[df["combined_rides_3"].notna()]

Unnamed: 0.1,Unnamed: 0,id,user_id,distance,number_of_passenger,price_operations,price_offer,price_payed,free_ride,payment_type,...,Dropoff coordinates,Actual Duration of Ride (min),Planned Distance (m),Number Of Passengers,Number Of Items,Created by,Created at (UTC),combined_rides_1,combined_rides_2,combined_rides_3
15750,15754,0f9e9cbc-52e7-4875-a9da-a34fecbd5fbd,fb2abb7d-6bbd-4bab-8e21-5bf6d4af2a0f,2218,1.0,3.27,3.27,3.27,0.0,STANDARD,...,"[49.352, 8.137]",20.0,0,1.0,,mod-service-account Service Account,2022-05-04 13:26:08,da883985-95c2-4a6b-913f-58b5ee2cf4bb,4103cdfd-f43d-45d0-8ab6-7ae1dab8530a,1b83bab4-1791-4c21-92f7-c71b94d9c61f
