In [2]:

from datetime import datetime as dt
import git
import numpy as np
import pandas as pd
import warnings

from vehicle_stream_pipeline import utils

# Suppresses every warning for the rest of the session (no category or module
# filter is given).
# NOTE(review): this also hides pandas warnings such as SettingWithCopyWarning;
# consider narrowing the filter to the specific categories that are noisy.
warnings.filterwarnings("ignore")


In [3]:
# Resolve the repository root so the data paths below do not depend on the
# directory the notebook was started from.
repo = git.Repo(".", search_parent_directories=True).git.rev_parse("--show-toplevel")

# fetch data (here we can automate it)
# Both sheets are read from the same workbook, so the path is built once.
stops_workbook = f"{repo}/data/other/MoDstops+Preismodell.xlsx"

df_stops = pd.read_excel(stops_workbook, sheet_name="MoDstops")

# rename() returns a new frame; no in-place mutation is needed here.
df_edges = pd.read_excel(stops_workbook, sheet_name="Liste 2022").rename(
    columns={"Start #": "start_id", "Ende #": "end_id"}
)

rides_df = pd.read_csv(f"{repo}/data/cleaning/data_cleaned.csv")
# Keep only completed rides. The explicit copy makes the result an independent
# frame, so the column assignments that follow (here and in later cells) do not
# write to a slice of the raw frame (the SettingWithCopy situation).
rides_df = rides_df[rides_df["state"] == "completed"].copy()
rides_df["scheduled_to"] = pd.to_datetime(rides_df["scheduled_to"])




In [4]:
# First and last scheduled time among the completed rides.
# Series.min()/max() are vectorised and skip missing timestamps (NaT); the
# builtin min()/max() iterate element by element and return an order-dependent
# result as soon as a NaT is present.
start_date = rides_df["scheduled_to"].min()
end_date = rides_df["scheduled_to"].max()

print(start_date)
print(end_date)

2021-07-01 07:30:00
2022-05-31 22:28:43


In [5]:

# Sequence of (year, month) tuples returned by the project helper for the
# span between the first and the last scheduled ride.
date_range = utils.get_date_range(start_date, end_date)
data_range_len = len(date_range)

# Spot check: print the entry at position 10.
print(date_range[10])


(2022, 5)


In [6]:
total_sim_rides = 500  # will be filtered later

# Every month gets the same number of simulated rides. The remainder of the
# integer division is dropped, so the rides requested in total can be slightly
# fewer than total_sim_rides.
sim_rides = total_sim_rides // data_range_len

# Generate one frame per (year, month) and concatenate a single time afterwards.
# Calling pd.concat inside the loop re-copies the accumulated frame on every
# iteration, and seeding the accumulator with an empty frame can distort the
# resulting column dtypes.
monthly_rides = [
    utils.generateRideSpecs(rides_df, df_stops, df_edges, sim_rides, month, year)
    for (year, month) in date_range
]

# Reversed so that the most recently generated month comes first, which is the
# row order the former prepend-in-a-loop produced.
new_rides_all = pd.concat(monthly_rides[::-1])

# The former empty seed frame carried all columns of rides_df; re-add any of
# them that the generated frames do not contain so the column set is unchanged.
missing_columns = [
    column for column in rides_df.columns if column not in new_rides_all.columns
]
new_rides_all = new_rides_all.reindex(
    columns=[*new_rides_all.columns, *missing_columns]
)

# Flag the origin of each ride. assign() builds a new frame for the real rides
# instead of writing into rides_df in place, which may be a filtered slice.
new_rides_all["simulated"] = True
rides_df = rides_df.assign(simulated=False)

In [7]:
# Calendar month of each simulated ride. The vectorised .dt accessor replaces
# the former two-step version, which first stored full timestamps in the
# 'month' column and then extracted the month row by row with apply().
new_rides_all['month'] = pd.to_datetime(new_rides_all['scheduled_to']).dt.month

# Number of simulated rides per month.
new_rides_all['month'].value_counts()

5     46
8     46
3     45
2     45
1     45
12    45
11    45
10    45
9     45
4     44
7     44
Name: month, dtype: int64

In [18]:
# (rows, columns) of the combined simulated rides.
# NOTE(review): the output recorded for this cell shows 99 rows, while the
# month counts recorded for the preceding cell add up to 495, and the execution
# counter jumps to 18 here. Presumably this output is stale from an earlier
# run; restart the kernel and run all cells to refresh it.
new_rides_all.shape

(99, 52)

In [5]:
# One large batch of simulated rides for a single month. The positional
# arguments follow the same order as the per-month call earlier in this
# notebook: ride count, month, year.
newRides = utils.generateRideSpecs(
    rides_df,
    df_stops,
    df_edges,
    9002,  # number of rides requested
    6,  # month
    2022,  # year
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  allRoutes['route'] = allRoutes['start_id'].astype(str) + "-" + allRoutes['end_id'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  allRoutes['counts'] = distWorkday['counts'].min() # noise is weighted similar to least frequent real driven route
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-cop

In [9]:
# Columns to inspect side by side for the first few simulated rides.
timing_columns = [
    'arriving_push',
    'vehicle_arrived_at',
    'created_at',
    'scheduled_to',
    'dispatched_at',
    'pickup_at',
    'pickup_arrival_time',
    'dropoff_at',
    'dropoff_eta',
    'pickup_first_eta',
    'shortest_ridetime',
]
newRides[timing_columns].head(5)

Unnamed: 0,arriving_push,vehicle_arrived_at,created_at,scheduled_to,dispatched_at,pickup_at,pickup_arrival_time,dropoff_at,dropoff_eta,pickup_first_eta,shortest_ridetime
0,2022-06-02 17:11:15,2022-06-02 17:13:08,2022-06-01 15:33:35,2022-06-02 12:00:00,2022-06-02 11:52:00,2022-06-02 17:14:11,19268,2022-06-02 17:17:11,2022-06-02 17:20:11,2022-06-02 17:16:46,526.32
1,2022-06-01 13:56:41,2022-06-01 14:02:28,2022-06-01 08:05:21,2022-06-01 12:40:00,2022-06-01 12:32:00,2022-06-01 14:04:06,5428,2022-06-01 14:19:55,2022-06-01 14:21:20,2022-06-01 14:03:48,583.56
2,2022-06-01 10:42:59,2022-06-01 10:49:22,2022-06-01 10:37:09,2022-06-01 10:37:09,2022-06-01 10:37:09,2022-06-01 10:52:15,733,2022-06-01 11:08:50,2022-06-01 11:09:50,2022-06-01 10:51:12,392.04
3,2022-06-01 08:45:55,2022-06-01 08:47:41,2022-06-01 08:39:59,2022-06-01 08:39:59,2022-06-01 08:39:59,2022-06-01 08:49:13,462,2022-06-01 08:56:48,2022-06-01 09:06:08,2022-06-01 08:47:56,399.48
4,2022-06-01 18:43:25,2022-06-01 18:43:35,2022-06-01 15:51:30,2022-06-01 17:00:00,2022-06-01 16:52:00,2022-06-01 18:44:47,6695,2022-06-01 18:58:23,2022-06-01 18:54:50,2022-06-01 18:46:37,583.56


In [10]:
# Bare expression: displays the simulated rides frame (the recorded output
# shows only the first and last rows, with the middle elided).
newRides

Unnamed: 0,id,user_id,distance,number_of_passenger,price_operations,price_offer,price_payed,free_ride,payment_type,pickup_address,dropoff_address,state,created_from_offer,created_at,scheduled_to,dispatched_at,pickup_arrival_time,arriving_push,vehicle_arrived_at,earliest_pickup_expectation,pickup_first_eta,pickup_eta,pickup_at,dropoff_first_eta,dropoff_eta,dropoff_at,updated_at,arrival_deviation,waiting_time,boarding_time,ride_time,trip_time,shortest_ridetime,delay,longer_route_factor,arrival_indicator,rating,rating_puenktlichkeit,rating_sauberkeit,rating_fahrer,rating_find_modstop,rating_other_comments,cancellation_reason,cancellation_comment,bahn_card_number,year_card_type,year_card_number,canceled_at,rating_question_one,rating_question_two,index
0,1659522228-0,0-1659522228,4386,1.0,,,,False,BAHN_CARD,9006,10003,completed,,2022-06-01 15:33:35,2022-06-02 12:00:00,2022-06-02 11:52:00,19268,2022-06-02 17:11:15.000000000,2022-06-02 17:13:08,2022-06-02 11:55:00,2022-06-02 17:16:46,2022-06-02 17:16:46,2022-06-02 17:14:11,2022-06-02 17:25:32.320,2022-06-02 17:20:11,2022-06-02 17:17:11,,-67.0,19088.0,63.0,180.0,19268.0,526.32,18741.68,0.34,Kein Signalschild,5.0,,,,,,,,,,,,,,
1,1659522228-1,1-1659522228,4863,1.0,,,,False,STANDARD,5008,1010,completed,,2022-06-01 08:05:21,2022-06-01 12:40:00,2022-06-01 12:32:00,5428,2022-06-01 13:56:41.000000000,2022-06-01 14:02:28,2022-06-01 12:35:00,2022-06-01 14:03:48,2022-06-01 14:03:48,2022-06-01 14:04:06,2022-06-01 14:13:31.560,2022-06-01 14:21:20,2022-06-01 14:19:55,,167.0,5248.0,98.0,949.0,6197.0,583.56,5613.44,1.63,Kein Signalschild,5.0,,,,,,,,,,,,,,
2,1659522228-2,2-1659522228,3267,1.0,,,,False,STANDARD,11013,16027,completed,,2022-06-01 10:37:09,2022-06-01 10:37:09,2022-06-01 10:37:09,733,2022-06-01 10:42:59.000000000,2022-06-01 10:49:22,2022-06-01 10:40:09,2022-06-01 10:51:12,2022-06-01 10:53:17,2022-06-01 10:52:15,2022-06-01 10:57:44.040,2022-06-01 11:09:50,2022-06-01 11:08:50,,203.0,553.0,173.0,995.0,1548.0,392.04,1155.96,2.54,Kein Signalschild,2.0,,,,,,,,,,,,,,
3,1659522228-3,3-1659522228,3329,1.0,,,,False,STANDARD,15015,6003,completed,,2022-06-01 08:39:59,2022-06-01 08:39:59,2022-06-01 08:39:59,462,2022-06-01 08:45:55.000000000,2022-06-01 08:47:41,2022-06-01 08:42:59,2022-06-01 08:47:56,2022-06-01 08:47:56,2022-06-01 08:49:13,2022-06-01 08:54:35.480,2022-06-01 09:06:08,2022-06-01 08:56:48,,-74.0,282.0,92.0,455.0,737.0,399.48,337.52,1.14,Kein Signalschild,5.0,,,,,,,,,,,,,,
4,1659522228-4,4-1659522228,4863,1.0,,,,False,VRN,6002,1002,completed,,2022-06-01 15:51:30,2022-06-01 17:00:00,2022-06-01 16:52:00,6695,2022-06-01 18:43:25.000000000,2022-06-01 18:43:35,2022-06-01 16:55:00,2022-06-01 18:46:37,2022-06-01 18:46:37,2022-06-01 18:44:47,2022-06-01 18:56:20.560,2022-06-01 18:54:50,2022-06-01 18:58:23,,-170.0,6515.0,72.0,816.0,7331.0,583.56,6747.44,1.40,Kein Signalschild,5.0,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8997,1659522228-8997,8997-1659522228,3791,1.0,,,,False,BAHN_CARD,4038,9018,completed,,2022-06-30 21:05:04,2022-06-30 21:05:04,2022-06-30 21:05:04,1616,2022-06-30 21:33:24.000000000,2022-06-30 21:32:00,2022-06-30 21:08:04,2022-06-30 21:34:09,2022-06-30 21:35:04,2022-06-30 21:33:44,2022-06-30 21:41:43.920,2022-06-30 21:40:45,2022-06-30 21:42:40,,-264.0,1436.0,104.0,536.0,1972.0,454.92,1517.08,1.18,Kein Signalschild,5.0,,,,,,,,,,,,,,
8998,1659522228-8998,8998-1659522228,1725,1.0,,,,False,STANDARD,4007,7001,completed,,2022-06-30 22:56:26,2022-07-01 07:00:00,2022-07-01 06:52:00,6971,2022-07-01 08:36:37.000000000,2022-07-01 08:48:11,2022-07-01 06:55:00,2022-07-01 08:43:04,2022-07-01 08:49:46,2022-07-01 08:48:53,2022-07-01 08:46:31.000,2022-07-01 08:56:09,2022-07-01 08:55:19,,514.0,6791.0,42.0,386.0,7177.0,207.00,6970.00,1.86,Kein Signalschild,5.0,,,,,,,,,,,,,,
8999,1659522228-8999,8999-1659522228,3286,1.0,,,,False,STANDARD,13007,9003,completed,,2022-06-30 17:33:54,2022-06-30 17:33:54,2022-06-30 17:33:54,964,2022-06-30 17:51:04.000000000,2022-06-30 17:49:58,2022-06-30 17:36:54,2022-06-30 17:52:53,2022-06-30 17:53:04,2022-06-30 17:53:00,2022-06-30 17:59:27.320,2022-06-30 17:57:42,2022-06-30 18:00:15,,-246.0,784.0,182.0,435.0,1219.0,394.32,824.68,1.10,Kein Signalschild,5.0,,,,,,,,,,,,,,
9000,1659522228-9000,9000-1659522228,883,1.0,,,,False,STANDARD,12008,3012,completed,,2022-06-30 23:53:16,2022-06-30 23:53:16,2022-06-30 23:53:16,43,2022-06-30 23:57:05.000000000,2022-06-30 23:53:59,2022-06-30 23:56:16,2022-06-30 23:56:16,2022-06-30 23:58:05,2022-06-30 23:56:40,2022-06-30 23:58:01.960,2022-07-01 00:06:21,2022-07-01 00:07:06,,-366.0,-137.0,161.0,626.0,489.0,105.96,383.04,5.91,Kein Signalschild,5.0,,,,,,,,,,,,,,
