In [2]:
import numpy as np
import pandas as pd

In [3]:
# Read data
df = pd.read_csv("combined1.csv")

In [4]:
df.head()

Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,route_id,route_name
0,1.4.59010,14:37:07,14:37:07,1.413,1,1.4.1,L4
1,1.4.59010,14:38:38,14:38:51,1.414,2,1.4.1,L4
2,1.4.59010,14:40:01,14:40:15,1.415,3,1.4.1,L4
3,1.4.59010,14:41:15,14:41:29,1.416,4,1.4.1,L4
4,1.4.59010,14:42:26,14:42:40,1.417,5,1.4.1,L4


In [5]:
# see the dataframe type
df.dtypes

trip_id           object
arrival_time      object
departure_time    object
stop_id           object
stop_sequence      int64
route_id          object
route_name        object
dtype: object

In [6]:
# Convert columns to desire type, here we use timedelta, 
# I had tried with datetime, but there was problem of replace the wrong hour data

df.departure_time = pd.to_timedelta(df.departure_time)
df.arrival_time = pd.to_timedelta(df.arrival_time)
df = df.astype({"stop_id":object})

In [7]:
# check the dataframe type
df.dtypes

trip_id                    object
arrival_time      timedelta64[ns]
departure_time    timedelta64[ns]
stop_id                    object
stop_sequence               int64
route_id                   object
route_name                 object
dtype: object

In [8]:
# check how many trip records we have
len(df.trip_id.unique())

2151

In [9]:
# check how many routes (metro and bus) we have
route_name = df.route_name.unique()
route_name

array(['L4', 'L2', 'L5', 'L3', 'L1', '63', '65', '124', '125', '126',
       '127', 'H6', 'H12', 'V19', 'D20'], dtype=object)

In [10]:
# create a new column where if if stop_sequence is 0, meaning that this is the start point
df["start_stop"] = df["stop_sequence"].apply(lambda x:x==1)

In [11]:
# subset of each routes

df_L5 = df[df.route_name=="L5"]
df_L4 = df[df.route_name=="L4"]
df_L3 = df[df.route_name=="L3"]
df_L2 = df[df.route_name=="L2"]
df_L1 = df[df.route_name=="L1"]
df_63 = df[df.route_name=="63"]
df_65 = df[df.route_name=="65"]
df_124 = df[df.route_name=="124"]
df_125 = df[df.route_name=="125"]
df_126 = df[df.route_name=="126"]
df_127 = df[df.route_name=="127"]
df_H6 = df[df.route_name=="H6"]
df_H12 = df[df.route_name=="H12"]
df_V19 = df[df.route_name=="V19"]
df_D20 = df[df.route_name=="D20"]

In [12]:
df_D20.head()

Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,route_id,route_name,start_stop
54269,2.220.121.290244.2999,13:15:00,13:15:00,2.1788.683453,1,2.220.2999,D20,True
54270,2.220.121.290244.2999,NaT,NaT,2.1284.675156,2,2.220.2999,D20,False
54271,2.220.121.290244.2999,NaT,NaT,2.1282.676540,3,2.220.2999,D20,False
54272,2.220.121.290244.2999,NaT,NaT,2.1604.676843,4,2.220.2999,D20,False
54273,2.220.121.290244.2999,NaT,NaT,2.3348.689612,5,2.220.2999,D20,False


In [13]:
import sys

if not sys.warnoptions:
    import warnings
    warnings.simplefilter("ignore")

# Similar to previous where we find the start point, here we find the end point.
    
df_L1["destination"] = df["stop_sequence"].apply(lambda x:x==30)
df_L2["destination"] = df["stop_sequence"].apply(lambda x:x==18)
df_L3["destination"] = df["stop_sequence"].apply(lambda x:x==26)
df_L4["destination"] = df["stop_sequence"].apply(lambda x:x==22)
df_L5["destination"] = df["stop_sequence"].apply(lambda x:x==26)
df_63["destination"] = df["stop_sequence"].apply(lambda x:x==35)
df_65["destination"] = df["stop_sequence"].apply(lambda x:x==33)
df_124["destination"] = df["stop_sequence"].apply(lambda x:x==17)
df_125["destination"] = df["stop_sequence"].apply(lambda x:x==21)
df_126["destination"] = df["stop_sequence"].apply(lambda x:x==19)
df_127["destination"] = df["stop_sequence"].apply(lambda x:x==26)
df_H6["destination"] = df["stop_sequence"].apply(lambda x:x==30)
df_H12["destination"] = df["stop_sequence"].apply(lambda x:x==34)
df_V19["destination"] = df["stop_sequence"].apply(lambda x:x==38)
df_D20["destination"] = df["stop_sequence"].apply(lambda x:x==30)


## L1

In [14]:
df_L1.describe()

Unnamed: 0,arrival_time,departure_time,stop_sequence
count,7126,7126,7126.0
mean,0 days 14:49:56.609458,0 days 14:50:16.357423,15.423379
std,0 days 05:42:09.767809,0 days 05:42:09.687834,8.64485
min,0 days 05:00:00,0 days 05:00:00,1.0
25%,0 days 09:46:25.250000,0 days 09:46:47.250000,8.0
50%,0 days 14:49:05.500000,0 days 14:49:26.500000,15.0
75%,0 days 19:27:41,0 days 19:28:03.500000,23.0
max,1 days 04:55:55,1 days 04:55:55,30.0


In [15]:
df_L1.head()

Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,route_id,route_name,start_stop,destination
304,1.1.59031,14:32:22,14:32:22,1.14,1,1.1.1,L1,True,False
305,1.1.59031,14:33:49,14:34:07,1.139,2,1.1.1,L1,False,False
306,1.1.59031,14:35:31,14:35:46,1.138,3,1.1.1,L1,False,False
307,1.1.59031,14:36:54,14:37:09,1.137,4,1.1.1,L1,False,False
308,1.1.59031,14:38:29,14:38:47,1.136,5,1.1.1,L1,False,False


In [16]:
# To extract only rows of start point and end point separately and then combined them together. 
#Sort it by trip and reset their index
df_L1_a = df_L1[(df_L1["start_stop"] == True)]
df_L1_b = df_L1[(df_L1["destination"] == True)]
df_L1_full = pd.concat([df_L1_a, df_L1_b]).sort_values(by=['trip_id'])
df_L1_full =df_L1_full.reset_index(drop=True)

In [17]:
df_L1_full.head()

Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,route_id,route_name,start_stop,destination
0,1.1.59031,14:32:22,14:32:22,1.14,1,1.1.1,L1,True,False
1,1.1.59031,15:16:16,15:16:16,1.111,30,1.1.1,L1,False,True
2,1.1.59039,15:34:22,15:34:22,1.111,1,1.1.1,L1,True,False
3,1.1.59039,16:18:11,16:18:11,1.14,30,1.1.1,L1,False,True
4,1.1.59042,08:45:32,08:45:32,1.111,1,1.1.1,L1,True,False


In [18]:
# Create a list. Loop over the dataframe and calculate the total travelling time.
travelling_time_L1 = []
for i in range(df_L1_full.shape[0]-1):
    if df_L1_full.trip_id[i] == df_L1_full.trip_id[i+1]:       
        time_interval = df_L1_full.departure_time[i].components.hours
        total_travelling_time = df_L1_full.arrival_time[i+1] -df_L1_full.arrival_time[i]
        x = [time_interval,total_travelling_time]
        travelling_time_L1.append(x)


In [23]:
df_L1_full["Hours"] = df_L1_full["arrival_time"].apply(lambda x: "22" in x.components.hours )

TypeError: argument of type 'int' is not iterable

In [62]:
# Convert the list into dataframe
travelling_time_L1 = pd.DataFrame(travelling_time_L1)

# Change the column name
travelling_time_L1.columns = ["L1_day_hour", "total_travelling_time"]

Unnamed: 0,0,1
0,14,00:43:54
1,15,00:43:49
2,8,00:45:28
3,6,00:45:28
4,12,-1 days +23:16:06


In [65]:
# convert the timedelta data into minutes, convert it to float and absolute value.
travelling_time_L1["total_travelling_time"] = travelling_time_L1["total_travelling_time"]/pd.Timedelta('1 minutes')
travelling_time_L1["total_travelling_time"] = travelling_time_L1["total_travelling_time"].astype(float)
travelling_time_L1["total_travelling_time"] = travelling_time_L1["total_travelling_time"].abs()
travelling_time_L1.describe()


Unnamed: 0,L1_day_hour,total_travelling_time
count,233.0,233.0
mean,13.175966,44.610157
std,5.801228,0.698213
min,0.0,43.816667
25%,8.0,43.9
50%,13.0,44.9
75%,18.0,45.35
max,23.0,45.533333


In [85]:
# Calculate the mean travelling time by each hour
L1 = travelling_time_L1.groupby(["L1_day_hour"]).mean()


pandas.core.frame.DataFrame

## L2

In [68]:
df_L2_a = df_L2[(df_L2["start_stop"] == True)]
df_L2_b = df_L2[(df_L2["destination"] == True)]
df_L2_full = pd.concat([df_L2_a, df_L2_b]).sort_values(by=['trip_id'])
df_L2_full =df_L2_full.reset_index(drop=True)

travelling_time_L2 = []
for i in range(df_L2_full.shape[0]-1):
    if df_L2_full.trip_id[i] == df_L2_full.trip_id[i+1]:       
        time_interval = df_L2_full.departure_time[i].components.hours
        total_travelling_time = df_L2_full.arrival_time[i+1] -df_L2_full.arrival_time[i]
        x = [time_interval,total_travelling_time]
        travelling_time_L2.append(x)

travelling_time_L2 = pd.DataFrame(travelling_time_L2)
travelling_time_L2.head()

travelling_time_L2.columns = ["L2_day_hour", "total_travelling_time"]

travelling_time_L2["total_travelling_time"] = travelling_time_L2["total_travelling_time"]/pd.Timedelta('1 minutes')
travelling_time_L2["total_travelling_time"] = travelling_time_L2["total_travelling_time"].astype(float)
travelling_time_L2["total_travelling_time"] = travelling_time_L2["total_travelling_time"].abs()
travelling_time_L2.describe()

travelling_time_L2.groupby(["L2_day_hour"]).mean()


Unnamed: 0_level_0,total_travelling_time
L2_day_hour,Unnamed: 1_level_1
0,27.55
1,27.394444
2,28.016667
5,28.419444
6,28.49
7,28.640909
8,28.39375
9,28.703571
10,28.406667
11,28.186667


## L3

In [70]:
df_L3_a = df_L3[(df_L3["start_stop"] == True)]
df_L3_b = df_L3[(df_L3["destination"] == True)]
df_L3_full = pd.concat([df_L3_a, df_L3_b]).sort_values(by=['trip_id'])
df_L3_full =df_L3_full.reset_index(drop=True)

travelling_time_L3 = []
for i in range(df_L3_full.shape[0]-1):
    if df_L3_full.trip_id[i] == df_L3_full.trip_id[i+1]:       
        time_interval = df_L3_full.departure_time[i].components.hours
        total_travelling_time = df_L3_full.arrival_time[i+1] -df_L3_full.arrival_time[i]
        x = [time_interval,total_travelling_time]
        travelling_time_L3.append(x)

travelling_time_L3 = pd.DataFrame(travelling_time_L3)
travelling_time_L3.head()

travelling_time_L3.columns = ["L3_day_hour", "total_travelling_time"]

travelling_time_L3["total_travelling_time"] = travelling_time_L3["total_travelling_time"]/pd.Timedelta('1 minutes')
travelling_time_L3["total_travelling_time"] = travelling_time_L3["total_travelling_time"].astype(float)
travelling_time_L3["total_travelling_time"] = travelling_time_L3["total_travelling_time"].abs()
travelling_time_L3.describe()

travelling_time_L3.groupby(["L3_day_hour"]).mean()

Unnamed: 0_level_0,total_travelling_time
L3_day_hour,Unnamed: 1_level_1
0,39.804762
1,39.747619
2,39.183333
4,40.166667
5,39.069048
6,40.292857
7,40.045455
8,40.590909
9,40.275439
10,40.380952


## L4

In [71]:
df_L4_a = df_L4[(df_L4["start_stop"] == True)]
df_L4_b = df_L4[(df_L4["destination"] == True)]
df_L4_full = pd.concat([df_L4_a, df_L4_b]).sort_values(by=['trip_id'])
df_L4_full =df_L4_full.reset_index(drop=True)

travelling_time_L4 = []
for i in range(df_L4_full.shape[0]-1):
    if df_L4_full.trip_id[i] == df_L4_full.trip_id[i+1]:       
        time_interval = df_L4_full.departure_time[i].components.hours
        total_travelling_time = df_L4_full.arrival_time[i+1] -df_L4_full.arrival_time[i]
        x = [time_interval,total_travelling_time]
        travelling_time_L4.append(x)

travelling_time_L4 = pd.DataFrame(travelling_time_L4)
travelling_time_L4.head()

travelling_time_L4.columns = ["L4_day_hour", "total_travelling_time"]

travelling_time_L4["total_travelling_time"] = travelling_time_L4["total_travelling_time"]/pd.Timedelta('1 minutes')
travelling_time_L4["total_travelling_time"] = travelling_time_L4["total_travelling_time"].astype(float)
travelling_time_L4["total_travelling_time"] = travelling_time_L4["total_travelling_time"].abs()
travelling_time_L4.describe()

travelling_time_L4.groupby(["L4_day_hour"]).mean()

Unnamed: 0_level_0,total_travelling_time
L4_day_hour,Unnamed: 1_level_1
0,35.0
1,35.0
2,35.0
3,35.0
4,35.0
5,35.0
6,35.0
7,35.0
8,35.0
9,35.0


## L5

In [72]:
df_L5_a = df_L5[(df_L5["start_stop"] == True)]
df_L5_b = df_L5[(df_L5["destination"] == True)]
df_L5_full = pd.concat([df_L5_a, df_L5_b]).sort_values(by=['trip_id'])
df_L5_full =df_L5_full.reset_index(drop=True)

travelling_time_L5 = []
for i in range(df_L5_full.shape[0]-1):
    if df_L5_full.trip_id[i] == df_L5_full.trip_id[i+1]:       
        time_interval = df_L5_full.departure_time[i].components.hours
        total_travelling_time = df_L5_full.arrival_time[i+1] -df_L5_full.arrival_time[i]
        x = [time_interval,total_travelling_time]
        travelling_time_L5.append(x)

travelling_time_L5 = pd.DataFrame(travelling_time_L5)
travelling_time_L5.head()

travelling_time_L5.columns = ["L5_day_hour", "total_travelling_time"]

travelling_time_L5["total_travelling_time"] = travelling_time_L5["total_travelling_time"]/pd.Timedelta('1 minutes')
travelling_time_L5["total_travelling_time"] = travelling_time_L5["total_travelling_time"].astype(float)
travelling_time_L5["total_travelling_time"] = travelling_time_L5["total_travelling_time"].abs()
travelling_time_L5.describe()

travelling_time_L5.groupby(["L5_day_hour"]).mean()

Unnamed: 0_level_0,total_travelling_time
L5_day_hour,Unnamed: 1_level_1
0,41.369048
2,40.683333
5,40.96875
6,41.65
7,41.839394
8,41.795833
9,41.753571
10,41.814103
11,41.803333
12,41.693056


## Bus 63

In [73]:
df_63_a = df_63[(df_63["start_stop"] == True)]
df_63_b = df_63[(df_63["destination"] == True)]
df_63_full = pd.concat([df_63_a, df_63_b]).sort_values(by=['trip_id'])
df_63_full =df_63_full.reset_index(drop=True)

travelling_time_63 = []
for i in range(df_63_full.shape[0]-1):
    if df_63_full.trip_id[i] == df_63_full.trip_id[i+1]:       
        time_interval = df_63_full.departure_time[i].components.hours
        total_travelling_time = df_63_full.arrival_time[i+1] -df_63_full.arrival_time[i]
        x = [time_interval,total_travelling_time]
        travelling_time_63.append(x)

travelling_time_63 = pd.DataFrame(travelling_time_63)
travelling_time_63.head()

travelling_time_63.columns = ["63_day_hour", "total_travelling_time"]

travelling_time_63["total_travelling_time"] = travelling_time_63["total_travelling_time"]/pd.Timedelta('1 minutes')
travelling_time_63["total_travelling_time"] = travelling_time_63["total_travelling_time"].astype(float)
travelling_time_63["total_travelling_time"] = travelling_time_63["total_travelling_time"].abs()
travelling_time_63.describe()

travelling_time_63.groupby(["63_day_hour"]).mean()

Unnamed: 0_level_0,total_travelling_time
63_day_hour,Unnamed: 1_level_1
6,35.5
7,40.666667
8,43.8
9,46.166667
10,48.5
11,47.0
12,48.4
13,49.2
14,48.125
15,47.5


## Bus 65

In [74]:
df_65_a = df_65[(df_65["start_stop"] == True)]
df_65_b = df_65[(df_65["destination"] == True)]
df_65_full = pd.concat([df_65_a, df_65_b]).sort_values(by=['trip_id'])
df_65_full =df_65_full.reset_index(drop=True)

travelling_time_65 = []
for i in range(df_65_full.shape[0]-1):
    if df_65_full.trip_id[i] == df_65_full.trip_id[i+1]:       
        time_interval = df_65_full.departure_time[i].components.hours
        total_travelling_time = df_65_full.arrival_time[i+1] -df_65_full.arrival_time[i]
        x = [time_interval,total_travelling_time]
        travelling_time_65.append(x)

travelling_time_65 = pd.DataFrame(travelling_time_65)
travelling_time_65.head()

travelling_time_65.columns = ["65_day_hour", "total_travelling_time"]

travelling_time_65["total_travelling_time"] = travelling_time_65["total_travelling_time"]/pd.Timedelta('1 minutes')
travelling_time_65["total_travelling_time"] = travelling_time_65["total_travelling_time"].astype(float)
travelling_time_65["total_travelling_time"] = travelling_time_65["total_travelling_time"].abs()
travelling_time_65.describe()

travelling_time_65.groupby(["65_day_hour"]).mean()

Unnamed: 0_level_0,total_travelling_time
65_day_hour,Unnamed: 1_level_1
0,35.0
5,37.0
6,37.0
7,38.0
8,40.25
9,43.333333
10,46.5
11,47.0
12,48.0
13,46.4


## Bus 124

In [75]:
df_124_a = df_124[(df_124["start_stop"] == True)]
df_124_b = df_124[(df_124["destination"] == True)]
df_124_full = pd.concat([df_124_a, df_124_b]).sort_values(by=['trip_id'])
df_124_full =df_124_full.reset_index(drop=True)

travelling_time_124 = []
for i in range(df_124_full.shape[0]-1):
    if df_124_full.trip_id[i] == df_124_full.trip_id[i+1]:       
        time_interval = df_124_full.departure_time[i].components.hours
        total_travelling_time = df_124_full.arrival_time[i+1] -df_124_full.arrival_time[i]
        x = [time_interval,total_travelling_time]
        travelling_time_124.append(x)

travelling_time_124 = pd.DataFrame(travelling_time_124)
travelling_time_124.head()

travelling_time_124.columns = ["124_day_hour", "total_travelling_time"]

travelling_time_124["total_travelling_time"] = travelling_time_124["total_travelling_time"]/pd.Timedelta('1 minutes')
travelling_time_124["total_travelling_time"] = travelling_time_124["total_travelling_time"].astype(float)
travelling_time_124["total_travelling_time"] = travelling_time_124["total_travelling_time"].abs()
travelling_time_124.describe()

travelling_time_124.groupby(["124_day_hour"]).mean()

Unnamed: 0_level_0,total_travelling_time
124_day_hour,Unnamed: 1_level_1
9,13.0
10,13.0
18,13.0
19,13.0
20,13.0
21,13.0


## Bus 125

In [76]:
df_125_a = df_125[(df_125["start_stop"] == True)]
df_125_b = df_125[(df_125["destination"] == True)]
df_125_full = pd.concat([df_125_a, df_125_b]).sort_values(by=['trip_id'])
df_125_full =df_125_full.reset_index(drop=True)

travelling_time_125 = []
for i in range(df_125_full.shape[0]-1):
    if df_125_full.trip_id[i] == df_125_full.trip_id[i+1]:       
        time_interval = df_125_full.departure_time[i].components.hours
        total_travelling_time = df_125_full.arrival_time[i+1] -df_125_full.arrival_time[i]
        x = [time_interval,total_travelling_time]
        travelling_time_125.append(x)

travelling_time_125 = pd.DataFrame(travelling_time_125)
travelling_time_125.head()

travelling_time_125.columns = ["125_day_hour", "total_travelling_time"]

travelling_time_125["total_travelling_time"] = travelling_time_125["total_travelling_time"]/pd.Timedelta('1 minutes')
travelling_time_125["total_travelling_time"] = travelling_time_125["total_travelling_time"].astype(float)
travelling_time_125["total_travelling_time"] = travelling_time_125["total_travelling_time"].abs()
travelling_time_125.describe()

travelling_time_125.groupby(["125_day_hour"]).mean()

Unnamed: 0_level_0,total_travelling_time
125_day_hour,Unnamed: 1_level_1
7,19.0
8,19.0
9,19.0
10,19.5
11,19.8
12,20.5
13,19.666667
14,20.0
15,18.4
16,19.5


## Bus 126

In [77]:
df_126_a = df_126[(df_126["start_stop"] == True)]
df_126_b = df_126[(df_126["destination"] == True)]
df_126_full = pd.concat([df_126_a, df_126_b]).sort_values(by=['trip_id'])
df_126_full =df_126_full.reset_index(drop=True)

travelling_time_126 = []
for i in range(df_126_full.shape[0]-1):
    if df_126_full.trip_id[i] == df_126_full.trip_id[i+1]:       
        time_interval = df_126_full.departure_time[i].components.hours
        total_travelling_time = df_126_full.arrival_time[i+1] -df_126_full.arrival_time[i]
        x = [time_interval,total_travelling_time]
        travelling_time_126.append(x)

travelling_time_126 = pd.DataFrame(travelling_time_126)
travelling_time_126.head()

travelling_time_126.columns = ["126_day_hour", "total_travelling_time"]

travelling_time_126["total_travelling_time"] = travelling_time_126["total_travelling_time"]/pd.Timedelta('1 minutes')
travelling_time_126["total_travelling_time"] = travelling_time_126["total_travelling_time"].astype(float)
travelling_time_126["total_travelling_time"] = travelling_time_126["total_travelling_time"].abs()
travelling_time_126.describe()

travelling_time_126.groupby(["126_day_hour"]).mean()

Unnamed: 0_level_0,total_travelling_time
126_day_hour,Unnamed: 1_level_1
7,30.25
8,32.75
9,27.7
10,28.181818
11,28.5
12,28.545455
13,28.7
14,26.9
15,27.2
16,27.444444


## Bus H12

In [80]:
df_H12_a = df_H12[(df_H12["start_stop"] == True)]
df_H12_b = df_H12[(df_H12["destination"] == True)]
df_H12_full = pd.concat([df_H12_a, df_H12_b]).sort_values(by=['trip_id'])
df_H12_full =df_H12_full.reset_index(drop=True)

travelling_time_H12 = []
for i in range(df_H12_full.shape[0]-1):
    if df_H12_full.trip_id[i] == df_H12_full.trip_id[i+1]:       
        time_interval = df_H12_full.departure_time[i].components.hours
        total_travelling_time = df_H12_full.arrival_time[i+1] -df_H12_full.arrival_time[i]
        x = [time_interval,total_travelling_time]
        travelling_time_H12.append(x)

travelling_time_H12 = pd.DataFrame(travelling_time_H12)
travelling_time_H12.head()

travelling_time_H12.columns = ["H12_day_hour", "total_travelling_time"]

travelling_time_H12["total_travelling_time"] = travelling_time_H12["total_travelling_time"]/pd.Timedelta('1 minutes')
travelling_time_H12["total_travelling_time"] = travelling_time_H12["total_travelling_time"].astype(float)
travelling_time_H12["total_travelling_time"] = travelling_time_H12["total_travelling_time"].abs()
travelling_time_H12.describe()

travelling_time_H12.groupby(["H12_day_hour"]).mean()

Unnamed: 0_level_0,total_travelling_time
H12_day_hour,Unnamed: 1_level_1
6,46.666667
7,54.428571
8,57.857143
9,60.4
10,58.5
11,56.666667
12,58.0
13,57.333333
14,56.777778
15,57.444444


## Bus V19

In [81]:
df_V19_a = df_V19[(df_V19["start_stop"] == True)]
df_V19_b = df_V19[(df_V19["destination"] == True)]
df_V19_full = pd.concat([df_V19_a, df_V19_b]).sort_values(by=['trip_id'])
df_V19_full =df_V19_full.reset_index(drop=True)

travelling_time_V19 = []
for i in range(df_V19_full.shape[0]-1):
    if df_V19_full.trip_id[i] == df_V19_full.trip_id[i+1]:       
        time_interval = df_V19_full.departure_time[i].components.hours
        total_travelling_time = df_V19_full.arrival_time[i+1] -df_V19_full.arrival_time[i]
        x = [time_interval,total_travelling_time]
        travelling_time_V19.append(x)

travelling_time_V19 = pd.DataFrame(travelling_time_V19)
travelling_time_V19.head()

travelling_time_V19.columns = ["V19_day_hour", "total_travelling_time"]

travelling_time_V19["total_travelling_time"] = travelling_time_V19["total_travelling_time"]/pd.Timedelta('1 minutes')
travelling_time_V19["total_travelling_time"] = travelling_time_V19["total_travelling_time"].astype(float)
travelling_time_V19["total_travelling_time"] = travelling_time_V19["total_travelling_time"].abs()
travelling_time_V19.describe()

travelling_time_V19.groupby(["V19_day_hour"]).mean()

Unnamed: 0_level_0,total_travelling_time
V19_day_hour,Unnamed: 1_level_1
6,49.0
7,51.0
8,59.0
10,60.0
11,68.0
12,68.0
13,68.0
14,68.0
15,63.0
16,63.0


## Bus D20

In [82]:
df_D20_a = df_D20[(df_D20["start_stop"] == True)]
df_D20_b = df_D20[(df_D20["destination"] == True)]
df_D20_full = pd.concat([df_D20_a, df_D20_b]).sort_values(by=['trip_id'])
df_D20_full =df_D20_full.reset_index(drop=True)

travelling_time_D20 = []
for i in range(df_D20_full.shape[0]-1):
    if df_D20_full.trip_id[i] == df_D20_full.trip_id[i+1]:       
        time_interval = df_D20_full.departure_time[i].components.hours
        total_travelling_time = df_D20_full.arrival_time[i+1] -df_D20_full.arrival_time[i]
        x = [time_interval,total_travelling_time]
        travelling_time_D20.append(x)

travelling_time_D20 = pd.DataFrame(travelling_time_D20)
travelling_time_D20.head()

travelling_time_D20.columns = ["D20_day_hour", "total_travelling_time"]

travelling_time_D20["total_travelling_time"] = travelling_time_D20["total_travelling_time"]/pd.Timedelta('1 minutes')
travelling_time_D20["total_travelling_time"] = travelling_time_D20["total_travelling_time"].astype(float)
travelling_time_D20["total_travelling_time"] = travelling_time_D20["total_travelling_time"].abs()
travelling_time_D20.describe()

travelling_time_D20.groupby(["D20_day_hour"]).mean()

Unnamed: 0_level_0,total_travelling_time
D20_day_hour,Unnamed: 1_level_1
6,38.5
7,40.0
8,42.333333
9,45.0
10,49.0
11,54.5
12,57.666667
13,57.25
14,57.0
15,57.0
