In [1]:
import pandas as pd
import os
import glob
import traceback
from collections import OrderedDict
import csv

In [11]:
def calculating_percent_done(route, direction, df_trips, df_leave):
    ''' Takes in route, direction and 2 data frames

    Computes, for every stop of the route, the mean percentage of the full
    journey time that has elapsed on arrival at that stop, and saves the
    result to ./accuracies/<route>_<direction>_Percents.csv with the columns
    StopOrder, StopID and PercentDone (rows ordered by StopOrder).

    Parameters
    ----------
    route : str
        Route name; used in the progress messages and in the file name.
    direction : str
        Direction label (the notebook passes 'IB' or 'OB'); used in the
        file name.
    df_trips : pandas.DataFrame
        Trip-level data. Columns read from it: TRIPID, ROUTEID and Dates
        (parsed with '%Y-%m-%d %H:%M:%S').
    df_leave : pandas.DataFrame
        Stop-level data. Columns read from it: TRIPID and DAYOFSERVICE
        (parsed with '%d-%b-%y %H:%M:%S').

    After the two frames are merged on (TRIPID, service date) the merged
    frame must also contain ACTUALTIME_ARR, ACTUALTIME_DEP, ATA_STOPS,
    PROGRNUMBER and STOPPOINTID.

    Returns
    -------
    None. The result is written to disk. Any error is printed together with
    its traceback and is not re-raised, so a batch loop over many routes
    keeps going.
    '''
    try:
        print('Starting...', route )

        # Parse the service date on both sides so that trips and stop records
        # can be joined on (TRIPID, date). assign() returns new frames, so the
        # data frames passed in by the caller are not modified.
        leave = df_leave.assign(
            DATEOFSERVICE=pd.to_datetime(df_leave['DAYOFSERVICE'], format='%d-%b-%y %H:%M:%S').dt.date)
        trips = df_trips.assign(
            DATEOFSERVICE=pd.to_datetime(df_trips['Dates'], format='%Y-%m-%d %H:%M:%S').dt.date)

        final_df = pd.merge(trips, leave, how='inner', on=['TRIPID', 'DATEOFSERVICE'])

        # duration of the whole trip and time elapsed on arrival at each stop
        full_journey = (final_df['ACTUALTIME_ARR'] - final_df['ACTUALTIME_DEP']) / 60
        time_since_beginning = (final_df['ATA_STOPS'] - final_df['ACTUALTIME_DEP']) / 60
        percentage = (time_since_beginning / full_journey) * 100
        # A zero-length trip has no meaningful percentage; leave it as NaN so
        # that an infinite value cannot distort the mean of a stop.
        percentage = percentage.where(full_journey != 0)

        # keep only the columns that are needed from here on
        final_df = final_df[['ROUTEID', 'PROGRNUMBER', 'STOPPOINTID']].assign(PERCENTAGE=percentage)

        # When the route has several sub-routes, keep the one that serves the
        # most stops. All sub-routes are counted first and the data is filtered
        # once afterwards; filtering inside the loop would leave no rows for
        # the sub-routes that are counted later.
        sub_routes = df_trips['ROUTEID'].unique().tolist()
        if len(sub_routes) > 1:
            stops_per_sub_route = {
                sub: final_df.loc[final_df['ROUTEID'] == sub, 'PROGRNUMBER'].nunique()
                for sub in sub_routes
            }
            longest = max(stops_per_sub_route, key=stops_per_sub_route.get)
            final_df = final_df.loc[final_df['ROUTEID'] == longest]

        # Mean percentage per (stop order, stop id). Grouping keeps every
        # percentage attached to its own stop and returns the rows sorted by
        # PROGRNUMBER, i.e. in the order the stops are served.
        mean_percent = final_df.groupby(['PROGRNUMBER', 'STOPPOINTID'])['PERCENTAGE'].mean()
        # a mean of at most 1% is treated as the start of the journey
        mean_percent = mean_percent.mask(mean_percent <= 1, 0).round(2)
        per_stop = mean_percent.reset_index()

        try:
            # create the output folder on first use
            os.makedirs('./accuracies', exist_ok=True)
            with open('./accuracies/' + route + '_' + direction + '_Percents.csv', 'w', newline='') as csvfile:
                writer = csv.writer(csvfile)
                writer.writerow(('StopOrder', 'StopID', 'PercentDone'))
                writer.writerows(per_stop.itertuples(index=False, name=None))

            print(route, "Done")
        except Exception:
            print("Error saving...",route)
            traceback.print_exc()

    except Exception:
        # best effort: report the failing route and let the caller continue
        print("Error with ", route + direction)
        traceback.print_exc()

                                
                        

In [3]:
# Names of the inbound leave-times files in ./inbound_routes: the file name
# without its directory and without the '.csv' extension.
leave_routes = [
    os.path.splitext(os.path.basename(csv_path))[0]
    for csv_path in glob.glob(os.path.join('./inbound_routes', '*.csv'))
]

# Names of the trip files in ./Directions, built the same way
# (for example '102_IB').
trip_routes = [
    os.path.splitext(os.path.basename(csv_path))[0]
    for csv_path in glob.glob(os.path.join('./Directions', '*.csv'))
]

In [4]:
# Run the percentage calculation for every inbound leave-times file that has
# a trip file of the same name in ./Directions.
for leave_name in leave_routes:
    for trip_name in trip_routes:
        if trip_name != leave_name:
            continue

        # the route is the part of the file name before the first underscore
        route = leave_name.split('_', 1)[0]

        with open('./Directions/' + trip_name + '.csv', 'r') as trips_file:
            df_trips = pd.read_csv(trips_file)
        with open('./inbound_routes/' + leave_name + '.csv', 'r') as leave_file:
            df_leave = pd.read_csv(leave_file)

        calculating_percent_done(route, 'IB', df_trips, df_leave)

Starting... 102
102 Done
Starting... 104
104 Done
Starting... 111
111 Done
Starting... 114
114 Done
Starting... 116
116 Done
Starting... 118
118 Done
Starting... 11
11 Done
Starting... 120
120 Done
Starting... 122
122 Done
Starting... 123
123 Done
Starting... 130
130 Done
Starting... 13
13 Done
Starting... 140
140 Done
Starting... 142
142 Done
Starting... 145
145 Done
Starting... 14C
14C Done
Starting... 14
14 Done
Starting... 150
150 Done
Starting... 151
151 Done
Starting... 15A
15A Done
Starting... 15B
15B Done
Starting... 15D
15D Done
Starting... 15
15 Done
Starting... 161
161 Done
Starting... 16C
16C Done
Starting... 16
16 Done
Starting... 17A
17A Done
Starting... 17
17 Done
Starting... 184
184 Done
Starting... 185
185 Done
Starting... 18
18 Done
Starting... 1
1 Done
Starting... 220
220 Done
Starting... 236
236 Done
Starting... 238
238 Done
Starting... 239
239 Done
Starting... 25A
25A Done
Starting... 25B
25B Done
Starting... 25D
25D Done
Starting... 25X
25X Done
Starting... 25
25 

In [8]:
# Names of the outbound leave-times files in ./outbounds/outbound: the file
# name without its directory and without the '.csv' extension.
leave_routes_outbound = [
    os.path.splitext(os.path.basename(csv_path))[0]
    for csv_path in glob.glob(os.path.join('./outbounds/outbound', '*.csv'))
]

In [9]:
# Inspection only: display the trip file names collected from ./Directions.
trip_routes

['102_IB',
 '102_OB',
 '104_IB',
 '104_OB',
 '111_IB',
 '111_OB',
 '114_IB',
 '114_OB',
 '116_IB',
 '116_OB',
 '118_IB',
 '11_IB',
 '11_OB',
 '120_IB',
 '120_OB',
 '122_IB',
 '122_OB',
 '123_IB',
 '123_OB',
 '130_IB',
 '130_OB',
 '13_IB',
 '13_OB',
 '140_IB',
 '140_OB',
 '142_IB',
 '142_OB',
 '145_IB',
 '145_OB',
 '14C_IB',
 '14C_OB',
 '14_IB',
 '14_OB',
 '150_IB',
 '150_OB',
 '151_IB',
 '151_OB',
 '15A_IB',
 '15A_OB',
 '15B_IB',
 '15B_OB',
 '15D_IB',
 '15D_OB',
 '15_IB',
 '15_OB',
 '161_IB',
 '161_OB',
 '16C_IB',
 '16C_OB',
 '16D_OB',
 '16_IB',
 '16_OB',
 '17A_IB',
 '17A_OB',
 '17_IB',
 '17_OB',
 '184_IB',
 '184_OB',
 '185_IB',
 '185_OB',
 '18_IB',
 '18_OB',
 '1_IB',
 '1_OB',
 '220_IB',
 '220_OB',
 '236_IB',
 '236_OB',
 '238_IB',
 '238_OB',
 '239_IB',
 '239_OB',
 '25A_IB',
 '25A_OB',
 '25B_IB',
 '25B_OB',
 '25D_IB',
 '25D_OB',
 '25X_IB',
 '25X_OB',
 '25_IB',
 '25_OB',
 '26_IB',
 '26_OB',
 '270_IB',
 '270_OB',
 '27A_IB',
 '27A_OB',
 '27B_IB',
 '27B_OB',
 '27X_IB',
 '27X_OB',
 '27_IB',


In [12]:

# Run the percentage calculation for every outbound leave-times file that has
# a trip file of the same name in ./Directions.
for leave_name in leave_routes_outbound:
    for trip_name in trip_routes:
        if trip_name != leave_name:
            continue

        # the route is the part of the file name before the first underscore
        route = leave_name.split('_', 1)[0]

        with open('./Directions/' + trip_name + '.csv', 'r') as trips_file:
            df_trips = pd.read_csv(trips_file)
        with open('./outbounds/outbound/' + leave_name + '.csv', 'r') as leave_file:
            df_leave = pd.read_csv(leave_file)

        calculating_percent_done(route, 'OB', df_trips, df_leave)

Starting... 102
102 Done
Starting... 104
104 Done
Starting... 111
111 Done
Starting... 114
114 Done
Starting... 116
116 Done
Starting... 11
11 Done
Starting... 120
120 Done
Starting... 122
122 Done
Starting... 123
123 Done
Starting... 130
130 Done
Starting... 13
13 Done
Starting... 140
140 Done
Starting... 142
142 Done
Starting... 145
145 Done
Starting... 14C
14C Done
Starting... 14
14 Done
Starting... 150
150 Done
Starting... 151
151 Done
Starting... 15A
15A Done
Starting... 15B
15B Done
Starting... 15D
15D Done
Starting... 15
15 Done
Starting... 161
161 Done
Starting... 16C
16C Done
Starting... 16D
16D Done
Starting... 16
16 Done
Starting... 17A
17A Done
Starting... 17
17 Done
Starting... 184
184 Done
Starting... 185
185 Done
Starting... 18
18 Done
Starting... 1
1 Done
Starting... 220
220 Done
Starting... 236
236 Done
Starting... 238
238 Done
Starting... 239
239 Done
Starting... 25A
25A Done
Starting... 25B
25B Done
Starting... 25D
25D Done
Starting... 25X
25X Done
Starting... 25
25 