# Imports

In [26]:
import dijkstra
import astar
import pandas
import data_loader
from timer import Timer
import cli_utils

# Lift pandas' row and column display limits so the frames shown in this
# notebook are rendered in full rather than truncated.
for option_name in ('display.max_rows', 'display.max_columns'):
    pandas.set_option(option_name, None)

# Loading data

In [27]:
# Build the graph through the project's data loader, asking it to normalize
# coordinates, and keep a direct handle on the graph's `graph_dict` attribute,
# which the cells below index by lower-case stop name.
graph = data_loader.load_data_to_graph(
    normalize_coordinates=True,
)
graph_dict = graph.graph_dict

# Test data

In [28]:
# (start stop, end stop) name pairs used as benchmark routes. The names are
# written in lower case because they are used directly as keys of graph_dict.
bus_stops = [
    ('wrocławski park przemysłowy', 'pl. grunwaldzki'),
    ('grabiszyńska', 'pl. daniłowskiego'),
    ('kwiska', 'pl. grunwaldzki'),
    ('krzyki', 'leśnica'),
    ('leśnica', 'biskupin'),
    ('wrocławski park przemysłowy', 'dworzec autobusowy'),
    ('fat', 'osiedle sobieskiego'),
]

# Resolve every name pair to the corresponding pair of graph_dict entries.
bus_stops_test_data = [
    (graph_dict[origin_name], graph_dict[destination_name])
    for origin_name, destination_name in bus_stops
]

# Departure times as typed by a user, and the same times after being passed
# through cli_utils.normalize_input_time.
start_times = ['9:00', '9:15', '9:30', '2:00', '2:15', '2:30']
start_times_test_data = list(map(cli_utils.normalize_input_time, start_times))

# Data analysis

In [29]:
def _run_dijkstra_for_time(start_stop, end_stop, start_time):
    """Run dijkstra.shortest_path with its default cost function.

    The stops are passed by lower-cased name together with graph_dict.
    """
    return dijkstra.shortest_path(
        graph_dict=graph_dict,
        start_stop=start_stop.name.lower(),
        goal_stop=end_stop.name.lower(),
        start_time=start_time,
        cost_fn=dijkstra.DEFAULT_COST_FUNCTION,
    )


def _run_astar_for_time(start_stop, end_stop, start_time):
    """Run astar.astar with the time cost function and time heuristic."""
    return astar.astar(
        start=start_stop,
        goal=end_stop,
        start_time=start_time,
        cost_fn=astar.TIME_COST_FUNCTION,
        heuristic_fn=astar.TIME_HEURISTIC,
    )


def _run_astar_for_line_changes(start_stop, end_stop, start_time):
    """Run astar.astar with the line-change cost function and heuristic."""
    return astar.astar(
        start=start_stop,
        goal=end_stop,
        start_time=start_time,
        cost_fn=astar.LINE_CHANGE_COST_FUNCTION,
        heuristic_fn=astar.LINE_CHANGE_HEURISTIC,
    )


# Benchmarked configurations: display name -> callable taking
# (start_stop, end_stop, start_time).
configs = {
    'Dijkstra for time': _run_dijkstra_for_time,
    'A* for time': _run_astar_for_time,
    'A* for line changes': _run_astar_for_line_changes,
}

columns = ['Algorithm', 'Start stop', 'End stop', 'Start time', 'Cost', 'Computation time']

timer = Timer()
data = []

# One record per (start time, route, configuration) combination.
for start_time in start_times_test_data:
    for start_stop, end_stop in bus_stops_test_data:
        for config_name, function in configs.items():
            # timer.run receives a zero-argument callable; its result is
            # unpacked as a pair whose first element is recorded as the cost.
            cost, _ = timer.run(lambda: function(start_stop, end_stop, start_time))
            # Read the measurement right after the run it belongs to.
            computation_time = timer.elapsed_time

            record = (
                config_name,
                start_stop,
                end_stop,
                cli_utils.convert_normalized_time(start_time),
                cost,
                computation_time,
            )
            data.append(record)

statistics = pandas.DataFrame(data, columns=columns)

In [30]:
# Display every benchmark record. The display.max_rows / display.max_columns
# options were set to None in the imports cell, so the frame is not truncated.
statistics

Unnamed: 0,Algorithm,Start stop,End stop,Start time,Cost,Computation time
0,Dijkstra for time,Wrocławski Park Przemysłowy,PL. GRUNWALDZKI,09:00,22.0,0.824111
1,A* for time,Wrocławski Park Przemysłowy,PL. GRUNWALDZKI,09:00,22.0,0.073651
2,A* for line changes,Wrocławski Park Przemysłowy,PL. GRUNWALDZKI,09:00,0.0,0.192585
3,Dijkstra for time,Grabiszyńska,pl. Daniłowskiego,09:00,29.0,0.846802
4,A* for time,Grabiszyńska,pl. Daniłowskiego,09:00,35.0,0.147363
5,A* for line changes,Grabiszyńska,pl. Daniłowskiego,09:00,1001.0,0.160592
6,Dijkstra for time,Kwiska,PL. GRUNWALDZKI,09:00,26.0,0.843457
7,A* for time,Kwiska,PL. GRUNWALDZKI,09:00,26.0,0.112373
8,A* for line changes,Kwiska,PL. GRUNWALDZKI,09:00,0.0,0.323963
9,Dijkstra for time,KRZYKI,LEŚNICA,09:00,50.0,0.845065


In [31]:
# Drop the line-change runs so that only the two time-optimising
# configurations remain.
time_optimization_statistics = statistics.loc[
    statistics['Algorithm'].ne('A* for line changes')
]

# One row per (start stop, end stop, start time); the columns hold cost and
# computation time side by side for each remaining algorithm. pivot_table
# aggregates with its default function (mean).
pivoted_time_optimization_statistics = time_optimization_statistics.pivot_table(
    index=['Start stop', 'End stop', 'Start time'],
    columns='Algorithm',
    values=['Cost', 'Computation time'],
)
pivoted_time_optimization_statistics

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Computation time,Computation time,Cost,Cost
Unnamed: 0_level_1,Unnamed: 1_level_1,Algorithm,A* for time,Dijkstra for time,A* for time,Dijkstra for time
Start stop,End stop,Start time,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
FAT,OSIEDLE SOBIESKIEGO,02:00,0.454658,0.799897,60.0,60.0
FAT,OSIEDLE SOBIESKIEGO,02:15,0.308995,0.792907,105.0,105.0
FAT,OSIEDLE SOBIESKIEGO,02:30,0.297767,0.817342,90.0,90.0
FAT,OSIEDLE SOBIESKIEGO,09:00,0.202379,0.843353,68.0,48.0
FAT,OSIEDLE SOBIESKIEGO,09:15,0.23435,0.842596,63.0,53.0
FAT,OSIEDLE SOBIESKIEGO,09:30,0.220441,0.819201,53.0,48.0
Grabiszyńska,pl. Daniłowskiego,02:00,0.426093,0.789875,51.0,51.0
Grabiszyńska,pl. Daniłowskiego,02:15,0.304209,0.82158,96.0,96.0
Grabiszyńska,pl. Daniłowskiego,02:30,0.292859,0.856029,81.0,81.0
Grabiszyńska,pl. Daniłowskiego,09:00,0.147363,0.846802,35.0,29.0


In [32]:
# Column-wise means of the pivoted frame: one value per algorithm for the
# computation time ...
average_computation_time_per_algorithm_time_optimization = (
    pivoted_time_optimization_statistics['Computation time'].mean()
)

# ... and one value per algorithm for the cost.
average_cost_per_algorithm_time_optimization = (
    pivoted_time_optimization_statistics['Cost'].mean()
)

# Put both averages side by side in a frame indexed by algorithm.
average_statistics_time_optimization = pandas.DataFrame(
    data={
        'Average Cost': average_cost_per_algorithm_time_optimization,
        'Average Computation Time': average_computation_time_per_algorithm_time_optimization,
    }
)

average_statistics_time_optimization

Unnamed: 0_level_0,Average Cost,Average Computation Time
Algorithm,Unnamed: 1_level_1,Unnamed: 2_level_1
A* for time,56.261905,0.221471
Dijkstra for time,51.285714,0.829213


In [33]:
# Keep only the 'A* for line changes' records whose cost is not infinity.
is_line_change_run = statistics['Algorithm'] == 'A* for line changes'
has_finite_cost = statistics['Cost'] != float('inf')

# Take an explicit copy: the frame is modified in place just below, and
# assigning into the result of a boolean selection of `statistics` raises
# SettingWithCopyWarning on pandas versions without Copy-on-Write.
line_changes_optimization_statistics = statistics[is_line_change_run & has_finite_cost].copy()

# Costs of 1000 or more have 1000 subtracted from them.
# NOTE(review): presumably astar.LINE_CHANGE_COST_FUNCTION adds 1000 as a fixed
# offset to some routes - confirm against the astar module.
LINE_CHANGE_COST_OFFSET = 1000
has_cost_offset = line_changes_optimization_statistics['Cost'] >= LINE_CHANGE_COST_OFFSET
line_changes_optimization_statistics.loc[has_cost_offset, 'Cost'] -= LINE_CHANGE_COST_OFFSET

line_changes_optimization_statistics

Unnamed: 0,Algorithm,Start stop,End stop,Start time,Cost,Computation time
2,A* for line changes,Wrocławski Park Przemysłowy,PL. GRUNWALDZKI,09:00,0.0,0.192585
5,A* for line changes,Grabiszyńska,pl. Daniłowskiego,09:00,1.0,0.160592
8,A* for line changes,Kwiska,PL. GRUNWALDZKI,09:00,0.0,0.323963
11,A* for line changes,KRZYKI,LEŚNICA,09:00,3.0,0.450456
14,A* for line changes,LEŚNICA,BISKUPIN,09:00,0.0,0.237792
17,A* for line changes,Wrocławski Park Przemysłowy,DWORZEC AUTOBUSOWY,09:00,1.0,0.607067
20,A* for line changes,FAT,OSIEDLE SOBIESKIEGO,09:00,1.0,0.159256
23,A* for line changes,Wrocławski Park Przemysłowy,PL. GRUNWALDZKI,09:15,0.0,0.350793
26,A* for line changes,Grabiszyńska,pl. Daniłowskiego,09:15,1.0,0.165657
29,A* for line changes,Kwiska,PL. GRUNWALDZKI,09:15,0.0,0.318145


In [34]:
# Mean computation time over all retained line-change runs ...
average_computation_time_per_algorithm_line_changes = (
    line_changes_optimization_statistics['Computation time'].mean()
)

# ... and mean cost over the same runs.
average_cost_per_algorithm_line_changes = (
    line_changes_optimization_statistics['Cost'].mean()
)

# Single-row summary frame holding both averages.
average_statistics_line_changes = pandas.DataFrame([
    {
        'Average Cost': average_cost_per_algorithm_line_changes,
        'Average Computation Time': average_computation_time_per_algorithm_line_changes,
    }
])

average_statistics_line_changes

Unnamed: 0,Average Cost,Average Computation Time
0,1.564103,0.363295
