# Comparison of the Performance of Online Algorithms

**Simulations of randomly generated group sequences on a selection of grids**

In [1]:
import itertools
import pickle
import random

import numpy as np
import pandas as pd
import plotly.express as px

from algorithms.online import FirstFit, BestFit, WorstFit, MinCovidChairs, Hybrid_BF_CC
from online_batch import run_algorithm_with_original_groups, repeat_algorithm_with_different_groups, get_file_names
from utils.test_file import generate_group_sequence

## Set variables for the simulation

We need to define:

* which files (= grids) we are going to run
* how many random group sequences we are going to test per grid
* the number of groups in each sequence
* which algorithms to compare

In [2]:
FILE_DIR = "input/online"

# Seed the global RNGs *before* any groups are generated, so that a
# Restart & Run All reproduces the same simulated group sequences.
# NOTE(review): assumes generate_group_sequence draws from the global
# `random` or `numpy.random` state -- confirm against utils.test_file.
SEED = 10
random.seed(SEED)
np.random.seed(SEED)

# Size of the simulation: how many random group sequences are generated,
# and how many groups each sequence contains.
N_GROUP_SEQUENCES = 20
GROUPS_PER_SEQUENCE = 50
GROUPS_LIST = [generate_group_sequence(GROUPS_PER_SEQUENCE) for _ in range(N_GROUP_SEQUENCES)]

# Algorithms to compare; Hybrid_BF_CC is currently left out of the run.
ALGORITHMS = [FirstFit, BestFit, WorstFit, MinCovidChairs,
    # Hybrid_BF_CC
    ]

## Run the simulation

Gather all the files that we want to run on.

In [3]:
# Names of the grid files to simulate; each name is later joined onto
# FILE_DIR to build the path that is handed to the algorithms.
file_names = get_file_names(FILE_DIR)

Run the algorithms and gather the results in a nested dictionary (it is converted to a dataframe further below)

In [5]:
%%capture
result = {}
for file in file_names:
    grid_path = f"{FILE_DIR}/{file}"

    grid_result = {}
    for algorithm in ALGORITHMS:
        alg_filled_chairs = []
        alg_filled_chairs.append(run_algorithm_with_original_groups(algorithm, grid_path))
        alg_filled_chairs = alg_filled_chairs + repeat_algorithm_with_different_groups(algorithm, grid_path, GROUPS_LIST)
        alg_name = str(algorithm.__name__)
        grid_result.update({alg_name: alg_filled_chairs})
    
    result.update({file: grid_result})


In [6]:
# Quick sanity check: list the grid files for which results were collected.
print("The following grids were solved:", result.keys(), sep="\n")

The following grids were solved:
dict_keys(['Online1.txt', 'Online10.txt', 'Online11.txt', 'Online12.txt', 'Online2.txt', 'Online3.txt', 'Online4.txt', 'Online5.txt', 'Online6.txt', 'Online7.txt', 'Online8.txt', 'Online9.txt'])


Put everything in a dataframe:

In [7]:
df_list = []
for grid, algs in result.items():
    # One named Series per algorithm; after transposing, each algorithm is a
    # column and each row is one run on this grid.
    alg_series = [pd.Series(value_list, name=alg) for alg, value_list in algs.items()]
    # Drop the last four characters of the file name (".txt" in the grids shown).
    grid_name = grid[:-4]
    df_list.append(
        pd.DataFrame(alg_series)
        .transpose()
        .assign(grid=grid_name)
        .set_index('grid', append=True, drop=True)
    )
# Stack all grids; the resulting index is (run number, grid name).
df = pd.concat(df_list)

## Preprocessing

Let's extract the records for the original groups from the input grid files and put them in a separate dataframe

In [8]:
# Run 0 of every grid used the original groups included in the grid file.
# Selecting key 0 on the outer index level leaves one row per grid.
df_real_groups = df.xs(key=0)
df_real_groups

Unnamed: 0_level_0,FirstFit,BestFit,WorstFit,MinCovidChairs
grid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Online1,5,5,5,6
Online10,74,67,73,82
Online11,45,52,39,52
Online12,144,147,120,148
Online2,9,9,5,5
Online3,15,11,13,15
Online4,12,12,12,12
Online5,22,26,22,22
Online6,29,29,27,33
Online7,41,41,31,34


And export the dataframe, so it can be used elsewhere for a comparison with the results of the offline algorithm

In [9]:
# Persist the original-group results as a pickle file for use outside this notebook.
# NOTE(review): assumes the "results/" directory already exists -- nothing in
# this notebook is seen creating it; confirm before a fresh run.
df_real_groups.to_pickle("results/online_results.p")

Gather the remaining data, i.e. the records of the generated group simulations, in a separate dataframe:

In [10]:
# Runs 1 .. len(GROUPS_LIST) belong to the simulation with generated groups;
# run 0 (the original groups from the grid file) is left out.
sim_run_ids = range(1, len(GROUPS_LIST) + 1)
df_sim = df.loc[pd.IndexSlice[sim_run_ids, :]]
df_sim

Unnamed: 0_level_0,Unnamed: 1_level_0,FirstFit,BestFit,WorstFit,MinCovidChairs
Unnamed: 0_level_1,grid,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,Online1,4,6,3,6
1,Online10,68,68,65,77
1,Online11,60,58,48,62
1,Online12,129,130,109,139
1,Online2,8,8,6,7
...,...,...,...,...,...
20,Online5,26,30,26,28
20,Online6,28,31,26,31
20,Online7,37,37,39,41
20,Online8,46,50,44,50


## Analysis

**The following analyses are on the simulation results.**

### Mean seats filled per algorithm per grid

The mean is taken over all the group sequences that were generated

In [11]:
# Mean per grid over all simulated runs, reshaped to long format:
# one row per (grid, algorithm) with the mean in the 'seats' column.
df_sim_mean = (
    df_sim
    .groupby(['grid'])
    .mean()
    .reset_index()
    .melt(id_vars='grid', var_name='algorithm', value_name='seats')
)

In [12]:
# Standard deviation per grid over all simulated runs, in the same long
# format as the means: one row per (grid, algorithm) with a 'std' column.
df_sim_std = (
    df_sim
    .groupby(['grid'])
    .std()
    .reset_index()
    .melt(id_vars='grid', var_name='algorithm', value_name='std')
)

In [13]:
# Put mean ('seats') and spread ('std') side by side for every
# (grid, algorithm) pair so they can be plotted together.
df_sim_comb = pd.merge(df_sim_mean, df_sim_std, on=['grid', 'algorithm'])

The mean performance of each algorithm per grid:

In [14]:
# Grouped bar chart: one bar per algorithm within each grid, with the
# standard deviation over the simulated runs drawn as error bars.
# A title and readable labels are set so the figure can be understood on its
# own when the notebook is skimmed or the figure is exported.
bar_mean = px.bar(
    df_sim_comb,
    x='grid',
    y='seats',
    color='algorithm',
    barmode='group',
    error_y='std',
    title='Mean seats filled per algorithm per grid (error bars: +/- 1 std)',
    labels={
        'grid': 'Grid',
        'seats': 'Mean seats filled',
        'algorithm': 'Algorithm',
        'std': 'Std of seats filled',
    },
)
bar_mean.show()

### Best performance of each algorithm per grid:

In [15]:
# Best (maximum) result per grid over all simulated runs, in long format:
# one row per (grid, algorithm) with the maximum in the 'seats' column.
df_sim_max = (
    df_sim
    .groupby(['grid'])
    .max()
    .reset_index()
    .melt(id_vars='grid', var_name='algorithm', value_name='seats')
)
# A title and readable labels are set so the figure can be understood on its own.
px.bar(
    df_sim_max,
    x='grid',
    y='seats',
    color='algorithm',
    barmode='group',
    title='Best (maximum) seats filled per algorithm per grid',
    labels={
        'grid': 'Grid',
        'seats': 'Maximum seats filled',
        'algorithm': 'Algorithm',
    },
).show()

### Ranking of the algorithms relative to each other per grid

In [16]:
# For every (grid, group_sequence) combination, rank the algorithms against
# each other on the number of seats.

# Name both index levels so that they turn into regular columns on reset_index().
df_sim.index.names = ['group_sequence', 'grid']
df_ranking = (
    df_sim
    .reset_index()
    .melt(id_vars=['group_sequence', 'grid'], var_name='algorithm', value_name='seats')
    .sort_values(['grid', 'group_sequence'])
)
# Rank 1 goes to the highest seat count (ascending=False); tied algorithms
# all receive the best rank of their tie (method='min').
rank = (
    df_ranking
    .groupby(['grid', 'group_sequence'])['seats']
    .rank(method='min', ascending=False)
)
df_ranking['rank'] = rank.astype('int16')

Mean ranking of each algorithm per grid over all the simulations

In [17]:
# Average every numeric column per (grid, algorithm); the 'rank' column of the
# result is the mean rank over all simulated group sequences.
per_grid_and_algorithm = df_ranking.groupby(['grid', 'algorithm'])
df_ranking_mean = per_grid_and_algorithm.mean()
# The constant 'base' column exists only in the displayed copy;
# df_ranking_mean itself is left unchanged.
# NOTE(review): presumably a plotting baseline for the commented-out scatter
# plot in the next cell -- confirm or remove.
df_ranking_mean.assign(base=5)

Unnamed: 0_level_0,Unnamed: 1_level_0,group_sequence,seats,rank,base
grid,algorithm,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Online1,BestFit,10.5,5.7,1.3,5
Online1,FirstFit,10.5,5.4,1.65,5
Online1,MinCovidChairs,10.5,6.45,1.0,5
Online1,WorstFit,10.5,3.95,3.35,5
Online10,BestFit,10.5,68.95,2.55,5
Online10,FirstFit,10.5,71.75,2.05,5
Online10,MinCovidChairs,10.5,75.15,1.1,5
Online10,WorstFit,10.5,67.3,3.65,5
Online11,BestFit,10.5,58.3,2.1,5
Online11,FirstFit,10.5,56.65,2.65,5


In [18]:
# px.scatter(df_ranking_mean.assign(**{'base': 5}), 
#     # base='base', 
#     x=df_ranking_mean.index.get_level_values(0), color=df_ranking_mean.index.get_level_values(1), y='rank').update_yaxes(range=[5, 1])

In [19]:
# grid_rank = df_ranking.groupby(['grid', 'algorithm'])
# grid_rank = grid_rank['rank'].rank(method='min', ascending=False)
# grid_rank
# # ranking_per_grid = df_ranking['rank'] = grid_rank
# # ranking_per_grid

For each grid: how often each algorithm achieved each rank:

In [20]:
# Rank frequency per group
def get_cum_ranking_per_rank(x):
    """Count how often each rank value occurs in ``x['rank']``.

    Args:
        x: DataFrame (e.g. one group of a groupby) that has a 'rank' column.

    Returns:
        DataFrame indexed by the distinct rank values, with a single
        'count' column holding the number of occurrences of each rank.
    """
    occurrences = x['rank'].value_counts()
    return occurrences.to_frame('count')
# Count, for every (grid, algorithm) pair, how many times each rank was obtained.
df_ranking_result = df_ranking.groupby(['grid', 'algorithm']).apply(get_cum_ranking_per_rank)
df_ranking_result.index.names = ['grid', 'algorithm', 'rank']
# Pivot the ranks into columns so that ranks an algorithm never obtained show
# up as 0 instead of being absent, then melt back to long format with one row
# per (grid, algorithm, rank).
df_ranking_result = (
    df_ranking_result
    .reset_index(level='rank')
    .pivot(columns=['rank'])
    .fillna(0)
    .melt(ignore_index=False, var_name='rank', col_level=1)
)
df_ranking_result
# TODO: plot the rank frequencies, e.g.
# px.bar(df_ranking_result, x=df_ranking_result.index.get_level_values(0), color=df_ranking_result.index.get_level_values(1), y='rank', barmode='group').show()

Unnamed: 0_level_0,Unnamed: 1_level_0,rank,value
grid,algorithm,Unnamed: 2_level_1,Unnamed: 3_level_1
Online1,BestFit,1,14.0
Online1,FirstFit,1,11.0
Online1,MinCovidChairs,1,20.0
Online1,WorstFit,1,1.0
Online10,BestFit,1,1.0
...,...,...,...
Online8,WorstFit,4,4.0
Online9,BestFit,4,1.0
Online9,FirstFit,4,0.0
Online9,MinCovidChairs,4,0.0
