In [287]:
import pandas as pd

## Load the unlabelled dataset ##

In [288]:
# Read the raw, not-yet-labelled results table; the trailing expression
# echoes the row index so the number of loaded rows is visible.
raw_csv_path = 'data/from-vpn/AllData.csv'
df = pd.read_csv(raw_csv_path)
df.index

RangeIndex(start=0, stop=12707, step=1)

In [289]:
# Feature column names.
features_cols = [
    'GridRows', 'GridColumns', 'NumOfAgents', 'NumOfObstacles',
    'BranchingFactor', 'ObstacleDensity',
    'AvgDistanceToGoal', 'MaxDistanceToGoal', 'MinDistanceToGoal',
    'AvgStartDistances', 'AvgGoalDistances',
    'PointsAtSPRatio',
]
# Every column whose label contains "Runtime". The derived 'Y Runtime' column
# (added to df further down in this notebook) is skipped, so re-running this
# cell after the labelling cells yields the same list as a fresh run.
runtime_cols = [col for col in df.columns if "Runtime" in str(col) and col != 'Y Runtime']
# Runtime columns first, then the feature columns, then the label column name 'Y'.
relev_cols = runtime_cols + features_cols + ['Y']

In [290]:
# Show which runtime columns were picked up.
runtime_cols

['A*+OD+ID Runtime',
 'MA-CBS-Global-10/(EPEA*/SIC) choosing the first conflict in CBS nodes Runtime',
 'Basic-CBS/(A*/SIC)+ID Runtime',
 'ICTS 3E +ID Runtime',
 'EPEA*+ID Runtime',
 'CBS/(A*/SIC) + BP + PC without smart tie breaking using Dynamic Lazy Open List with Heuristic MVC of Cardinal Conflict Graph Heuristic Runtime']

## Create Y Column with argmin on runtime_cols ##

In [291]:
# Preview only the first rows of the runtime columns instead of rendering the whole frame.
df[runtime_cols].head(10)

Unnamed: 0,A*+OD+ID Runtime,MA-CBS-Global-10/(EPEA*/SIC) choosing the first conflict in CBS nodes Runtime,Basic-CBS/(A*/SIC)+ID Runtime,ICTS 3E +ID Runtime,EPEA*+ID Runtime,CBS/(A*/SIC) + BP + PC without smart tie breaking using Dynamic Lazy Open List with Heuristic MVC of Cardinal Conflict Graph Heuristic Runtime
0,52.9984,29.3584,13.6856,13.4931,13.6623,47.9660
1,28.9111,0.6880,26.9315,26.8701,27.0537,0.6992
2,40.0169,0.9745,37.5616,36.3021,37.5773,1.0933
3,55.3603,1.1923,50.6594,50.5308,55.6189,2.5423
4,66.5297,1.7158,71.1927,69.3344,69.8084,1.5138
5,71.8543,1.7219,74.9541,72.9232,75.0804,1.7908
6,93.6302,192.8892,103.5559,98.4557,104.1865,71.6620
7,178.4453,14.0619,115.7099,119.4731,119.8170,7.7881
8,148.0034,11.2774,145.6143,139.7179,145.0831,7.8996
9,168.3577,41.1533,171.5534,178.0229,172.5090,79.4687


In [292]:
# Per row, idxmin over the runtime columns gives the label of the column
# holding the smallest runtime; that label is stored as the target 'Y'.
# NOTE(review): when several columns share the minimum, idxmin keeps the
# first one in runtime_cols order — confirm that tie-break is acceptable.
Y = df.loc[:, runtime_cols].idxmin(axis='columns')
df['Y'] = Y

In [293]:
# SANITY CHECK: show one row's runtimes next to the label chosen for it.
row_400 = df.iloc[400]
row_400[runtime_cols], row_400['Y']

(A*+OD+ID Runtime                                                                                                                                  18115.4
 MA-CBS-Global-10/(EPEA*/SIC) choosing the first conflict in CBS nodes Runtime                                                                      300000
 Basic-CBS/(A*/SIC)+ID Runtime                                                                                                                      300000
 ICTS 3E +ID Runtime                                                                                                                               16961.6
 EPEA*+ID Runtime                                                                                                                                  17004.7
 CBS/(A*/SIC) + BP + PC without smart tie breaking using Dynamic Lazy Open List with Heuristic MVC of Cardinal Conflict Graph Heuristic Runtime     300000
 Name: 400, dtype: object, 'ICTS 3E +ID Runtime')

In [294]:
def runtime_to_success(col):
    """Turn a '<name> Runtime' column label into '<name> Success'.

    The label is split on whitespace, its final token is replaced with
    "Success", and the tokens are re-joined with single spaces.

    Args:
        col: column label string.

    Returns:
        The label with its last whitespace-separated token replaced by "Success".

    Raises:
        IndexError: if ``col`` contains no tokens (empty or whitespace-only).
    """
    tokens = col.split()
    tokens.pop()  # drop the trailing token; IndexError if there is none
    tokens.append("Success")
    return " ".join(tokens)

runtime_to_success("Y Runtime")

'Y Success'

In [295]:
def _success_of_best(row):
    """Return the row's value in the column named by runtime_to_success(row['Y'])."""
    return row[runtime_to_success(row['Y'])]

# For every row, copy the value of the "... Success" column that corresponds
# to the runtime column stored in 'Y'.
df['Y Success'] = df.apply(_success_of_best, axis=1)

In [296]:
def _runtime_of_best(row):
    """Return the row's value in the column whose name is stored in row['Y']."""
    return row[row['Y']]

# For every row, copy the runtime found in the column that 'Y' points to.
df['Y Runtime'] = df.apply(_runtime_of_best, axis=1)

In [297]:
# How many rows were assigned each label in 'Y'.
df['Y'].value_counts()

CBS/(A*/SIC) + BP + PC without smart tie breaking using Dynamic Lazy Open List with Heuristic MVC of Cardinal Conflict Graph Heuristic Runtime    5847
MA-CBS-Global-10/(EPEA*/SIC) choosing the first conflict in CBS nodes Runtime                                                                     3396
ICTS 3E +ID Runtime                                                                                                                               1612
EPEA*+ID Runtime                                                                                                                                  1372
A*+OD+ID Runtime                                                                                                                                   357
Basic-CBS/(A*/SIC)+ID Runtime                                                                                                                      123
Name: Y, dtype: int64

## Add GridSize and Sparsity features

In [298]:
# GridSize is the product of the row and column counts.
grid_rows = df['GridRows']
grid_columns = df['GridColumns']
df['GridSize'] = grid_rows * grid_columns

In [299]:
# Sparsity = NumOfAgents / (GridSize - NumOfObstacles).
# Computed with column arithmetic rather than a per-row lambda: same values,
# no Python-level call per row, and a zero denominator yields inf/NaN
# instead of raising from inside apply.
df['Sparsity'] = df['NumOfAgents'] / (df['GridSize'] - df['NumOfObstacles'])

In [300]:
# Register the two derived columns as features. The membership check keeps
# the list free of duplicates when this cell is executed more than once.
for derived_col in ('GridSize', 'Sparsity'):
    if derived_col not in features_cols:
        features_cols.append(derived_col)


In [301]:
# Add the label column name to the list; guarded so that re-running the cell
# does not append it twice.
# NOTE(review): 'Y' is the target, not an input — confirm that downstream code
# does not pass features_cols straight to a model as X.
if 'Y' not in features_cols:
    features_cols.append('Y')

## Save labelled data to file

In [302]:
# Persist the full frame (original columns plus the derived ones) without the row index.
labelled_csv_path = "data/from-vpn/AllData-labelled.csv"
df.to_csv(labelled_csv_path, index=False)

## Adding ranking (in order to train LTR models) ##

# To add the 'bridges' feature, use the CreateGridFromMapFile notebook