In [1]:
import pandas as pd

## Load the unlabelled dataset ##

In [3]:
# Read the raw (not yet labelled) results table; the path is relative to the
# notebook's working directory.
raw_csv_path = 'data/from-azure/nathan_AllData.csv'
df = pd.read_csv(raw_csv_path)

In [4]:
# Columns treated as features, one per line so additions diff cleanly.
features_cols = [
    'GridRows',
    'GridColumns',
    'NumOfAgents',
    'NumOfObstacles',
    'BranchingFactor',
    'ObstacleDensity',
    'AvgDistanceToGoal',
    'MaxDistanceToGoal',
    'MinDistanceToGoal',
    'AvgStartDistances',
    'AvgGoalDistances',
    'PointsAtSPRatio',
]

# Every column whose name contains "Runtime", in the frame's column order.
# NOTE(review): a later cell adds a 'Y Runtime' column; re-running this cell
# after that point would pick it up too, so run the notebook top to bottom.
runtime_cols = [name for name in df.columns if "Runtime" in str(name)]

# Runtimes + features + the label column name 'Y' (the 'Y' column itself is
# created in a later cell; this is only a list of names).
relev_cols = [*runtime_cols, *features_cols, 'Y']

In [5]:
# Display the columns matched by the "Runtime" name filter.
runtime_cols

['A*+OD+ID Runtime',
 'MA-CBS-Global-10/(EPEA*/SIC) choosing the first conflict in CBS nodes Runtime',
 'Basic-CBS/(A*/SIC)+ID Runtime',
 'ICTS 3E +ID Runtime',
 'EPEA*+ID Runtime']

## Create Y Column with argmin on runtime_cols ##

In [6]:
# Label each row with the NAME of the runtime column holding its smallest
# value (row-wise argmin over runtime_cols), then store it as column 'Y'.
Y = df.loc[:, runtime_cols].idxmin(axis='columns')
df['Y'] = Y

In [7]:
# SANITY CHECK: show one row's runtimes next to the label chosen for it.
sanity_row = df.iloc[400]
sanity_row[runtime_cols], sanity_row['Y']

(A*+OD+ID Runtime                                                                 0.9112
 MA-CBS-Global-10/(EPEA*/SIC) choosing the first conflict in CBS nodes Runtime    0.0659
 Basic-CBS/(A*/SIC)+ID Runtime                                                    0.8479
 ICTS 3E +ID Runtime                                                              0.7861
 EPEA*+ID Runtime                                                                 0.7612
 Name: 400, dtype: object,
 'MA-CBS-Global-10/(EPEA*/SIC) choosing the first conflict in CBS nodes Runtime')

In [8]:
def runtime_to_success(col):
    """Return `col` with its last whitespace-delimited token replaced by "Success".

    Used to go from a "<name> Runtime" column label to the matching
    "<name> Success" label.

    Only the final token is touched: whitespace inside the leading part of
    the name is preserved exactly, so labels containing double spaces or tabs
    still map onto their real sibling column instead of a whitespace-collapsed
    variant that does not exist in the frame.

    Parameters
    ----------
    col : str
        Column label whose last token should be swapped.

    Returns
    -------
    str
        The label with "Success" as its last token. A single-token label
        yields just "Success".

    Raises
    ------
    IndexError
        If `col` is empty or contains only whitespace.
    """
    # rsplit with maxsplit=1 separates only the trailing token and leaves the
    # prefix (including any internal whitespace runs) untouched.
    parts = col.rsplit(None, 1)
    parts[-1] = "Success"
    return " ".join(parts)

# Quick demonstration on a toy label.
runtime_to_success("Y Runtime")

'Y Success'

In [9]:
def _label_success(row):
    """Return the row's value in the "Success" column paired with its 'Y' label."""
    success_col = runtime_to_success(row['Y'])
    return row[success_col]


# For every row, copy the success value of the column family named by 'Y'.
df['Y Success'] = df.apply(_label_success, axis=1)

In [10]:
def _label_runtime(row):
    """Return the row's value in the runtime column named by its 'Y' label."""
    chosen_col = row['Y']
    return row[chosen_col]


# For every row, copy the runtime stored in the column that 'Y' points at.
df['Y Runtime'] = df.apply(_label_runtime, axis=1)

In [11]:
# Label distribution: number of rows per value of 'Y'.
df['Y'].value_counts()

MA-CBS-Global-10/(EPEA*/SIC) choosing the first conflict in CBS nodes Runtime    1416
ICTS 3E +ID Runtime                                                               678
EPEA*+ID Runtime                                                                  527
Basic-CBS/(A*/SIC)+ID Runtime                                                     262
A*+OD+ID Runtime                                                                  227
Name: Y, dtype: int64

## Add GridSize and Sparsity features

In [12]:
# Grid size = rows x columns, computed element-wise per row of the frame.
df['GridSize'] = df['GridRows'].mul(df['GridColumns'])

In [13]:
# Sparsity = NumOfAgents / (GridSize - NumOfObstacles).
# Computed as vectorized column arithmetic instead of a row-wise apply: same
# values, no per-row Python call. A zero denominator produces inf/NaN in the
# result rather than aborting the cell.
# NOTE(review): presumably the denominator is the number of obstacle-free
# cells, making this "agents per free cell" — confirm against the data source.
df['Sparsity'] = df['NumOfAgents'] / (df['GridSize'] - df['NumOfObstacles'])

In [14]:
# Register the derived columns as features. Guarded so that re-running this
# cell does not add duplicate names (duplicates would select the same column
# twice in df[features_cols]). The list is extended in place, so other
# references to features_cols see the update.
features_cols += [
    name for name in ('GridSize', 'Sparsity') if name not in features_cols
]


In [15]:
# Add the label name to the list (kept for compatibility with later cells),
# guarded so that re-running the cell does not append it twice.
# NOTE(review): after this cell features_cols also contains the label 'Y';
# make sure it is excluded before using the list as model inputs.
if 'Y' not in features_cols:
    features_cols.append('Y')

# 'Y' holds column-name strings, so restrict the correlation to numeric/bool
# columns explicitly. Older pandas dropped non-numeric columns silently;
# pandas >= 2.0 raises on them by default.
df[features_cols].select_dtypes(include=['number', 'bool']).corr()

Unnamed: 0,GridRows,GridColumns,NumOfAgents,NumOfObstacles,BranchingFactor,ObstacleDensity,AvgDistanceToGoal,MaxDistanceToGoal,MinDistanceToGoal,AvgStartDistances,AvgGoalDistances,PointsAtSPRatio,GridSize,Sparsity
GridRows,1.0,0.872173,-0.096604,0.64891,-0.013183,0.704447,0.828292,0.83199,0.323697,0.813816,0.843779,-0.65494,0.735142,-0.461724
GridColumns,0.872173,1.0,-0.098144,0.919174,-0.008805,0.551495,0.93771,0.957417,0.388462,0.922472,0.903642,-0.441691,0.962286,-0.303268
NumOfAgents,-0.096604,-0.098144,1.0,-0.117408,0.070088,-0.337693,-0.134844,-0.091737,-0.204607,-0.07309,-0.064444,0.56377,-0.104255,0.127493
NumOfObstacles,0.64891,0.919174,-0.117408,1.0,-0.004321,0.409628,0.861013,0.888665,0.376622,0.847172,0.785064,-0.206216,0.989871,-0.119136
BranchingFactor,-0.013183,-0.008805,0.070088,-0.004321,0.0,-0.016705,-0.009423,-0.008381,-0.00489,-0.00857,-0.009472,0.036032,-0.005993,0.016019
ObstacleDensity,0.704447,0.551495,-0.337693,0.409628,-0.016705,1.0,0.512407,0.499165,0.255249,0.492124,0.511576,-0.707306,0.436564,-0.433139
AvgDistanceToGoal,0.828292,0.93771,-0.134844,0.861013,-0.009423,0.512407,1.0,0.979618,0.534851,0.929594,0.918294,-0.435663,0.90299,-0.296951
MaxDistanceToGoal,0.83199,0.957417,-0.091737,0.888665,-0.008381,0.499165,0.979618,1.0,0.381073,0.970737,0.959602,-0.410277,0.929464,-0.284543
MinDistanceToGoal,0.323697,0.388462,-0.204607,0.376622,-0.00489,0.255249,0.534851,0.381073,1.0,0.21676,0.197898,-0.217284,0.382635,-0.131148
AvgStartDistances,0.813816,0.922472,-0.07309,0.847172,-0.00857,0.492124,0.929594,0.970737,0.21676,1.0,0.97969,-0.405431,0.889449,-0.283385


## Save labelled data to file

In [17]:
# df.to_csv("data/from-azure/nathan_AllData-labelled.csv")

## Adding ranking (in order to train LTR models) ##

# To add the 'bridges' feature, use the CreateGridFromMapFile notebook