In [1]:
import pandas as pd

## Load the unlabelled dataset ##

In [2]:
# Read the raw CSV export into the working DataFrame.
df = pd.read_csv(filepath_or_buffer='data/from-azure/AllData.csv')

In [3]:
# Columns used as model input features.
features_cols = [
    'GridRows',
    'GridColumns',
    'NumOfAgents',
    'NumOfObstacles',
    'BranchingFactor',
    'ObstacleDensity',
    'AvgDistanceToGoal',
    'MaxDistanceToGoal',
    'MinDistanceToGoal',
    'AvgStartDistances',
    'AvgGoalDistances',
    'PointsAtSPRatio',
]
# Every column of df whose label contains the substring "Runtime".
runtime_cols = [label for label in df.columns if "Runtime" in str(label)]
# Runtime columns, then features, then the 'Y' label column name.
relev_cols = [*runtime_cols, *features_cols, 'Y']

In [4]:
# Display the column labels that were picked up as runtime columns.
runtime_cols

['A*+OD+ID Runtime',
 'MA-CBS-Global-10/(EPEA*/SIC) choosing the first conflict in CBS nodes Runtime',
 'Basic-CBS/(A*/SIC)+ID Runtime',
 'ICTS 3E +ID Runtime',
 'EPEA*+ID Runtime']

## Create Y Column with argmin on runtime_cols ##

In [5]:
# For every row, take the label of the runtime column holding the smallest
# value and store it as the 'Y' column.
Y = df.loc[:, runtime_cols].idxmin(axis='columns')
df['Y'] = Y

In [6]:
def runtime_to_success(col):
    """Swap the last whitespace-delimited word of ``col`` for ``"Success"``.

    Everything before the last word is kept exactly as written (including
    runs of consecutive spaces), so the result can be used as a lookup key
    for a column that shares the same prefix. Trailing whitespace is dropped.

    Parameters
    ----------
    col : str
        Column label, e.g. ``"Y Runtime"``.

    Returns
    -------
    str
        ``col`` with its final word replaced, e.g. ``"Y Success"``.

    Raises
    ------
    IndexError
        If ``col`` is empty or contains only whitespace.
    """
    trimmed = col.rstrip()
    # split() with no arguments never yields empty strings, so [-1] is the
    # final word; on an empty/blank label it raises IndexError.
    last_word = trimmed.split()[-1]
    # Cut off only the final word so the prefix is not re-joined/normalized.
    return trimmed[:len(trimmed) - len(last_word)] + "Success"

runtime_to_success("Y Runtime")

'Y Success'

In [7]:
def _success_of_selected(row):
    """Return the row's value in the Success column derived from its 'Y' label."""
    return row[runtime_to_success(row['Y'])]


df['Y Success'] = df.apply(_success_of_selected, axis=1)

In [8]:
def _runtime_of_selected(row):
    """Return the row's value in the column named by its 'Y' label."""
    return row[row['Y']]


df['Y Runtime'] = df.apply(_runtime_of_selected, axis=1)

In [9]:
# Number of rows assigned to each 'Y' label.
df['Y'].value_counts()

MA-CBS-Global-10/(EPEA*/SIC) choosing the first conflict in CBS nodes Runtime    3306
ICTS 3E +ID Runtime                                                               997
EPEA*+ID Runtime                                                                  760
Basic-CBS/(A*/SIC)+ID Runtime                                                     631
A*+OD+ID Runtime                                                                  284
Name: Y, dtype: int64

## Add Sparsity feature 

In [10]:
# Grid size is the product of the row count and the column count.
df['GridSize'] = df['GridRows'].mul(df['GridColumns'])

In [11]:
# Sparsity = NumOfAgents / (GridSize - NumOfObstacles).
# Plain column arithmetic is vectorized, so no row-wise apply is needed;
# a zero denominator yields inf/NaN for that row.
sparsity_denominator = df['GridSize'] - df['NumOfObstacles']
df['Sparsity'] = df['NumOfAgents'] / sparsity_denominator

In [12]:
# Register the derived columns as features. Guard against duplicates so that
# re-running this cell does not add the same name twice (duplicate names
# would make df[features_cols] select duplicate columns).
for new_feature in ('GridSize', 'Sparsity'):
    if new_feature not in features_cols:
        features_cols.append(new_feature)


In [13]:
# Add the label column name once; the guard keeps the cell safe to re-run.
if 'Y' not in features_cols:
    features_cols.append('Y')
# 'Y' holds column labels (strings), so restrict the frame to numeric columns
# explicitly before correlating instead of relying on corr() to drop it;
# newer pandas versions raise on non-numeric columns by default.
df[features_cols].select_dtypes(include='number').corr()

Unnamed: 0,GridRows,GridColumns,NumOfAgents,NumOfObstacles,BranchingFactor,ObstacleDensity,AvgDistanceToGoal,MaxDistanceToGoal,MinDistanceToGoal,AvgStartDistances,AvgGoalDistances,PointsAtSPRatio,GridSize,Sparsity
GridRows,1.0,0.994711,0.205476,0.614767,0.037145,0.317547,0.868909,0.886757,0.319098,0.947102,0.94811,-0.618879,0.862172,-0.550734
GridColumns,0.994711,1.0,0.196172,0.662615,0.034997,0.345687,0.845832,0.87009,0.309306,0.962895,0.9634,-0.606807,0.89252,-0.535093
NumOfAgents,0.205476,0.196172,1.0,0.026835,0.401826,0.020335,0.235792,0.376233,-0.34606,0.283907,0.286802,0.360581,0.087612,-0.264622
NumOfObstacles,0.614767,0.662615,0.026835,1.0,0.004848,0.57352,0.22075,0.325759,0.035389,0.71366,0.711916,-0.214241,0.914869,-0.081564
BranchingFactor,0.037145,0.034997,0.401826,0.004848,1.0,0.000515,0.042485,0.082591,-0.095467,0.057307,0.059042,0.179169,0.015894,-0.027661
ObstacleDensity,0.317547,0.345687,0.020335,0.57352,0.000515,1.0,0.14156,0.189172,-0.046987,0.38262,0.383927,-0.266975,0.451894,-0.082759
AvgDistanceToGoal,0.868909,0.845832,0.235792,0.22075,0.042485,0.14156,1.0,0.930397,0.426762,0.786565,0.790002,-0.63067,0.551358,-0.626841
MaxDistanceToGoal,0.886757,0.87009,0.376233,0.325759,0.082591,0.189172,0.930397,1.0,0.237909,0.848835,0.85128,-0.520586,0.625806,-0.581544
MinDistanceToGoal,0.319098,0.309306,-0.34606,0.035389,-0.095467,-0.046987,0.426762,0.237909,1.0,0.195542,0.198951,-0.500265,0.183819,-0.272702
AvgStartDistances,0.947102,0.962895,0.283907,0.71366,0.057307,0.38262,0.786565,0.848835,0.195542,1.0,0.983508,-0.493701,0.899299,-0.478893


## Save labelled data to file

In [14]:
# df.to_csv("data/from-azure/AllData-labelled.csv")