In [1]:
import pandas as pd

In [7]:
# Load the neural vs. non-neural comparison table and preview its first rows.
df = pd.read_csv(filepath_or_buffer="./neural_non_neural_comparison.csv")
df.head(n=5)

Unnamed: 0,dataset_fold_id,Accuracy__test_neural,Accuracy__test_non-neural,F1__test_neural,F1__test_non-neural,MSE__test_neural,MSE__test_non-neural,Log Loss__test_neural,Log Loss__test_non-neural,alg_name_neural,alg_name_non-neural,time__train_neural,time__train_non-neural,time__test_neural,time__test_non-neural
0,openml__Amazon_employee_access__34539__fold_0,0.945072,0.945682,0.945072,0.945682,,,0.161965,0.154921,SAINT,CatBoost,121.02103,1.854008,0.144149,0.041222
1,openml__Amazon_employee_access__34539__fold_1,0.951175,0.951175,0.951175,0.951175,,,0.159829,0.154622,SAINT,LightGBM,121.650418,4.938074,0.150993,0.221166
2,openml__Amazon_employee_access__34539__fold_2,0.946292,0.948123,0.946292,0.948123,,,0.168089,0.154905,SAINT,LightGBM,88.650241,4.01522,0.130198,0.118386
3,openml__Amazon_employee_access__34539__fold_3,0.946597,0.949954,0.946597,0.949954,,,0.163559,0.158825,SAINT,LightGBM,127.776521,4.45651,0.159156,0.143771
4,openml__Amazon_employee_access__34539__fold_4,0.957583,0.955447,0.957583,0.955447,,,0.142165,0.146234,SAINT,LightGBM,93.065,3.04613,0.140077,0.066967


## Analyze difference between neural/non-neural algs

In [8]:
# Calculate the difference between neural and non-neural performance for each metric.
# Every "<metric>_diff" column is (neural - non-neural), so whether a positive value is
# good for the neural method depends on whether higher or lower is better for that metric.
metric_list = ["Accuracy__test", "F1__test", "MSE__test", "Log Loss__test", "time__train", "time__test"]

for metric in metric_list:
    df[f"{metric}_diff"] = df[f"{metric}_neural"] - df[f"{metric}_non-neural"]

# Include the dataset name as well: strip the trailing "__fold_<k>" from the fold id.
# An end-anchored regex is used instead of slicing off a fixed number of characters, so
# fold indices with more than one digit (e.g. "__fold_10") are stripped correctly too.
df["dataset_name"] = df["dataset_fold_id"].str.replace(r"__fold_\d+$", "", regex=True)

In [9]:
# Count the dataset splits on which the neural method beats the non-neural one.
# The *_diff columns are (neural - non-neural), so the "winning" sign depends on the metric:
# positive for F1 (higher is better), negative for log loss (lower is better).
print(
    "number of dataset splits where neural methods achieve higher (better) F1 score :",
    df["F1__test_diff"].gt(0).value_counts(),
    sep="\n",
)

print(
    "number of dataset splits where neural methods achieve lower (better) log loss :",
    df["Log Loss__test_diff"].lt(0).value_counts(),
    sep="\n",
)

number of dataset splits where neural methods achieve higher (better) F1 score :
False    975
True     335
Name: F1__test_diff, dtype: int64
number of dataset splits where neural methods achieve lower (better) log loss :
False    891
True     419
Name: Log Loss__test_diff, dtype: int64


So, neural networks win on roughly 25% of all dataset splits by F1 score (335 of 1310) and on roughly 32% by log loss (419 of 1310).

In [10]:
# Which algorithm is most often the best one within each family (neural / non-neural)?
for heading, alg_column in (
    ("most-winning neural algs:", "alg_name_neural"),
    ("most-winning non-neural algs:", "alg_name_non-neural"),
):
    print(heading)
    print(df[alg_column].value_counts())

most-winning neural algs:
TabNet                564
DANet                 181
SAINT                 148
TabTransformer         84
rtdl_FTTransformer     69
rtdl_ResNet            68
MLP                    61
rtdl_MLP               47
NODE                   37
STG                    35
VIME                    9
DeepFM                  6
NAM                     1
Name: alg_name_neural, dtype: int64
most-winning non-neural algs:
CatBoost        331
LightGBM        328
XGBoost         201
SVM             154
DecisionTree    139
RandomForest     71
KNN              55
LinearModel      31
Name: alg_name_non-neural, dtype: int64


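Among the neural methods, TabNet is by far the most frequent best performer (564 of 1310 splits), while among the non-neural methods the wins are spread across several tree-based methods (CatBoost, LightGBM, XGBoost).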

## Assess dataset metafeatures

In [11]:
# Load the dataset meta-features; they are joined onto the results in a later cell.
metafeatures_df = pd.read_csv(filepath_or_buffer="../TabSurvey/metafeatures.csv")
print(metafeatures_df.head(n=5))

                 dataset_name  f__pymfe.landmarking.best_node.count  \
0  openml__cjs__14967__fold_0                                    10   
1  openml__cjs__14967__fold_1                                    10   
2  openml__cjs__14967__fold_2                                    10   
3  openml__cjs__14967__fold_3                                    10   
4  openml__cjs__14967__fold_4                                    10   

   f__pymfe.landmarking.best_node.count.relative  \
0                                            4.0   
1                                            4.0   
2                                            4.0   
3                                            4.0   
4                                            4.0   

   f__pymfe.landmarking.best_node.histogram.0  \
0                                         0.3   
1                                         0.1   
2                                         0.3   
3                                         0.1   
4              

In [12]:
# Left-join the meta-features onto the per-fold results. The fold-level key is named
# "dataset_fold_id" in df but "dataset_name" in metafeatures_df.
# NOTE(review): df already has its own (fold-stripped) "dataset_name" column, so the merged
# frame presumably ends up with suffixed "dataset_name_x"/"dataset_name_y" columns - confirm.
merged_df = pd.merge(
    df,
    metafeatures_df,
    how="left",
    left_on="dataset_fold_id",
    right_on="dataset_name",
)

In [13]:
# Display the merged frame as a visual sanity check of the join.
merged_df

Unnamed: 0,dataset_fold_id,Accuracy__test_neural,Accuracy__test_non-neural,F1__test_neural,F1__test_non-neural,MSE__test_neural,MSE__test_non-neural,Log Loss__test_neural,Log Loss__test_non-neural,alg_name_neural,...,f__pymfe.relative.worst_node.quantiles.4,f__pymfe.relative.worst_node.quantiles.4.relative,f__pymfe.relative.worst_node.range,f__pymfe.relative.worst_node.range.relative,f__pymfe.relative.worst_node.sd,f__pymfe.relative.worst_node.sd.relative,f__pymfe.relative.worst_node.skewness,f__pymfe.relative.worst_node.skewness.relative,f__pymfe.statistical.iq_range,f__pymfe.statistical.t_mean
0,openml__Amazon_employee_access__34539__fold_0,0.945072,0.945682,0.945072,0.945682,,,0.161965,0.154921,SAINT,...,,7.0,,7.0,,7.0,,7.0,,
1,openml__Amazon_employee_access__34539__fold_1,0.951175,0.951175,0.951175,0.951175,,,0.159829,0.154622,SAINT,...,,7.0,,7.0,,7.0,,7.0,,
2,openml__Amazon_employee_access__34539__fold_2,0.946292,0.948123,0.946292,0.948123,,,0.168089,0.154905,SAINT,...,,7.0,,7.0,,7.0,,7.0,,
3,openml__Amazon_employee_access__34539__fold_3,0.946597,0.949954,0.946597,0.949954,,,0.163559,0.158825,SAINT,...,,7.0,,7.0,,7.0,,7.0,,
4,openml__Amazon_employee_access__34539__fold_4,0.957583,0.955447,0.957583,0.955447,,,0.142165,0.146234,SAINT,...,,7.0,,7.0,,7.0,,7.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1305,openml__yeast__145793__fold_5,0.653543,0.669291,0.651901,0.672491,,,0.856060,0.772932,TabNet,...,0.25,1.5,0.014286,1.5,0.004821,2.0,-1.639149,2.0,,
1306,openml__yeast__145793__fold_6,0.614173,0.669291,0.613821,0.669725,,,1.062902,0.822254,TabNet,...,0.25,1.5,0.007143,1.5,0.003012,2.0,-1.280722,3.0,,
1307,openml__yeast__145793__fold_7,0.637795,0.685039,0.632844,0.678220,,,0.983789,0.854731,STG,...,0.25,1.5,0.007143,2.0,0.002259,2.0,-2.276840,2.0,,
1308,openml__yeast__145793__fold_8,0.622047,0.677165,0.620988,0.672749,,,0.967557,0.879122,TabNet,...,0.25,1.5,0.007143,2.0,0.002259,2.0,-2.276840,1.0,,


In [19]:
# Correlation (Pearson, the Series.corr default) between the neural-minus-non-neural
# log-loss difference and each meta-feature.

metric_col = "Log Loss__test_diff"

# Every column of the meta-feature table except the first one (the dataset_name key).
metafeature_cols = metafeatures_df.columns[1:]

corr_col_name = f"corr_with_{metric_col}"
metric_values = merged_df[metric_col]

# One correlation coefficient per meta-feature column, in column order.
corrs = [metric_values.corr(merged_df[col]) for col in metafeature_cols]

corr_df = pd.DataFrame({"metafeature": metafeature_cols, corr_col_name: corrs})

# The absolute value allows ranking by correlation strength regardless of sign.
corr_df["abs_corr"] = corr_df[corr_col_name].abs()

In [21]:
# Show the 50 meta-features whose correlation with the metric difference is strongest.
ranked_corr_df = corr_df.sort_values(by="abs_corr", ascending=False)
print(ranked_corr_df.head(n=50))

                                           metafeature  \
44                f__pymfe.landmarking.best_node.range   
1296                 f__pymfe.relative.best_node.range   
46                   f__pymfe.landmarking.best_node.sd   
1298                    f__pymfe.relative.best_node.sd   
1596                f__pymfe.relative.worst_node.range   
344              f__pymfe.landmarking.worst_node.range   
96                    f__pymfe.landmarking.elite_nn.sd   
1348                     f__pymfe.relative.elite_nn.sd   
1346                  f__pymfe.relative.elite_nn.range   
94                 f__pymfe.landmarking.elite_nn.range   
1548                  f__pymfe.relative.random_node.sd   
296                f__pymfe.landmarking.random_node.sd   
196                f__pymfe.landmarking.naive_bayes.sd   
1448                  f__pymfe.relative.naive_bayes.sd   
294             f__pymfe.landmarking.random_node.range   
1546               f__pymfe.relative.random_node.range   
1498          

Interesting that all entries with the largest correlations have positive correlations. Also, most of these are landmarking-based metafeatures.

For reference, we are correlating two numbers:
- metafeatures (e.g., performance of a landmarking method)
- the difference in a performance metric between the two families (neural - non-neural). For log loss, lower values are better, so **positive values mean that the non-neural method performs better than the neural method**.

A high positive correlation here means: higher values of the metafeature <==> non-neural method performs better.