In [1]:
import pandas as pd

In [28]:
df = pd.read_csv("./neural_non_neural_comparison.csv")
df.head()

Unnamed: 0,dataset_fold_id,Accuracy__test_neural,Accuracy__test_non-neural,F1__test_neural,F1__test_non-neural,MSE__test_neural,MSE__test_non-neural,Log Loss__test_neural,Log Loss__test_non-neural,alg_name_neural,alg_name_non-neural,time__train_neural,time__train_non-neural,time__test_neural,time__test_non-neural
0,openml__APSFailure__168868__fold_0,0.992763,0.995263,0.992763,0.995263,,,0.019384,0.01648,TabTransformer,LightGBM,156.269556,25.718933,0.123314,0.100582
1,openml__APSFailure__168868__fold_1,0.988684,0.992237,0.988684,0.992237,,,0.030806,0.022713,TabTransformer,XGBoost,336.442136,0.940423,0.112383,0.056885
2,openml__APSFailure__168868__fold_2,0.990395,0.993947,0.990395,0.993947,,,0.026238,0.020097,TabTransformer,XGBoost,380.952997,1.18313,0.117252,0.066141
3,openml__APSFailure__168868__fold_3,0.992368,0.995526,0.992368,0.995526,,,0.024933,0.018053,TabTransformer,XGBoost,381.007164,1.730255,0.118603,0.082695
4,openml__APSFailure__168868__fold_4,0.991184,0.995789,0.991184,0.995789,,,0.030389,0.013555,TabTransformer,XGBoost,178.289103,2.699572,0.142411,0.085889


## Analyze difference between neural/non-neural algs

In [47]:
# calculate difference between nerual/non-neural performance for eahn metric
metric_list = ["Accuracy__test", "F1__test", "MSE__test", "Log Loss__test", "time__train", "time__test"]

for metric in metric_list:
    df.loc[:, f"{metric}_diff"] = df[f"{metric}_neural"] - df[f"{metric}_non-neural"]

# include dataset name as well
df.loc[:, "dataset_name"] = df["dataset_fold_id"].apply(lambda x: x[:-len("__fold_1")])

In [55]:
# for how many datasets do neural methods win?
print("number of dataset splits where neural methods achieve higher (better) F1 score :")
print((df["F1__test_diff"] > 0).value_counts())

print("number of dataset splits where neural methods achieve lower (better) log loss :")
print((df["Log Loss__test_diff"] < 0).value_counts())

number of dataset splits where neural methods achieve higher (better) F1 score :
False    1281
True      399
Name: F1__test_diff, dtype: int64
number of dataset splits where neural methods achieve lower (better) log loss :
False    1191
True      489
Name: Log Loss__test_diff, dtype: int64


So, neural networks win on roughly 25% of all datasets. 

In [58]:
# which algorithms win - from both neural and non-neural?
print("most-winning neural algs:")
print(df["alg_name_neural"].value_counts())

print("most-winning non-neural algs:")
print(df["alg_name_non-neural"].value_counts())

most-winning neural algs:
TabNet                808
DANet                 192
TabTransformer        141
MLP                   125
SAINT                 116
STG                   104
NODE                   47
rtdl_ResNet            42
rtdl_FTTransformer     41
rtdl_MLP               25
DeepFM                 20
VIME                   18
NAM                     1
Name: alg_name_neural, dtype: int64
most-winning non-neural algs:
LightGBM        461
CatBoost        376
XGBoost         331
SVM             185
DecisionTree    161
RandomForest     73
KNN              57
LinearModel      36
Name: alg_name_non-neural, dtype: int64


The neural methods are largely dominated by TabNet, while various tree methods perform well for non-neural.

## Assess dataset metafeatures

In [59]:
# read & merge in meta-features
metafeatures_df = pd.read_csv("../TabSurvey/metafeatures.csv")
print(metafeatures_df.head())

                 dataset_name  f__pymfe.landmarking.best_node.count  \
0  openml__cjs__14967__fold_0                                    10   
1  openml__cjs__14967__fold_1                                    10   
2  openml__cjs__14967__fold_2                                    10   
3  openml__cjs__14967__fold_3                                    10   
4  openml__cjs__14967__fold_4                                    10   

   f__pymfe.landmarking.best_node.count.relative  \
0                                            4.0   
1                                            4.0   
2                                            4.0   
3                                            4.0   
4                                            4.0   

   f__pymfe.landmarking.best_node.histogram.0  \
0                                         0.3   
1                                         0.1   
2                                         0.3   
3                                         0.1   
4              

In [60]:
merged_df = df.merge(metafeatures_df, left_on="dataset_fold_id", right_on="dataset_name", how="left")

In [61]:
merged_df

Unnamed: 0,dataset_fold_id,Accuracy__test_neural,Accuracy__test_non-neural,F1__test_neural,F1__test_non-neural,MSE__test_neural,MSE__test_non-neural,Log Loss__test_neural,Log Loss__test_non-neural,alg_name_neural,...,f__pymfe.relative.worst_node.quantiles.4,f__pymfe.relative.worst_node.quantiles.4.relative,f__pymfe.relative.worst_node.range,f__pymfe.relative.worst_node.range.relative,f__pymfe.relative.worst_node.sd,f__pymfe.relative.worst_node.sd.relative,f__pymfe.relative.worst_node.skewness,f__pymfe.relative.worst_node.skewness.relative,f__pymfe.statistical.iq_range,f__pymfe.statistical.t_mean
0,openml__APSFailure__168868__fold_0,0.992763,0.995263,0.992763,0.995263,,,0.019384,0.016480,TabTransformer,...,0.558756,3.0,0.058756,3.0,0.022957,4.0,1.308457,5.0,,
1,openml__APSFailure__168868__fold_1,0.988684,0.992237,0.988684,0.992237,,,0.030806,0.022713,TabTransformer,...,0.500000,2.0,0.000000,2.0,0.000000,2.0,,7.0,,
2,openml__APSFailure__168868__fold_2,0.990395,0.993947,0.990395,0.993947,,,0.026238,0.020097,TabTransformer,...,0.500000,2.0,0.000000,2.0,0.000000,2.0,,7.0,,
3,openml__APSFailure__168868__fold_3,0.992368,0.995526,0.992368,0.995526,,,0.024933,0.018053,TabTransformer,...,0.500000,2.0,0.000000,2.0,0.000000,2.0,,7.0,,
4,openml__APSFailure__168868__fold_4,0.991184,0.995789,0.991184,0.995789,,,0.030389,0.013555,TabTransformer,...,0.500000,2.0,0.000000,2.0,0.000000,2.0,,7.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1675,openml__yeast__145793__fold_5,0.653543,0.669291,0.651901,0.672491,,,0.856060,0.772932,TabNet,...,0.250000,1.5,0.014286,1.5,0.004821,2.0,-1.639149,2.0,,
1676,openml__yeast__145793__fold_6,0.614173,0.669291,0.613821,0.669725,,,1.062902,0.822254,TabNet,...,0.250000,1.5,0.007143,1.5,0.003012,2.0,-1.280722,3.0,,
1677,openml__yeast__145793__fold_7,0.637795,0.685039,0.632844,0.678220,,,0.983789,0.854731,STG,...,0.250000,1.5,0.007143,2.0,0.002259,2.0,-2.276840,2.0,,
1678,openml__yeast__145793__fold_8,0.622047,0.677165,0.620988,0.672749,,,0.967557,0.879122,TabNet,...,0.250000,1.5,0.007143,2.0,0.002259,2.0,-2.276840,1.0,,


In [89]:
# correlation between Log Loss difference and each meta-feature.

metafeature_cols = metafeatures_df.columns[1:]

metric_col = "Log Loss__test_diff"

corrs = []
for col in metafeature_cols:
    corrs.append(merged_df[metric_col].corr(merged_df[col]))

corr_df = pd.DataFrame(
    {
        "metafeature": metafeature_cols,
        f"corr_with_{metric_col}": corrs,
    }
)

corr_df.loc[:, "abs_corr"] = corr_df[f"corr_with_{metric_col}"]

In [92]:
print(corr_df.sort_values(f"abs_corr").head(20))

                                            metafeature  \
461        f__pymfe.statistical.eigenvalues.histogram.0   
1434                  f__pymfe.relative.naive_bayes.min   
1436          f__pymfe.relative.naive_bayes.quantiles.0   
184        f__pymfe.landmarking.naive_bayes.quantiles.0   
182                f__pymfe.landmarking.naive_bayes.min   
922        f__pymfe.model-based.leaves_homo.histogram.0   
976    f__pymfe.model-based.nodes_per_level.histogram.1   
132               f__pymfe.landmarking.linear_discr.min   
134       f__pymfe.landmarking.linear_discr.quantiles.0   
1384                 f__pymfe.relative.linear_discr.min   
1386         f__pymfe.relative.linear_discr.quantiles.0   
1125         f__pymfe.info-theory.attr_conc.histogram.0   
1395  f__pymfe.relative.linear_discr.quantiles.4.rel...   
143   f__pymfe.landmarking.linear_discr.quantiles.4....   
1379        f__pymfe.relative.linear_discr.max.relative   
127      f__pymfe.landmarking.linear_discr.max.relative 

In [66]:
help(print)

Help on built-in function print in module builtins:

print(...)
    print(value, ..., sep=' ', end='\n', file=sys.stdout, flush=False)
    
    Prints the values to a stream, or to sys.stdout by default.
    Optional keyword arguments:
    file:  a file-like object (stream); defaults to the current sys.stdout.
    sep:   string inserted between values, default a space.
    end:   string appended after the last value, default a newline.
    flush: whether to forcibly flush the stream.



In [81]:
merged_df["Accuracy__test_diff"].corr(merged_df["Log Loss__test_neural"])

-0.5025041657277165