In [17]:
import pandas as pd
import itertools
import json

In [55]:
path_training_res = "./training_res.csv"
df_training_res = pd.read_csv(path_training_res)
path_analysis_res = "./datasets_analysis/data_dist.csv"
df_analysis_res = pd.read_csv(path_analysis_res)

df_ds = df_analysis_res[["ds_name", "test_classes_dist"]]
df_ds["max_dist_val"] = df_ds.apply(lambda row: max(json.loads(row["test_classes_dist"]).values()), axis=1)
df_training_res_with_dist = pd.merge(df_training_res, df_ds, right_on="ds_name", left_on="dataset_name")
df_training_res_with_dist = df_training_res_with_dist[["dataset_name", "model_name", "test_acc", "max_dist_val"]]
df_training_res_with_dist["diff"] = df_training_res_with_dist.apply(lambda row: round(row["test_acc"], 2) > (round(row["max_dist_val"], 2) + 0.1), axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_ds["max_dist_val"] = df_ds.apply(lambda row: max(json.loads(row["test_classes_dist"]).values()), axis=1)


In [56]:
df_training_res_with_dist

Unnamed: 0,dataset_name,model_name,test_acc,max_dist_val,diff
0,cbf,MLP,0.815556,0.335556,True
1,cbf,MCDCNN,0.278889,0.335556,False
2,cbf,Time_CNN,0.331111,0.335556,False
3,cbf,FCN,0.445556,0.335556,True
4,cbf,Encoder,0.860000,0.335556,True
...,...,...,...,...,...
217,large_kitchen_appliances,MCDCNN,0.333333,0.333333,False
218,large_kitchen_appliances,Time_CNN,0.333333,0.333333,False
219,large_kitchen_appliances,FCN,0.701333,0.333333,True
220,large_kitchen_appliances,Encoder,0.402667,0.333333,False


# Number working datasets for each model

In [64]:
working_ds_di = dict()
all_model_names = ["MLP", "MCDCNN", "Time_CNN", "FCN", "Encoder", "Resnet"]
for model in all_model_names:
    print("----------------------------------------------------------------------")
    print("Model name", model)
    working_ds = df_training_res_with_dist[(df_training_res_with_dist["model_name"]==model) & \
                                           (df_training_res_with_dist["diff"] == True)]
    print("Number working datasets: ", len(working_ds))
    working_ds_di[model] = set(working_ds["dataset_name"].unique().tolist())
    if len(working_ds) < 4:
        print(working_ds["dataset_name"].unique())

----------------------------------------------------------------------
Model name MLP
Number working datasets:  22
----------------------------------------------------------------------
Model name MCDCNN
Number working datasets:  3
['electric_devices' 'freezers' 'sony_robot']
----------------------------------------------------------------------
Model name Time_CNN
Number working datasets:  3
['electric_devices' 'mote_strain' 'egg_five_days']
----------------------------------------------------------------------
Model name FCN
Number working datasets:  17
----------------------------------------------------------------------
Model name Encoder
Number working datasets:  29
----------------------------------------------------------------------
Model name Resnet
Number working datasets:  18


# Number working datasets for combinations of models


In [58]:
num_models = len(all_model_names)
num_working_for_comb = list()
for l in range(num_models + 1):
    for curr_models in itertools.combinations(all_model_names, l):
        if len(curr_models) > 1:
            curr_ds = working_ds_di[curr_models[0]]
            for mod in curr_models[1:]:
                curr_ds = working_ds_di[mod].intersection(curr_ds)
            num_working_for_comb.append((curr_models, len(curr_ds), curr_ds))
    

In [59]:
df_num_working_ds = pd.DataFrame(num_working_for_comb, columns=['Models', 'Num_datasets', 'Datasets'])

In [60]:
df_num_working_ds[df_num_working_ds["Num_datasets"] > 3]

Unnamed: 0,Models,Num_datasets,Datasets
2,"(MLP, FCN)",12,"{fifty_words, power_cons, distal_phalanax_tw, ..."
3,"(MLP, Encoder)",22,"{fifty_words, power_cons, swedish_leaf, cricke..."
4,"(MLP, Resnet)",12,"{fifty_words, power_cons, medical_images, swed..."
12,"(FCN, Encoder)",16,"{fifty_words, power_cons, distal_phalanx_outli..."
13,"(FCN, Resnet)",16,"{fifty_words, power_cons, distal_phalanx_outli..."
14,"(Encoder, Resnet)",17,"{fifty_words, power_cons, distal_phalanx_outli..."
22,"(MLP, FCN, Encoder)",12,"{fifty_words, power_cons, distal_phalanax_tw, ..."
23,"(MLP, FCN, Resnet)",11,"{fifty_words, power_cons, distal_phalanax_tw, ..."
24,"(MLP, Encoder, Resnet)",12,"{fifty_words, power_cons, medical_images, swed..."
34,"(FCN, Encoder, Resnet)",15,"{fifty_words, power_cons, distal_phalanx_outli..."


In [61]:
df_num_working_ds["Datasets"][44]

{'cricket_y',
 'distal_phalanax_tw',
 'egg_five_days',
 'electric_devices',
 'face_ucr',
 'fifty_words',
 'mote_strain',
 'power_cons',
 'sony_robot',
 'swedish_leaf',
 'synthetic_control'}

# Accuracy > max dist + 0.2

In [43]:
df_num_working_ds[df_num_working_ds["Num_datasets"] > 3]

Unnamed: 0,Models,Num_datasets,Datasets
2,"(MLP, FCN)",9,"{power_cons, distal_phalanax_tw, swedish_leaf,..."
3,"(MLP, Encoder)",20,"{fifty_words, power_cons, swedish_leaf, cricke..."
4,"(MLP, Resnet)",9,"{distal_phalanax_tw, swedish_leaf, synthetic_c..."
12,"(FCN, Encoder)",12,"{power_cons, distal_phalanax_tw, swedish_leaf,..."
13,"(FCN, Resnet)",12,"{distal_phalanax_tw, swedish_leaf, large_kitch..."
14,"(Encoder, Resnet)",13,"{distal_phalanax_tw, swedish_leaf, synthetic_c..."
22,"(MLP, FCN, Encoder)",9,"{power_cons, distal_phalanax_tw, swedish_leaf,..."
23,"(MLP, FCN, Resnet)",8,"{distal_phalanax_tw, swedish_leaf, synthetic_c..."
24,"(MLP, Encoder, Resnet)",9,"{distal_phalanax_tw, swedish_leaf, synthetic_c..."
34,"(FCN, Encoder, Resnet)",11,"{distal_phalanax_tw, swedish_leaf, synthetic_c..."


In [44]:
df_num_working_ds["Datasets"][44]

{'cricket_y',
 'distal_phalanax_tw',
 'egg_five_days',
 'electric_devices',
 'mote_strain',
 'sony_robot',
 'swedish_leaf',
 'synthetic_control'}

# Working models for datasets

In [49]:
working_ds

Unnamed: 0_level_0,model_name
dataset_name,Unnamed: 1_level_1
arrow_head,MLP Encoder
bme,MLP Encoder
car,MLP Encoder
cbf,MLP Encoder
coffee_beans,MLP Encoder
cricket_y,MLP FCN Encoder Resnet
distal_phalanax_tw,MLP FCN Encoder Resnet
distal_phalanx_outline,Resnet
electric_devices,MLP FCN Encoder Resnet
face_ucr,MLP Encoder Resnet


In [None]:
working_ds_based_on_cf_matrix_for_4_models = ["swedish_leaf", "distal_phalanx_outline", "strawberry", \
                                 "electric_devices", "synthetic_control", "cricket_y", "power_cons"]