# Compare to:
## 1. An empirical study on program failures of deep learning jobs, Zhang et al, ICSE20

### Automatically label notebooks that use DL

based on what libraries they use

In [34]:
import pandas as pd
import utils.config as config
import utils.util as util
import numpy as np

# Manually labeled error samples: the "processed" table and the "sum" table
# (presumably a summary variant of the same labels; confirm against the
# labeling pipeline).
df_mlerr_labels, df_mlerr_labels_sum = (
    pd.read_excel(config.path_default.joinpath(rel_path))
    for rel_path in (
        'Manual_labeing/cluster_sampled_labeled_processed.xlsx',
        'Manual_labeing/cluster_sampled_labeled_sum.xlsx',
    )
)

# Error tables read from the GitHub and Kaggle processing folders. Only the
# first row per error id (eid) is kept so each eid maps to one lib_alias value
# within each table.
df_err_g = pd.read_excel(
    config.path_github_error_process.joinpath('nberror_g_all_eid_p.xlsx')
).drop_duplicates(subset=['eid'], keep='first')
df_err_k = pd.read_excel(
    config.path_kaggle_error_process.joinpath('nberror_k_eid_p.xlsx')
).drop_duplicates(subset=['eid'], keep='first')

def get_DL(df_mlerr_labels, save_path = None):
    """Keep only the labeled errors whose notebook is flagged as DL.

    The labeled table is left-joined on ``eid`` with the ``lib_alias`` column
    of the notebook-level tables ``df_err_g`` and ``df_err_k`` (read from the
    enclosing notebook namespace). ``util.lib_alias_isDL`` is applied to each
    ``lib_alias`` value to produce the DL flag.

    Two ratios are printed: the share of labeled rows flagged as DL, and the
    share of distinct ``fname`` values that have at least one DL-flagged row.

    Args:
        df_mlerr_labels: labeled error table with at least ``eid`` and
            ``fname`` columns.
        save_path: optional destination; when truthy, the DL-only rows
            (without the helper columns) are written there as an Excel file.

    Returns:
        None. The filtered table is only written to ``save_path``.
    """
    # eid -> lib_alias lookup built from both error tables.
    lib_lookup = pd.concat(
        [df_err_g[["eid", "lib_alias"]], df_err_k[["eid", "lib_alias"]]],
        ignore_index=True,
    )
    labeled = df_mlerr_labels.merge(lib_lookup, on="eid", how="left")
    labeled["is_DLnb"] = labeled["lib_alias"].apply(util.lib_alias_isDL)

    dl_mask = labeled["is_DLnb"] == True
    print(sum(labeled["is_DLnb"]) / len(labeled))
    print(labeled.loc[dl_mask, "fname"].nunique() / labeled["fname"].nunique())

    # Drop the helper columns so the saved file has the same schema as the input.
    dl_only = labeled.loc[dl_mask].drop(columns=["is_DLnb", "lib_alias"])
    if save_path:
        dl_only.to_excel(save_path, index=False, engine='xlsxwriter')
    
# Write the DL-only subset of both labeled tables next to the originals.
for labeled_table, dl_output_file in (
    (df_mlerr_labels, "Manual_labeing/cluster_sampled_labeled_DL.xlsx"),
    (df_mlerr_labels_sum, "Manual_labeing/cluster_sampled_labeled_DL_sum.xlsx"),
):
    get_DL(labeled_table, save_path = config.path_default.joinpath(dl_output_file))

0.4584450402144772
0.4599447513812155
0.4584450402144772
0.4599447513812155


### Map our labeling to theirs: 

Main categories:

    Execution environment: 
        label_root_cause == config.label_root_cause["environment"]
    Data (during data preprocessing, data integrity is compromised, e.g., corrupt data, unexpected encoding): 
        label_refined_exp_type == ["jsondecodeerror", "unsupported file type (read file)", "incompleteparseerror"]
        ename=="unicodedecodeerror"
    DL specific: 
        label_if_ML_bug == config.label_if_ML_bug["ML bug"]
    General code error: 
        label_if_ML_bug == config.label_if_ML_bug["python bug"]
    

In [8]:
import pandas as pd
import utils.config as config
import utils.util as util
import numpy as np

# Reload the DL-only label tables from the files get_DL was asked to write.
df_mlerr_labels_DL, df_mlerr_labels_sum_DL = (
    pd.read_excel(config.path_default.joinpath(rel_path))
    for rel_path in (
        'Manual_labeing/cluster_sampled_labeled_DL.xlsx',
        'Manual_labeing/cluster_sampled_labeled_DL_sum.xlsx',
    )
)

In [9]:
# Map each labeled DL error onto the main categories of Zhang et al. (ICSE'20).
#
# The conditions are listed in priority order: np.select assigns the category
# of the FIRST condition that holds, so a row that is both an environment
# problem and an ML bug is counted as "Execution environment". Rows matching
# no condition keep the explicit string sentinel "-1".
#
# This replaces a chain of np.where calls that initialised the column with the
# integer -1 but tested it against the string "-1", which only worked through
# NumPy's implicit int -> str coercion in the first np.where.
comp1_conditions = [
    df_mlerr_labels_DL['label_root_cause'].isin(config.label_root_cause["environment"]),
    df_mlerr_labels_DL['label_refined_exp_type'].isin(["jsondecodeerror", "unsupported file type (read file)", "incompleteparseerror"])
    | (df_mlerr_labels_DL['ename'] == "unicodedecodeerror"),
    df_mlerr_labels_DL['label_if_ML_bug'].isin(config.label_if_ML_bug["ML bug"]),
    df_mlerr_labels_DL['label_if_ML_bug'].isin(config.label_if_ML_bug["python bug"]),
]
comp1_categories = [
    "Execution environment",
    "Data",
    "DL specific",
    "General code error",
]
df_mlerr_labels_DL["Comp1_DLJobs"] = np.select(comp1_conditions, comp1_categories, default="-1")

# Propagate the category to the summary table. Dropping a stale column first
# keeps this cell idempotent: without it, re-running the cell would produce
# Comp1_DLJobs_x / Comp1_DLJobs_y instead of a single Comp1_DLJobs column.
df_mlerr_labels_sum_DL = pd.merge(df_mlerr_labels_sum_DL.drop(columns=["Comp1_DLJobs"], errors="ignore"),
                               df_mlerr_labels_DL[["eid","Comp1_DLJobs"]],
                               on="eid", how="left")

In [33]:
# Number of labeled DL errors per main category.
df_mlerr_labels_DL.Comp1_DLJobs.value_counts()

Comp1_DLJobs
DL specific              205
Execution environment     68
General code error        66
Data                       3
Name: count, dtype: int64

In [10]:
# Share of labeled DL errors per main category (counts divided by total rows).
df_mlerr_labels_DL.Comp1_DLJobs.value_counts().div(len(df_mlerr_labels_DL))

Comp1_DLJobs
DL specific              0.599415
Execution environment    0.198830
General code error       0.192982
Data                     0.008772
Name: count, dtype: float64

Sub-categories:

DL specific:

    GPU out of memory/CPU out of memory:
        label_refined_exp_type=="out of memory (OOM)"
    Framework API misuse:
        label_root_cause.isin(config.label_root_cause["API"])
    Tensor Mismatch:
        label_refined_exp_type=="tensor shape mismatch"
    Loss NaN: - 
    
    ++ what is the top 3 for us?
    
Execution environment:

    Path not found:
        label_root_cause=="file/path not found or exist"
    Library not found:
        label_root_cause=="module not installed"
    Permission denied:
        label_root_cause=="settings(permission, environment)"
    
    ++ what is the top 1 for us?
    
General code error:

    Illegal argument:
        label_root_cause.isin(config.label_root_cause["API"])
    Type mismatch:
        label_refined_exp_type.isin(config.label_refined_exp_type["type"])
    Key not found:
        label_refined_exp_type.isin(config.label_refined_exp_type["key"])
    ...
    
    ++ what is the top 3 for us?
    
Data:

    Corrupt data:
        label_refined_exp_type == ["unsupported file type (read file)", "incompleteparseerror"]
    Unexpected encoding:
        label_refined_exp_type == "jsondecodeerror" | ename=="unicodedecodeerror"

In [25]:
# Sub-category shares inside the "DL specific" main category.
df_mlerr_labels_DL_dlspecific = df_mlerr_labels_DL.loc[df_mlerr_labels_DL["Comp1_DLJobs"].eq("DL specific")]

dlspecific_exp_type = df_mlerr_labels_DL_dlspecific["label_refined_exp_type"]
dlspecific_total = len(df_mlerr_labels_DL_dlspecific)

is_oom = dlspecific_exp_type.eq("out of memory (OOM)")
is_tensor_mismatch = dlspecific_exp_type.eq("tensor shape mismatch")
# API-rooted errors, excluding tensor shape mismatches so the two groups do not overlap.
is_api_misuse = (
    df_mlerr_labels_DL_dlspecific["label_root_cause"].isin(config.label_root_cause["API"])
    & dlspecific_exp_type.ne("tensor shape mismatch")
)

print("GPU/CPU out of memory: ", sum(is_oom) / dlspecific_total)
print("Tensor Mismatch: ", sum(is_tensor_mismatch) / dlspecific_total)
print("Framework API misuse: ", sum(is_api_misuse) / dlspecific_total)

dlspecific_exp_type.value_counts()
# Our top 3 match theirs (though we label API misuse a bit differently: as a root cause).

GPU/CPU out of memory:  0.07804878048780488
Tensor Mismatch:  0.14634146341463414
Framework API misuse:  0.21951219512195122


label_refined_exp_type
tensor shape mismatch                                 30
variable not found                                    28
wrong arguments to API                                25
out of memory (OOM)                                   16
attributeerror                                        13
module not found                                      12
keyerror                                              10
valueerror - data value violation                      9
runtimeerror                                           8
unsupported broadcast                                  5
cast exception                                         5
valueerror - data range mismatch                       5
indexerror-nd                                          5
function not found                                     5
valueerror - feature name mismatch                     4
indexerror-1d                                          4
typeerror-notcallable                                  3
typeerro

In [28]:
# Sub-category shares inside the "Execution environment" main category.
df_mlerr_labels_DL_env = df_mlerr_labels_DL.loc[df_mlerr_labels_DL["Comp1_DLJobs"].eq("Execution environment")]

env_root_cause = df_mlerr_labels_DL_env["label_root_cause"]
# (printed label, root-cause value) pairs, in the order they are reported.
for env_title, env_cause in (
    ("Path not found: ", "file/path not found or exist"),
    ("Library not found: ", "module not installed"),
    ("Permission denied: ", "settings(permission, environment)"),
):
    print(env_title, sum(env_root_cause == env_cause) / len(df_mlerr_labels_DL_env))

env_root_cause.value_counts()
# The top 3 align with theirs.

Path not found:  0.5294117647058824
Library not found:  0.16176470588235295
Permission denied:  0.14705882352941177


label_root_cause
file/path not found or exist         36
module not installed                 11
settings(permission, environment)    10
library versions incompatible         6
change of environment                 3
external control (window closed)      2
Name: count, dtype: int64

In [31]:
# Sub-category shares inside the "General code error" main category.
df_mlerr_labels_DL_py = df_mlerr_labels_DL.loc[df_mlerr_labels_DL["Comp1_DLJobs"].eq("General code error")]

py_total = len(df_mlerr_labels_DL_py)
py_is_api = df_mlerr_labels_DL_py["label_root_cause"].isin(config.label_root_cause["API"])
print("Illegal argument: ", sum(py_is_api) / py_total)

# The remaining groups only count rows NOT rooted in API misuse, so they do not
# overlap with "Illegal argument".
for py_title, exp_type_group in (
    ("Type mismatch: ", "type"),
    ("Key not found: ", "key"),
    ("Name error: ", "name"),
):
    in_group = df_mlerr_labels_DL_py["label_refined_exp_type"].isin(config.label_refined_exp_type[exp_type_group])
    print(py_title, sum(~py_is_api & in_group) / py_total)

df_mlerr_labels_DL_py["label_refined_exp_type"].value_counts()
# Name errors are the most frequent, followed by illegal arguments.

Illegal argument:  0.18181818181818182
Type mismatch:  0.06060606060606061
Key not found:  0.0
Name error:  0.5303030303030303


label_refined_exp_type
variable not found                  17
function not found                  11
attributeerror                       6
module not found                     5
wrong arguments to API               5
indexerror-1d                        3
valueerror - data range mismatch     3
class not found                      2
typeerror                            2
syntaxerror                          2
keyerror                             1
requesterror                         1
typeerror-notiterable                1
systemerror                          1
valueerror                           1
filenotfounderror                    1
typeerror-op                         1
indexerror-nd                        1
typeerror-notcallable                1
typeerror-notsubscriptable           1
Name: count, dtype: int64

In [32]:
# Sub-category shares inside the "Data" main category.
df_mlerr_labels_DL_data = df_mlerr_labels_DL[df_mlerr_labels_DL["Comp1_DLJobs"]=="Data"]

data_total = df_mlerr_labels_DL_data.shape[0]
is_corrupt_data = df_mlerr_labels_DL_data["label_refined_exp_type"].isin(["unsupported file type (read file)", "incompleteparseerror"])
# Both comparisons must be parenthesised: `|` binds tighter than `==`, so
# `(a == x) | b == y` is evaluated as `((a == x) | b) == y`, which compares a
# boolean result with a string and therefore never matches.
is_unexpected_encoding = (
    (df_mlerr_labels_DL_data["label_refined_exp_type"] == "jsondecodeerror")
    | (df_mlerr_labels_DL_data["ename"] == "unicodedecodeerror")
)

print("Corrupt data: ", sum(is_corrupt_data)/data_total)
print("Unexpected encoding: ", sum(is_unexpected_encoding)/data_total)

df_mlerr_labels_DL_data.label_refined_exp_type.value_counts()
# all three cases are corrupted data

Corrupt data:  1.0
Unexpected encoding:  0.0


label_refined_exp_type
unsupported file type (read file)    3
Name: count, dtype: int64

## 2. Bug Analysis in Jupyter Notebook Projects: An Empirical Study, De Santana et al, TSEM2024