In [144]:
import pandas as pd
import warnings
from apyori import apriori
warnings.filterwarnings('ignore')

In [145]:
df = pd.read_csv('../Dataset/Dataset_EditActions.csv')
df = df.drop(['Architecture', 'Reproducible?', 'Code Snippet Present?', 'System Configuration Present?', 'Data Description Present?', 'Framework'], axis = 1)

In [146]:
# Replace Training Bug with value T, Model bug with value M, API bug with value A, Tensor & Input Bug with value I, and GPU Bug with value G
df.loc[df['Type of Bug'] == 'Training Bug', 'Type of Bug'] = 'T'
df.loc[df['Type of Bug'] == 'Model Bug', 'Type of Bug'] = 'M'
df.loc[df['Type of Bug'] == 'API Bug', 'Type of Bug'] = 'A'
df.loc[df['Type of Bug'] == 'Tensor and Input Bug', 'Type of Bug'] = 'I'
df.loc[df['Type of Bug'] == 'GPU Bug', 'Type of Bug'] = 'G'
df.loc[df['Type of Bug'] == 'Mixed Bug', 'Type of Bug'] = 'X'

In [147]:
# Map values from new columns as the following:
# Input Data Generation: D 
# Neural Network Definition: N
# Obsolete Parameter Removal: O
# Framework Migration: F
# Dataset Procurement: P
# Models and Tokenizers: M
# Logging: L
# Import Addition and Dependency Resolution: R
# Compiler Error Resolution: C
# Hyperparameter Initialization: H

# Replace all 1's with the corresponding letter
df['Input Data Generation'] = df['Input Data Generation'].replace(1, 'D')
df['Neural Network Definition'] = df['Neural Network Definition'].replace(1, 'N')
df['Obsolete Parameter Removal'] = df['Obsolete Parameter Removal'].replace(1, 'O')
df['Framework Migration'] = df['Framework Migration'].replace(1, 'F')
df['Dataset Procurement'] = df['Dataset Procurement'].replace(1, 'P')
df['Downloading Models and Tokenizers'] = df['Downloading Models and Tokenizers'].replace(1, 'M')
df['Logging'] = df['Logging'].replace(1, 'L')
df['Import Addition and Dependency Resolution'] = df['Import Addition and Dependency Resolution'].replace(1, 'R')
df['Compiler Error Resolution'] = df['Compiler Error Resolution'].replace(1, 'C')
df['Hyperparameter Initialization'] = df['Hyperparameter Initialization'].replace(1, 'H')


df['Input Data Generation'] = df['Input Data Generation'].replace(0, '')
df['Neural Network Definition'] = df['Neural Network Definition'].replace(0, '')
df['Obsolete Parameter Removal'] = df['Obsolete Parameter Removal'].replace(0, '')
df['Framework Migration'] = df['Framework Migration'].replace(0, '')
df['Dataset Procurement'] = df['Dataset Procurement'].replace(0, '')
df['Downloading Models and Tokenizers'] = df['Downloading Models and Tokenizers'].replace(0, '')
df['Logging'] = df['Logging'].replace(0, '')
df['Import Addition and Dependency Resolution'] = df['Import Addition and Dependency Resolution'].replace(0, '')
df['Compiler Error Resolution'] = df['Compiler Error Resolution'].replace(0, '')
df['Hyperparameter Initialization'] = df['Hyperparameter Initialization'].replace(0, '')

In [148]:
training_bugs = df[df['Type of Bug'] == 'T']
gpu_bugs = df[df['Type of Bug'] == 'G']
api_bugs = df[df['Type of Bug'] == 'A']
model_bugs = df[df['Type of Bug'] == 'M']
tensor_bugs = df[df['Type of Bug'] == 'I']
mixed_bugs = df[df['Type of Bug'] == 'X']

training_transactions = []
for i in range(0, len(training_bugs)):
    training_transactions.append([str(training_bugs.values[i,j]) for j in range(0, len(training_bugs.columns)) if str(training_bugs.values[i,j]) != ''])

gpu_transactions = []
for i in range(0, len(gpu_bugs)):
    gpu_transactions.append([str(gpu_bugs.values[i,j]) for j in range(0, len(gpu_bugs.columns)) if str(gpu_bugs.values[i,j]) != ''])

api_transactions = []
for i in range(0, len(api_bugs)):
    api_transactions.append([str(api_bugs.values[i,j]) for j in range(0, len(api_bugs.columns)) if str(api_bugs.values[i,j]) != ''])

model_transactions = []
for i in range(0, len(model_bugs)):
    model_transactions.append([str(model_bugs.values[i,j]) for j in range(0, len(model_bugs.columns)) if str(model_bugs.values[i,j]) != ''])

tensor_transactions = []
for i in range(0, len(tensor_bugs)):
    tensor_transactions.append([str(tensor_bugs.values[i,j]) for j in range(0, len(tensor_bugs.columns)) if str(tensor_bugs.values[i,j]) != ''])

mixed_transactions = []
for i in range(0, len(mixed_bugs)):
    mixed_transactions.append([str(mixed_bugs.values[i,j]) for j in range(0, len(mixed_bugs.columns)) if str(mixed_bugs.values[i,j]) != ''])

transactions = []
for i in range(0, len(df)):
    transactions.append([str(df.values[i,j]) for j in range(0, len(df.columns)) if str(df.values[i,j]) != ''])

In [149]:
def get_apriori_results(rules, character):
    for result in list(rules):
     if character in result.items:
        items = ", ".join(result.items)
        support = result.support
        print(f"Items: {{{items}}}")
        print(f"Support: {support:.4f}")
        if result.ordered_statistics:
            print("Association Rules:")
            for rule in result.ordered_statistics:
                if character in rule.items_base:
                    antecedent = ", ".join(rule.items_base)
                    consequent = ", ".join(rule.items_add)
                    confidence = rule.confidence
                    lift = rule.lift
                    print(f"  {{{antecedent}}} => {{{consequent}}}")
                    print(f"Confidence: {confidence:.4f}")

In [150]:
get_apriori_results(apriori(training_transactions), 'T')

Items: {T}
Support: 1.0000
Association Rules:
Items: {T, C}
Support: 0.2692
Association Rules:
  {T} => {C}
Confidence: 0.2692
Items: {T, D}
Support: 0.6154
Association Rules:
  {T} => {D}
Confidence: 0.6154
Items: {F, T}
Support: 0.3462
Association Rules:
  {T} => {F}
Confidence: 0.3462
Items: {T, H}
Support: 0.3077
Association Rules:
  {T} => {H}
Confidence: 0.3077
Items: {T, L}
Support: 0.2692
Association Rules:
  {T} => {L}
Confidence: 0.2692
Items: {T, N}
Support: 0.1154
Association Rules:
  {T} => {N}
Confidence: 0.1154
Items: {T, O}
Support: 0.1538
Association Rules:
  {T} => {O}
Confidence: 0.1538
Items: {T, P}
Support: 0.2692
Association Rules:
  {T} => {P}
Confidence: 0.2692
Items: {T, R}
Support: 0.4231
Association Rules:
  {T} => {R}
Confidence: 0.4231
Items: {T, C, H}
Support: 0.1538
Association Rules:
  {T} => {C, H}
Confidence: 0.1538
  {T, C} => {H}
Confidence: 0.5714
  {T, H} => {C}
Confidence: 0.5000
Items: {T, L, C}
Support: 0.1154
Association Rules:
  {T} => {L, C}


In [151]:
get_apriori_results(apriori(model_transactions), 'M')

Items: {M}
Support: 1.0000
Association Rules:
Items: {C, M}
Support: 0.3000
Association Rules:
  {M} => {C}
Confidence: 0.3000
Items: {D, M}
Support: 0.3500
Association Rules:
  {M} => {D}
Confidence: 0.3500
Items: {F, M}
Support: 0.2000
Association Rules:
  {M} => {F}
Confidence: 0.2000
Items: {H, M}
Support: 0.5000
Association Rules:
  {M} => {H}
Confidence: 0.5000
Items: {L, M}
Support: 0.4500
Association Rules:
  {M} => {L}
Confidence: 0.4500
Items: {N, M}
Support: 0.7500
Association Rules:
  {M} => {N}
Confidence: 0.7500
Items: {O, M}
Support: 0.1500
Association Rules:
  {M} => {O}
Confidence: 0.1500
Items: {P, M}
Support: 0.4000
Association Rules:
  {M} => {P}
Confidence: 0.4000
Items: {R, M}
Support: 0.5500
Association Rules:
  {M} => {R}
Confidence: 0.5500
Items: {C, D, M}
Support: 0.1000
Association Rules:
  {M} => {C, D}
Confidence: 0.1000
  {C, M} => {D}
Confidence: 0.3333
  {D, M} => {C}
Confidence: 0.2857
Items: {F, C, M}
Support: 0.1000
Association Rules:
  {M} => {F, C}


In [152]:
get_apriori_results(apriori(gpu_transactions), 'G')

Items: {G}
Support: 1.0000
Association Rules:


In [153]:
get_apriori_results(apriori(tensor_transactions), 'I')

Items: {I}
Support: 1.0000
Association Rules:
Items: {I, C}
Support: 0.3333
Association Rules:
  {I} => {C}
Confidence: 0.3333
Items: {I, D}
Support: 0.5714
Association Rules:
  {I} => {D}
Confidence: 0.5714
Items: {I, H}
Support: 0.5238
Association Rules:
  {I} => {H}
Confidence: 0.5238
Items: {I, L}
Support: 0.4286
Association Rules:
  {I} => {L}
Confidence: 0.4286
Items: {I, O}
Support: 0.3333
Association Rules:
  {I} => {O}
Confidence: 0.3333
Items: {I, P}
Support: 0.1905
Association Rules:
  {I} => {P}
Confidence: 0.1905
Items: {I, R}
Support: 0.4762
Association Rules:
  {I} => {R}
Confidence: 0.4762
Items: {I, C, D}
Support: 0.2857
Association Rules:
  {I} => {C, D}
Confidence: 0.2857
  {I, C} => {D}
Confidence: 0.8571
  {I, D} => {C}
Confidence: 0.5000
Items: {I, C, O}
Support: 0.1905
Association Rules:
  {I} => {C, O}
Confidence: 0.1905
  {I, C} => {O}
Confidence: 0.5714
  {I, O} => {C}
Confidence: 0.5714
Items: {I, R, C}
Support: 0.1905
Association Rules:
  {I} => {C, R}
Confi

In [154]:
get_apriori_results(apriori(mixed_transactions), 'X')

Items: {X}
Support: 1.0000
Association Rules:
Items: {C, X}
Support: 0.5000
Association Rules:
  {X} => {C}
Confidence: 0.5000
Items: {X, D}
Support: 0.7500
Association Rules:
  {X} => {D}
Confidence: 0.7500
Items: {F, X}
Support: 0.2500
Association Rules:
  {X} => {F}
Confidence: 0.2500
Items: {H, X}
Support: 1.0000
Association Rules:
  {X} => {H}
Confidence: 1.0000
Items: {X, N}
Support: 0.2500
Association Rules:
  {X} => {N}
Confidence: 0.2500
Items: {P, X}
Support: 0.2500
Association Rules:
  {X} => {P}
Confidence: 0.2500
Items: {R, X}
Support: 0.7500
Association Rules:
  {X} => {R}
Confidence: 0.7500
Items: {C, X, D}
Support: 0.2500
Association Rules:
  {X} => {C, D}
Confidence: 0.2500
  {C, X} => {D}
Confidence: 0.5000
  {X, D} => {C}
Confidence: 0.3333
Items: {F, C, X}
Support: 0.2500
Association Rules:
  {X} => {F, C}
Confidence: 0.2500
  {C, X} => {F}
Confidence: 0.5000
  {F, X} => {C}
Confidence: 1.0000
Items: {C, H, X}
Support: 0.5000
Association Rules:
  {X} => {C, H}
Confi

### ECLAT

In [155]:
from pyECLAT import ECLAT

def get_eclat_support(transactions):
    eclat_instance = ECLAT(data=pd.DataFrame(transactions), verbose=True)
    get_ECLAT_indexes, get_ECLAT_supports = eclat_instance.fit(separator='', verbose=True)
    print (get_ECLAT_supports)

In [142]:
get_eclat_support(training_transactions)

100%|██████████| 11/11 [00:00<00:00, 576.08it/s]
100%|██████████| 11/11 [00:00<00:00, 10982.47it/s]
100%|██████████| 11/11 [00:00<00:00, 1386.17it/s]


Combination 1 by 1


10it [00:00, 199.11it/s]


Combination 2 by 2


45it [00:00, 270.02it/s]


Combination 3 by 3


120it [00:00, 315.23it/s]

{'P': 0.2692307692307692, 'T': 1.0, 'H': 0.3076923076923077, 'F': 0.34615384615384615, 'R': 0.4230769230769231, 'D': 0.6153846153846154, 'O': 0.15384615384615385, 'L': 0.2692307692307692, 'C': 0.2692307692307692, 'N': 0.11538461538461539, 'PT': 0.2692307692307692, 'PH': 0.11538461538461539, 'PF': 0.11538461538461539, 'PD': 0.11538461538461539, 'PL': 0.11538461538461539, 'PC': 0.15384615384615385, 'TH': 0.3076923076923077, 'TF': 0.34615384615384615, 'TR': 0.4230769230769231, 'TD': 0.6153846153846154, 'TO': 0.15384615384615385, 'TL': 0.2692307692307692, 'TC': 0.2692307692307692, 'TN': 0.11538461538461539, 'HR': 0.15384615384615385, 'HD': 0.15384615384615385, 'HC': 0.15384615384615385, 'FR': 0.11538461538461539, 'FD': 0.23076923076923078, 'RD': 0.23076923076923078, 'RO': 0.15384615384615385, 'RC': 0.15384615384615385, 'DL': 0.15384615384615385, 'OC': 0.15384615384615385, 'LC': 0.11538461538461539, 'CN': 0.11538461538461539, 'PTH': 0.11538461538461539, 'PTF': 0.11538461538461539, 'PTD': 0.




In [156]:
get_eclat_support(gpu_transactions)

100%|██████████| 1/1 [00:00<00:00, 1000.31it/s]
100%|██████████| 1/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00, 955.42it/s]


Combination 1 by 1


1it [00:00, 225.14it/s]


Combination 2 by 2


0it [00:00, ?it/s]


Combination 3 by 3


0it [00:00, ?it/s]

{'G': 1.0}





In [157]:
get_eclat_support(model_transactions)

100%|██████████| 11/11 [00:00<00:00, 472.34it/s]
100%|██████████| 11/11 [00:00<?, ?it/s]
100%|██████████| 11/11 [00:00<00:00, 1571.38it/s]


Combination 1 by 1


10it [00:00, 257.61it/s]


Combination 2 by 2


45it [00:00, 183.74it/s]


Combination 3 by 3


120it [00:00, 236.61it/s]

{'P': 0.4, 'H': 0.5, 'F': 0.2, 'R': 0.55, 'O': 0.15, 'D': 0.35, 'L': 0.45, 'C': 0.3, 'N': 0.75, 'M': 1.0, 'PH': 0.2, 'PF': 0.15, 'PR': 0.2, 'PO': 0.15, 'PD': 0.1, 'PL': 0.3, 'PC': 0.1, 'PN': 0.3, 'PM': 0.4, 'HF': 0.15, 'HR': 0.15, 'HO': 0.1, 'HD': 0.1, 'HL': 0.3, 'HC': 0.2, 'HN': 0.35, 'HM': 0.5, 'FO': 0.1, 'FL': 0.1, 'FC': 0.1, 'FN': 0.2, 'FM': 0.2, 'RD': 0.25, 'RL': 0.2, 'RC': 0.2, 'RN': 0.5, 'RM': 0.55, 'ON': 0.1, 'OM': 0.15, 'DL': 0.15, 'DC': 0.1, 'DN': 0.25, 'DM': 0.35, 'LN': 0.3, 'LM': 0.45, 'CN': 0.3, 'CM': 0.3, 'NM': 0.75, 'PHF': 0.1, 'PHO': 0.1, 'PHL': 0.15, 'PHC': 0.1, 'PHN': 0.2, 'PHM': 0.2, 'PFO': 0.1, 'PFL': 0.1, 'PFN': 0.15, 'PFM': 0.15, 'PRD': 0.1, 'PRL': 0.15, 'PRN': 0.15, 'PRM': 0.2, 'PON': 0.1, 'POM': 0.15, 'PDL': 0.1, 'PDN': 0.1, 'PDM': 0.1, 'PLN': 0.25, 'PLM': 0.3, 'PCN': 0.1, 'PCM': 0.1, 'PNM': 0.3, 'HFO': 0.1, 'HFC': 0.1, 'HFN': 0.15, 'HFM': 0.15, 'HRL': 0.1, 'HRC': 0.1, 'HRN': 0.15, 'HRM': 0.15, 'HON': 0.1, 'HOM': 0.1, 'HDM': 0.1, 'HLN': 0.2, 'HLM': 0.3, 'HCN': 0




In [158]:
get_eclat_support(api_transactions)

100%|██████████| 11/11 [00:00<00:00, 662.30it/s]
100%|██████████| 11/11 [00:00<?, ?it/s]
100%|██████████| 11/11 [00:00<00:00, 2754.14it/s]


Combination 1 by 1


10it [00:00, 240.89it/s]


Combination 2 by 2


45it [00:00, 226.23it/s]


Combination 3 by 3


120it [00:00, 283.84it/s]

{'P': 0.2727272727272727, 'H': 0.36363636363636365, 'F': 0.09090909090909091, 'A': 1.0, 'R': 0.45454545454545453, 'O': 0.09090909090909091, 'D': 0.36363636363636365, 'M': 0.09090909090909091, 'L': 0.18181818181818182, 'N': 0.18181818181818182, 'PH': 0.09090909090909091, 'PA': 0.2727272727272727, 'PR': 0.2727272727272727, 'PD': 0.09090909090909091, 'PN': 0.18181818181818182, 'HA': 0.36363636363636365, 'HR': 0.18181818181818182, 'HD': 0.18181818181818182, 'HM': 0.09090909090909091, 'HL': 0.09090909090909091, 'HN': 0.09090909090909091, 'FA': 0.09090909090909091, 'FO': 0.09090909090909091, 'AR': 0.45454545454545453, 'AO': 0.09090909090909091, 'AD': 0.36363636363636365, 'AM': 0.09090909090909091, 'AL': 0.18181818181818182, 'AN': 0.18181818181818182, 'RD': 0.2727272727272727, 'RM': 0.09090909090909091, 'RN': 0.18181818181818182, 'DM': 0.09090909090909091, 'DL': 0.09090909090909091, 'PHA': 0.09090909090909091, 'PHR': 0.09090909090909091, 'PHN': 0.09090909090909091, 'PAR': 0.2727272727272727, 




In [159]:
get_eclat_support(tensor_transactions)

100%|██████████| 11/11 [00:00<00:00, 501.26it/s]
100%|██████████| 11/11 [00:00<?, ?it/s]
100%|██████████| 11/11 [00:00<00:00, 1561.65it/s]


Combination 1 by 1


9it [00:00, 264.67it/s]


Combination 2 by 2


36it [00:00, 189.09it/s]


Combination 3 by 3


84it [00:00, 235.11it/s]

{'P': 0.19047619047619047, 'H': 0.5238095238095238, 'I': 1.0, 'R': 0.47619047619047616, 'D': 0.5714285714285714, 'O': 0.3333333333333333, 'L': 0.42857142857142855, 'C': 0.3333333333333333, 'N': 0.09523809523809523, 'PI': 0.19047619047619047, 'PD': 0.14285714285714285, 'PO': 0.09523809523809523, 'PC': 0.09523809523809523, 'HI': 0.5238095238095238, 'HR': 0.2857142857142857, 'HD': 0.2857142857142857, 'HO': 0.19047619047619047, 'HL': 0.3333333333333333, 'HC': 0.09523809523809523, 'IR': 0.47619047619047616, 'ID': 0.5714285714285714, 'IO': 0.3333333333333333, 'IL': 0.42857142857142855, 'IC': 0.3333333333333333, 'IN': 0.09523809523809523, 'RD': 0.3333333333333333, 'RO': 0.19047619047619047, 'RL': 0.23809523809523808, 'RC': 0.19047619047619047, 'DO': 0.23809523809523808, 'DL': 0.23809523809523808, 'DC': 0.2857142857142857, 'DN': 0.09523809523809523, 'OL': 0.23809523809523808, 'OC': 0.19047619047619047, 'LC': 0.09523809523809523, 'PID': 0.14285714285714285, 'PIO': 0.09523809523809523, 'PIC': 0.




In [160]:
get_eclat_support(mixed_transactions)

100%|██████████| 9/9 [00:00<00:00, 473.47it/s]
100%|██████████| 9/9 [00:00<?, ?it/s]
100%|██████████| 9/9 [00:00<00:00, 1716.01it/s]


Combination 1 by 1


8it [00:00, 192.48it/s]


Combination 2 by 2


28it [00:00, 204.96it/s]


Combination 3 by 3


56it [00:00, 178.00it/s]

{'P': 0.25, 'H': 1.0, 'F': 0.25, 'R': 0.75, 'D': 0.75, 'C': 0.5, 'X': 1.0, 'N': 0.25, 'PH': 0.25, 'PF': 0.25, 'PD': 0.25, 'PC': 0.25, 'PX': 0.25, 'HF': 0.25, 'HR': 0.75, 'HD': 0.75, 'HC': 0.5, 'HX': 1.0, 'HN': 0.25, 'FD': 0.25, 'FC': 0.25, 'FX': 0.25, 'RD': 0.5, 'RC': 0.25, 'RX': 0.75, 'RN': 0.25, 'DC': 0.25, 'DX': 0.75, 'DN': 0.25, 'CX': 0.5, 'XN': 0.25, 'PHF': 0.25, 'PHD': 0.25, 'PHC': 0.25, 'PHX': 0.25, 'PFD': 0.25, 'PFC': 0.25, 'PFX': 0.25, 'PDC': 0.25, 'PDX': 0.25, 'PCX': 0.25, 'HFD': 0.25, 'HFC': 0.25, 'HFX': 0.25, 'HRD': 0.5, 'HRC': 0.25, 'HRX': 0.75, 'HRN': 0.25, 'HDC': 0.25, 'HDX': 0.75, 'HDN': 0.25, 'HCX': 0.5, 'HXN': 0.25, 'FDC': 0.25, 'FDX': 0.25, 'FCX': 0.25, 'RDX': 0.5, 'RDN': 0.25, 'RCX': 0.25, 'RXN': 0.25, 'DCX': 0.25, 'DXN': 0.25}





In [161]:
get_eclat_support(transactions)

100%|██████████| 16/16 [00:00<00:00, 393.69it/s]
100%|██████████| 16/16 [00:00<?, ?it/s]
100%|██████████| 16/16 [00:00<00:00, 2683.50it/s]


Combination 1 by 1


13it [00:00, 186.45it/s]


Combination 2 by 2


78it [00:00, 199.98it/s]


Combination 3 by 3


286it [00:00, 351.93it/s]

{'P': 0.27058823529411763, 'T': 0.3058823529411765, 'H': 0.43529411764705883, 'I': 0.24705882352941178, 'F': 0.18823529411764706, 'A': 0.12941176470588237, 'R': 0.47058823529411764, 'D': 0.49411764705882355, 'O': 0.17647058823529413, 'L': 0.3176470588235294, 'C': 0.25882352941176473, 'N': 0.27058823529411763, 'M': 0.24705882352941178, 'PT': 0.08235294117647059, 'PH': 0.10588235294117647, 'PF': 0.08235294117647059, 'PR': 0.09411764705882353, 'PD': 0.11764705882352941, 'PL': 0.11764705882352941, 'PC': 0.10588235294117647, 'PN': 0.12941176470588237, 'PM': 0.09411764705882353, 'TH': 0.09411764705882353, 'TF': 0.10588235294117647, 'TR': 0.12941176470588237, 'TD': 0.18823529411764706, 'TL': 0.08235294117647059, 'TC': 0.08235294117647059, 'HI': 0.12941176470588237, 'HR': 0.21176470588235294, 'HD': 0.2, 'HO': 0.09411764705882353, 'HL': 0.17647058823529413, 'HC': 0.1411764705882353, 'HN': 0.1411764705882353, 'HM': 0.12941176470588237, 'IR': 0.11764705882352941, 'ID': 0.1411764705882353, 'IO': 0


