In [41]:
import pandas as pd
import warnings
from apyori import apriori
warnings.filterwarnings('ignore')

In [42]:
df = pd.read_csv('../Dataset/Dataset_EditActions.csv')
df = df.drop(['Architecture', 'Reproducible?', 'Code Snippet Present?', 'System Configuration Present?', 'Data Description Present?', 'Framework'], axis = 1)

In [43]:
# Replace Training Bug with value T, Model bug with value M, API bug with value A, Tensor & Input Bug with value I, and GPU Bug with value G
df.loc[df['Type of Bug'] == 'Training Bug', 'Type of Bug'] = 'T'
df.loc[df['Type of Bug'] == 'Model Bug', 'Type of Bug'] = 'M'
df.loc[df['Type of Bug'] == 'API Bug', 'Type of Bug'] = 'A'
df.loc[df['Type of Bug'] == 'Tensor and Input Bug', 'Type of Bug'] = 'I'
df.loc[df['Type of Bug'] == 'GPU Bug', 'Type of Bug'] = 'G'
df.loc[df['Type of Bug'] == 'Mixed Bug', 'Type of Bug'] = 'X'

In [44]:
# Map values from new columns as the following:
# Input Data Generation: D 
# Neural Network Definition: N
# Obsolete Parameter Removal: O
# Framework Migration: F
# Dataset Procurement: P
# Models and Tokenizers: M
# Logging: L
# Import Addition and Dependency Resolution: R
# Compiler Error Resolution: C
# Hyperparameter Initialization: H

# Replace all 1's with the corresponding letter
df['Input Data Generation'] = df['Input Data Generation'].replace(1, 'D')
df['Neural Network Definition'] = df['Neural Network Definition'].replace(1, 'N')
df['Obsolete Parameter Removal'] = df['Obsolete Parameter Removal'].replace(1, 'O')
df['Framework Migration'] = df['Framework Migration'].replace(1, 'F')
df['Dataset Procurement'] = df['Dataset Procurement'].replace(1, 'P')
df['Downloading Models and Tokenizers'] = df['Downloading Models and Tokenizers'].replace(1, 'M')
df['Logging'] = df['Logging'].replace(1, 'L')
df['Import Addition and Dependency Resolution'] = df['Import Addition and Dependency Resolution'].replace(1, 'R')
df['Compiler Error Resolution'] = df['Compiler Error Resolution'].replace(1, 'C')
df['Hyperparameter Initialization'] = df['Hyperparameter Initialization'].replace(1, 'H')


df['Input Data Generation'] = df['Input Data Generation'].replace(0, '')
df['Neural Network Definition'] = df['Neural Network Definition'].replace(0, '')
df['Obsolete Parameter Removal'] = df['Obsolete Parameter Removal'].replace(0, '')
df['Framework Migration'] = df['Framework Migration'].replace(0, '')
df['Dataset Procurement'] = df['Dataset Procurement'].replace(0, '')
df['Downloading Models and Tokenizers'] = df['Downloading Models and Tokenizers'].replace(0, '')
df['Logging'] = df['Logging'].replace(0, '')
df['Import Addition and Dependency Resolution'] = df['Import Addition and Dependency Resolution'].replace(0, '')
df['Compiler Error Resolution'] = df['Compiler Error Resolution'].replace(0, '')
df['Hyperparameter Initialization'] = df['Hyperparameter Initialization'].replace(0, '')

In [45]:
training_bugs = df[df['Type of Bug'] == 'T']
gpu_bugs = df[df['Type of Bug'] == 'G']
api_bugs = df[df['Type of Bug'] == 'A']
model_bugs = df[df['Type of Bug'] == 'M']
tensor_bugs = df[df['Type of Bug'] == 'I']
mixed_bugs = df[df['Type of Bug'] == 'X']

training_transactions = []
for i in range(0, len(training_bugs)):
    training_transactions.append([str(training_bugs.values[i,j]) for j in range(0, len(training_bugs.columns)) if str(training_bugs.values[i,j]) != ''])

gpu_transactions = []
for i in range(0, len(gpu_bugs)):
    gpu_transactions.append([str(gpu_bugs.values[i,j]) for j in range(0, len(gpu_bugs.columns)) if str(gpu_bugs.values[i,j]) != ''])

api_transactions = []
for i in range(0, len(api_bugs)):
    api_transactions.append([str(api_bugs.values[i,j]) for j in range(0, len(api_bugs.columns)) if str(api_bugs.values[i,j]) != ''])

model_transactions = []
for i in range(0, len(model_bugs)):
    model_transactions.append([str(model_bugs.values[i,j]) for j in range(0, len(model_bugs.columns)) if str(model_bugs.values[i,j]) != ''])

tensor_transactions = []
for i in range(0, len(tensor_bugs)):
    tensor_transactions.append([str(tensor_bugs.values[i,j]) for j in range(0, len(tensor_bugs.columns)) if str(tensor_bugs.values[i,j]) != ''])

mixed_transactions = []
for i in range(0, len(mixed_bugs)):
    mixed_transactions.append([str(mixed_bugs.values[i,j]) for j in range(0, len(mixed_bugs.columns)) if str(mixed_bugs.values[i,j]) != ''])

transactions = []
for i in range(0, len(df)):
    transactions.append([str(df.values[i,j]) for j in range(0, len(df.columns)) if str(df.values[i,j]) != ''])

In [46]:
def get_apriori_results(rules, character):
    for result in list(rules):
     if character in result.items:
        items = ", ".join(result.items)
        support = result.support
        print(f"Items: {{{items}}}")
        print(f"Support: {support:.4f}")
        if result.ordered_statistics:
            print("Association Rules:")
            for rule in result.ordered_statistics:
                if character in rule.items_base:
                    antecedent = ", ".join(rule.items_base)
                    consequent = ", ".join(rule.items_add)
                    confidence = rule.confidence
                    lift = rule.lift
                    print(f"  {{{antecedent}}} => {{{consequent}}}")
                    print(f"Confidence: {confidence:.4f}")

In [47]:
get_apriori_results(apriori(training_transactions, max_length = 3), 'T')

Items: {T}
Support: 1.0000
Association Rules:
Items: {T, C}
Support: 0.2692
Association Rules:
  {T} => {C}
Confidence: 0.2692
Items: {T, D}
Support: 0.6154
Association Rules:
  {T} => {D}
Confidence: 0.6154
Items: {T, F}
Support: 0.3462
Association Rules:
  {T} => {F}
Confidence: 0.3462
Items: {T, H}
Support: 0.3077
Association Rules:
  {T} => {H}
Confidence: 0.3077
Items: {T, L}
Support: 0.2692
Association Rules:
  {T} => {L}
Confidence: 0.2692
Items: {T, N}
Support: 0.1154
Association Rules:
  {T} => {N}
Confidence: 0.1154
Items: {O, T}
Support: 0.1538
Association Rules:
  {T} => {O}
Confidence: 0.1538
Items: {T, P}
Support: 0.2692
Association Rules:
  {T} => {P}
Confidence: 0.2692
Items: {T, R}
Support: 0.4231
Association Rules:
  {T} => {R}
Confidence: 0.4231
Items: {T, H, C}
Support: 0.1538
Association Rules:
  {T} => {H, C}
Confidence: 0.1538
  {T, C} => {H}
Confidence: 0.5714
  {T, H} => {C}
Confidence: 0.5000
Items: {T, L, C}
Support: 0.1154
Association Rules:
  {T} => {L, C}


In [48]:
get_apriori_results(apriori(model_transactions, max_length = 3), 'M')

Items: {M}
Support: 1.0000
Association Rules:
Items: {M, C}
Support: 0.3000
Association Rules:
  {M} => {C}
Confidence: 0.3000
Items: {M, D}
Support: 0.3500
Association Rules:
  {M} => {D}
Confidence: 0.3500
Items: {M, F}
Support: 0.2000
Association Rules:
  {M} => {F}
Confidence: 0.2000
Items: {M, H}
Support: 0.5000
Association Rules:
  {M} => {H}
Confidence: 0.5000
Items: {M, L}
Support: 0.4500
Association Rules:
  {M} => {L}
Confidence: 0.4500
Items: {M, N}
Support: 0.7500
Association Rules:
  {M} => {N}
Confidence: 0.7500
Items: {O, M}
Support: 0.1500
Association Rules:
  {M} => {O}
Confidence: 0.1500
Items: {M, P}
Support: 0.4000
Association Rules:
  {M} => {P}
Confidence: 0.4000
Items: {M, R}
Support: 0.5500
Association Rules:
  {M} => {R}
Confidence: 0.5500
Items: {M, D, C}
Support: 0.1000
Association Rules:
  {M} => {D, C}
Confidence: 0.1000
  {M, C} => {D}
Confidence: 0.3333
  {M, D} => {C}
Confidence: 0.2857
Items: {M, F, C}
Support: 0.1000
Association Rules:
  {M} => {F, C}


In [49]:
get_apriori_results(apriori(gpu_transactions, max_length = 3), 'G')

Items: {G}
Support: 1.0000
Association Rules:


In [50]:
get_apriori_results(apriori(tensor_transactions, max_length = 3), 'I')

Items: {I}
Support: 1.0000
Association Rules:
Items: {I, C}
Support: 0.3333
Association Rules:
  {I} => {C}
Confidence: 0.3333
Items: {D, I}
Support: 0.5714
Association Rules:
  {I} => {D}
Confidence: 0.5714
Items: {H, I}
Support: 0.5238
Association Rules:
  {I} => {H}
Confidence: 0.5238
Items: {L, I}
Support: 0.4286
Association Rules:
  {I} => {L}
Confidence: 0.4286
Items: {O, I}
Support: 0.3333
Association Rules:
  {I} => {O}
Confidence: 0.3333
Items: {P, I}
Support: 0.1905
Association Rules:
  {I} => {P}
Confidence: 0.1905
Items: {R, I}
Support: 0.4762
Association Rules:
  {I} => {R}
Confidence: 0.4762
Items: {I, D, C}
Support: 0.2857
Association Rules:
  {I} => {D, C}
Confidence: 0.2857
  {I, C} => {D}
Confidence: 0.8571
  {D, I} => {C}
Confidence: 0.5000
Items: {O, I, C}
Support: 0.1905
Association Rules:
  {I} => {O, C}
Confidence: 0.1905
  {I, C} => {O}
Confidence: 0.5714
  {O, I} => {C}
Confidence: 0.5714
Items: {R, I, C}
Support: 0.1905
Association Rules:
  {I} => {C, R}
Confi

In [51]:
get_apriori_results(apriori(mixed_transactions, max_length = 3), 'X')

Items: {X}
Support: 1.0000
Association Rules:
Items: {X, C}
Support: 0.5000
Association Rules:
  {X} => {C}
Confidence: 0.5000
Items: {X, D}
Support: 0.7500
Association Rules:
  {X} => {D}
Confidence: 0.7500
Items: {F, X}
Support: 0.2500
Association Rules:
  {X} => {F}
Confidence: 0.2500
Items: {H, X}
Support: 1.0000
Association Rules:
  {X} => {H}
Confidence: 1.0000
Items: {N, X}
Support: 0.2500
Association Rules:
  {X} => {N}
Confidence: 0.2500
Items: {P, X}
Support: 0.2500
Association Rules:
  {X} => {P}
Confidence: 0.2500
Items: {X, R}
Support: 0.7500
Association Rules:
  {X} => {R}
Confidence: 0.7500
Items: {X, D, C}
Support: 0.2500
Association Rules:
  {X} => {D, C}
Confidence: 0.2500
  {X, C} => {D}
Confidence: 0.5000
  {X, D} => {C}
Confidence: 0.3333
Items: {F, X, C}
Support: 0.2500
Association Rules:
  {X} => {F, C}
Confidence: 0.2500
  {X, C} => {F}
Confidence: 0.5000
  {F, X} => {C}
Confidence: 1.0000
Items: {H, X, C}
Support: 0.5000
Association Rules:
  {X} => {H, C}
Confi

In [52]:
get_apriori_results(apriori(api_transactions, max_length = 3), 'A')

Items: {A}
Support: 1.0000
Association Rules:
Items: {A, D}
Support: 0.3636
Association Rules:
  {A} => {D}
Confidence: 0.3636
Items: {A, H}
Support: 0.3636
Association Rules:
  {A} => {H}
Confidence: 0.3636
Items: {A, L}
Support: 0.1818
Association Rules:
  {A} => {L}
Confidence: 0.1818
Items: {A, N}
Support: 0.1818
Association Rules:
  {A} => {N}
Confidence: 0.1818
Items: {A, P}
Support: 0.2727
Association Rules:
  {A} => {P}
Confidence: 0.2727
Items: {A, R}
Support: 0.4545
Association Rules:
  {A} => {R}
Confidence: 0.4545
Items: {A, H, D}
Support: 0.1818
Association Rules:
  {A} => {H, D}
Confidence: 0.1818
  {A, D} => {H}
Confidence: 0.5000
  {A, H} => {D}
Confidence: 0.5000
Items: {A, D, R}
Support: 0.2727
Association Rules:
  {A} => {D, R}
Confidence: 0.2727
  {A, D} => {R}
Confidence: 0.7500
  {A, R} => {D}
Confidence: 0.6000
Items: {A, H, R}
Support: 0.1818
Association Rules:
  {A} => {H, R}
Confidence: 0.1818
  {A, H} => {R}
Confidence: 0.5000
  {A, R} => {H}
Confidence: 0.40

In [53]:
from pyECLAT import ECLAT

def get_eclat_support(transactions):
    eclat_instance = ECLAT(data=pd.DataFrame(transactions), verbose=True)
    get_ECLAT_indexes, get_ECLAT_supports = eclat_instance.fit(separator='', verbose=True)
    print (len(get_ECLAT_supports))

In [54]:
get_eclat_support(training_transactions)

100%|██████████| 11/11 [00:00<00:00, 236.22it/s]
100%|██████████| 11/11 [00:00<00:00, 11037.64it/s]
100%|██████████| 11/11 [00:00<00:00, 846.18it/s]


Combination 1 by 1


10it [00:00, 89.41it/s]


Combination 2 by 2


45it [00:00, 84.89it/s] 


Combination 3 by 3


8it [00:00, 77.39it/s]

120it [00:01, 83.35it/s]


55


In [55]:
get_eclat_support(gpu_transactions)

100%|██████████| 1/1 [00:00<00:00, 182.78it/s]
100%|██████████| 1/1 [00:00<00:00, 996.75it/s]
100%|██████████| 1/1 [00:00<00:00, 498.08it/s]


Combination 1 by 1


1it [00:00, 66.87it/s]


Combination 2 by 2


0it [00:00, ?it/s]


Combination 3 by 3


0it [00:00, ?it/s]

1





In [56]:
get_eclat_support(model_transactions)

100%|██████████| 11/11 [00:00<00:00, 104.74it/s]
100%|██████████| 11/11 [00:00<00:00, 11037.64it/s]
100%|██████████| 11/11 [00:00<00:00, 478.11it/s]


Combination 1 by 1


10it [00:00, 34.48it/s]


Combination 2 by 2


14it [00:00, 39.25it/s]

45it [00:00, 50.28it/s]


Combination 3 by 3


120it [00:02, 57.06it/s]

112





In [57]:
get_eclat_support(api_transactions)

  0%|          | 0/11 [00:00<?, ?it/s]

100%|██████████| 11/11 [00:00<00:00, 237.37it/s]
100%|██████████| 11/11 [00:00<00:00, 10714.66it/s]
100%|██████████| 11/11 [00:00<00:00, 675.92it/s]


Combination 1 by 1


10it [00:00, 78.80it/s]


Combination 2 by 2


45it [00:00, 77.32it/s]


Combination 3 by 3


120it [00:01, 112.70it/s]

59





In [58]:
get_eclat_support(tensor_transactions)

100%|██████████| 11/11 [00:00<00:00, 212.41it/s]
100%|██████████| 11/11 [00:00<?, ?it/s]
100%|██████████| 11/11 [00:00<00:00, 594.69it/s]


Combination 1 by 1


9it [00:00, 107.72it/s]


Combination 2 by 2


36it [00:00, 103.61it/s]


Combination 3 by 3


84it [00:00, 100.80it/s]

73





In [59]:
get_eclat_support(mixed_transactions)

100%|██████████| 9/9 [00:00<00:00, 202.67it/s]
100%|██████████| 9/9 [00:00<?, ?it/s]
100%|██████████| 9/9 [00:00<00:00, 659.93it/s]


Combination 1 by 1


8it [00:00, 49.28it/s]


Combination 2 by 2


28it [00:00, 74.33it/s]


Combination 3 by 3


56it [00:00, 105.85it/s]

62





In [60]:
get_eclat_support(transactions)

100%|██████████| 16/16 [00:00<00:00, 156.10it/s]
100%|██████████| 16/16 [00:00<?, ?it/s]
100%|██████████| 16/16 [00:00<00:00, 671.52it/s]


Combination 1 by 1


13it [00:00, 84.64it/s]


Combination 2 by 2


78it [00:00, 91.37it/s]


Combination 3 by 3


286it [00:03, 72.41it/s]

63



