In [2]:
import pandas as pd
import ast

In [7]:
# Function name mapping for readability
name_mapping = {
    'get_covered_packages': 'package',
    'get_failing_tests_covered_classes': 'class',
    'get_failing_tests_covered_methods_for_class': 'method',
    'get_code_snippet': 'snippet',
    'get_comments': 'comment'
}

def shorten_pattern(pattern_tuple):
    """Convert long function names to short versions"""
    return ' -> '.join([name_mapping.get(func, func) for func in pattern_tuple])

In [12]:
n = 4 # n-gram
benchmarks = ['d4j', 'bip']
models = ['llama3', 'llama3.1', 'mistral-nemo', 'qwen2.5-coder']

for bm in benchmarks:
    for model in models:
        correlation_file = f'./correlation/autofl/{n}-gram/{bm}/{model}/correlation.csv'
        df = pd.read_csv(correlation_file)
        df['pattern'] = df['pattern'].apply(ast.literal_eval)
        df['short_pattern'] = df['pattern'].apply(shorten_pattern)

        top_5_high = df.sort_values(by='correlation', ascending=False).head(5)
        top_5_low = df.sort_values(by='correlation', ascending=True).head(5)
        print(f"=================={bm}: {model}==================")
        print("=== Top 5 high ===")
        print(top_5_high[['short_pattern', 'correlation', 'p-value']])
        print("=== Top 5 low ===")
        print(top_5_low[['short_pattern', 'correlation', 'p-value']])


=== Top 5 high ===
                              short_pattern  correlation       p-value
0     class -> method -> snippet -> comment     0.124546  1.119029e-13
2   method -> snippet -> snippet -> snippet     0.087701  1.800158e-07
8  snippet -> snippet -> snippet -> comment     0.076909  4.770552e-06
6   method -> snippet -> comment -> snippet     0.070276  2.927467e-05
1     class -> method -> snippet -> snippet     0.068881  4.204946e-05
=== Top 5 low ===
                            short_pattern  correlation   p-value
7   class -> method -> comment -> snippet    -0.034164  0.042389
3     class -> method -> method -> method    -0.034114  0.042694
21     class -> method -> method -> class    -0.030764  0.067610
29     class -> method -> class -> method    -0.028961  0.085357
9    method -> method -> method -> method    -0.028176  0.094178
=== Top 5 high ===
                              short_pattern  correlation       p-value
3     class -> method -> snippet -> comment     0.116699 

In [11]:
n = 5 # n-gram
benchmarks = ['d4j', 'bip']
models = ['llama3', 'llama3.1', 'mistral-nemo', 'qwen2.5-coder']

for bm in benchmarks:
    for model in models:
        correlation_file = f'./correlation/autofl/{n}-gram/{bm}/{model}/correlation.csv'
        df = pd.read_csv(correlation_file)
        df['pattern'] = df['pattern'].apply(ast.literal_eval)
        df['short_pattern'] = df['pattern'].apply(shorten_pattern)

        top_5_high = df.sort_values(by='correlation', ascending=False).head(5)
        top_5_low = df.sort_values(by='correlation', ascending=True).head(5)
        print(f"=================={bm}: {model}==================")
        print("=== Top 5 high ===")
        print(top_5_high[['short_pattern', 'correlation', 'p-value']])
        print("=== Top 5 low ===")
        print(top_5_low[['short_pattern', 'correlation', 'p-value']])


=== Top 5 high ===
                                        short_pattern  correlation   p-value
0    class -> method -> snippet -> snippet -> snippet     0.074302  0.000010
3   method -> snippet -> snippet -> snippet -> com...     0.073738  0.000012
8   snippet -> snippet -> snippet -> comment -> sn...     0.066395  0.000079
1    class -> method -> snippet -> comment -> snippet     0.064245  0.000134
28  method -> method -> snippet -> snippet -> snippet     0.050632  0.002620
=== Top 5 low ===
                                        short_pattern  correlation   p-value
4       class -> method -> method -> method -> method    -0.027285  0.105059
12     method -> method -> method -> method -> method    -0.020759  0.217558
21     class -> method -> snippet -> class -> snippet    -0.010057  0.550304
29     class -> method -> snippet -> comment -> class    -0.007775  0.644244
17  method -> snippet -> method -> snippet -> comment    -0.007775  0.644244
=== Top 5 high ===
                    

In [13]:
n = 3 # n-gram
benchmarks = ['d4j', 'bip']
models = ['llama3', 'llama3.1', 'mistral-nemo', 'qwen2.5-coder']

for bm in benchmarks:
    for model in models:
        correlation_file = f'./correlation/autofl/{n}-gram/{bm}/{model}/correlation.csv'
        df = pd.read_csv(correlation_file)
        df['pattern'] = df['pattern'].apply(ast.literal_eval)
        df['short_pattern'] = df['pattern'].apply(shorten_pattern)

        top_5_high = df.sort_values(by='correlation', ascending=False).head(5)
        top_5_low = df.sort_values(by='correlation', ascending=True).head(5)
        print(f"=================={bm}: {model}==================")
        print("=== Top 5 high ===")
        print(top_5_high[['short_pattern', 'correlation', 'p-value']])
        print("=== Top 5 low ===")
        print(top_5_low[['short_pattern', 'correlation', 'p-value']])


=== Top 5 high ===
                   short_pattern  correlation       p-value
1   method -> snippet -> comment     0.122856  2.397947e-13
5  snippet -> snippet -> snippet     0.095285  1.410345e-08
0     class -> method -> snippet     0.092869  3.245035e-08
8  snippet -> comment -> snippet     0.092421  3.777672e-08
2   method -> snippet -> snippet     0.080004  1.942910e-06
=== Top 5 low ===
                 short_pattern  correlation   p-value
29     class -> class -> class    -0.070377  0.000029
15    class -> class -> method    -0.058278  0.000532
3    class -> method -> method    -0.053512  0.001470
7   class -> method -> comment    -0.044874  0.007663
13    class -> method -> class    -0.041852  0.012890
=== Top 5 high ===
                   short_pattern  correlation       p-value
0     class -> method -> snippet     0.182089  1.078132e-27
4     class -> method -> comment     0.116984  3.126071e-12
5   method -> comment -> snippet     0.101375  1.575199e-09
7   method -> snippe

In [15]:
name_mapping = {
    'get_covered_packages_pass': 'package_pass',
    'get_failing_tests_covered_classes_pass': 'class_pass',
    'get_failing_tests_covered_methods_for_class_pass': 'method_pass',
    'get_code_snippet_pass': 'snippet_pass',
    'get_comments_pass': 'comment_pass',
    'get_covered_packages_fail': 'package_fail',
    'get_failing_tests_covered_classes_fail': 'class_fail',
    'get_failing_tests_covered_methods_for_class_fail': 'method_fail',
    'get_code_snippet_fail': 'snippet_fail',
    'get_comments_fail': 'comment_fail'
}

def shorten_pattern(pattern_tuple):
    """Convert long function names to short versions"""
    return ' -> '.join([name_mapping.get(func, func) for func in pattern_tuple])

In [16]:
n = 4 # n-gram
benchmarks = ['d4j', 'bip']
models = ['llama3', 'llama3.1', 'mistral-nemo', 'qwen2.5-coder']

for bm in benchmarks:
    for model in models:
        correlation_file = f'./correlation/autofl/{n}-gram/{bm}/{model}/correlation_call_ok.csv'
        df = pd.read_csv(correlation_file)
        df['pattern'] = df['pattern'].apply(ast.literal_eval)
        df['short_pattern'] = df['pattern'].apply(shorten_pattern)

        top_5_high = df.sort_values(by='correlation', ascending=False).head(5)
        top_5_low = df.sort_values(by='correlation', ascending=True).head(5)
        print(f"=================={bm}: {model}==================")
        print("=== Top 5 high ===")
        print(top_5_high[['short_pattern', 'correlation', 'p-value']])
        print("=== Top 5 low ===")
        print(top_5_low[['short_pattern', 'correlation', 'p-value']])


=== Top 5 high ===
                                        short_pattern  correlation  \
0   class_pass -> method_pass -> snippet_pass -> c...     0.159393   
4   method_pass -> snippet_fail -> snippet_fail ->...     0.080145   
1   class_pass -> method_pass -> snippet_fail -> s...     0.077929   
9   snippet_fail -> snippet_fail -> snippet_fail -...     0.064590   
10  snippet_fail -> snippet_fail -> comment_fail -...     0.064590   

         p-value  
0   1.607227e-21  
4   1.863531e-06  
1   3.561241e-06  
9   1.229410e-04  
10  1.229410e-04  
=== Top 5 low ===
                                        short_pattern  correlation   p-value
20  class_pass -> method_fail -> comment_pass -> s...    -0.048397  0.004026
3   class_pass -> method_fail -> method_fail -> me...    -0.046964  0.005257
6   method_fail -> method_fail -> method_fail -> m...    -0.028633  0.088958
5   class_pass -> method_fail -> snippet_pass -> c...    -0.027700  0.099869
26  class_pass -> method_fail -> method_fai

In [17]:
n = 5 # n-gram
benchmarks = ['d4j', 'bip']
models = ['llama3', 'llama3.1', 'mistral-nemo', 'qwen2.5-coder']

for bm in benchmarks:
    for model in models:
        correlation_file = f'./correlation/autofl/{n}-gram/{bm}/{model}/correlation_call_ok.csv'
        df = pd.read_csv(correlation_file)
        df['pattern'] = df['pattern'].apply(ast.literal_eval)
        df['short_pattern'] = df['pattern'].apply(shorten_pattern)

        top_5_high = df.sort_values(by='correlation', ascending=False).head(5)
        top_5_low = df.sort_values(by='correlation', ascending=True).head(5)
        print(f"=================={bm}: {model}==================")
        print("=== Top 5 high ===")
        print(top_5_high[['short_pattern', 'correlation', 'p-value']])
        print("=== Top 5 low ===")
        print(top_5_low[['short_pattern', 'correlation', 'p-value']])


=== Top 5 high ===
                                        short_pattern  correlation   p-value
0   class_pass -> method_pass -> snippet_fail -> s...     0.075339  0.000007
5   snippet_fail -> snippet_fail -> snippet_fail -...     0.065386  0.000101
18  method_pass -> snippet_pass -> snippet_pass ->...     0.061154  0.000277
2   class_pass -> method_pass -> snippet_pass -> c...     0.060630  0.000313
4   method_pass -> snippet_fail -> snippet_fail ->...     0.054144  0.001290
=== Top 5 low ===
                                        short_pattern  correlation   p-value
1   class_pass -> method_fail -> method_fail -> me...    -0.027720  0.099621
9   method_fail -> method_fail -> method_fail -> m...    -0.019196  0.254204
27  class_pass -> method_pass -> snippet_fail -> c...    -0.015715  0.350607
11  class_pass -> method_fail -> method_fail -> me...    -0.006587  0.695630
14  class_pass -> method_fail -> method_fail -> sn...    -0.004601  0.784667
=== Top 5 high ===
                    

In [18]:
n = 3 # n-gram
benchmarks = ['d4j', 'bip']
models = ['llama3', 'llama3.1', 'mistral-nemo', 'qwen2.5-coder']

for bm in benchmarks:
    for model in models:
        correlation_file = f'./correlation/autofl/{n}-gram/{bm}/{model}/correlation_call_ok.csv'
        df = pd.read_csv(correlation_file)
        df['pattern'] = df['pattern'].apply(ast.literal_eval)
        df['short_pattern'] = df['pattern'].apply(shorten_pattern)

        top_5_high = df.sort_values(by='correlation', ascending=False).head(5)
        top_5_low = df.sort_values(by='correlation', ascending=True).head(5)
        print(f"=================={bm}: {model}==================")
        print("=== Top 5 high ===")
        print(top_5_high[['short_pattern', 'correlation', 'p-value']])
        print("=== Top 5 low ===")
        print(top_5_low[['short_pattern', 'correlation', 'p-value']])


=== Top 5 high ===
                                   short_pattern  correlation       p-value
2    method_pass -> snippet_pass -> comment_pass     0.163709  1.252243e-22
0      class_pass -> method_pass -> snippet_pass     0.119888  8.918609e-13
6   snippet_fail -> snippet_fail -> snippet_fail     0.091953  4.426208e-08
7    method_pass -> snippet_fail -> snippet_fail     0.080755  1.554491e-06
13  snippet_fail -> comment_fail -> snippet_fail     0.070397  2.835498e-05
=== Top 5 low ===
                                  short_pattern  correlation   p-value
4     class_pass -> method_fail -> snippet_pass    -0.078831  0.000003
17    class_pass -> method_fail -> comment_pass    -0.076924  0.000005
25      class_pass -> method_fail -> class_pass    -0.055296  0.001014
3      class_pass -> method_fail -> method_fail    -0.052303  0.001880
27  method_fail -> comment_pass -> snippet_pass    -0.043540  0.009676
=== Top 5 high ===
                                  short_pattern  correlation  