In [70]:
import pandas as pd
import os


def load_and_combine_pkl_files(directory_path,
                               suffix='similarity_perturbed_inferenced_df.pkl'):
    """Load every matching pickled DataFrame in a directory and stack them.

    Parameters
    ----------
    directory_path : str or os.PathLike
        Directory to scan. Only its direct entries are considered
        (no recursion into sub-directories).
    suffix : str, optional
        Only entries whose name ends with this string are loaded. The
        default keeps the filename pattern this function always used.

    Returns
    -------
    pandas.DataFrame
        Row-wise concatenation of all matching frames, taken in sorted
        filename order, with a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If no entry in ``directory_path`` ends with ``suffix``.
    """
    # os.listdir returns entries in arbitrary order; sorting makes the row
    # order of the combined frame reproducible across runs and machines.
    matching_names = sorted(
        name for name in os.listdir(directory_path) if name.endswith(suffix)
    )

    # Fail with an actionable message instead of pandas' generic
    # "No objects to concatenate" (same exception type as before).
    if not matching_names:
        raise ValueError(
            f"No files ending with {suffix!r} found in {directory_path!r}"
        )

    # SECURITY: unpickling can execute arbitrary code; only point this at
    # directories whose contents you trust.
    dataframes = [
        pd.read_pickle(os.path.join(directory_path, name))
        for name in matching_names
    ]

    # ignore_index=True discards each file's own index in favour of one
    # continuous RangeIndex over the combined rows.
    return pd.concat(dataframes, ignore_index=True)


# Usage
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# only run where this directory exists. Consider a configurable data directory.
directory_path = '/Users/ximing/Desktop/Explainprompt'

# Combine every matching pickle in the directory into one DataFrame.
big_df = load_and_combine_pkl_files(directory_path)

# Plain-text dump of the combined frame (pandas truncates long frames).
print(big_df)


                                                prompt real_output  \
0    the refugees able to look ahead and resist liv...  ^POSITIVE^   
1    puts the dutiful efforts of more disciplined g...  ^POSITIVE^   
2    big stars and high production values are stand...  ^POSITIVE^   
3    it 's funny , as the old saying goes , because...  ^POSITIVE^   
4    i 've seen in a while , a meander through worn...  ^POSITIVE^   
..                                                 ...         ...   
991  rare to find a film to which the adjective ` g...  ^POSITIVE^   
992  , romance , tragedy , false dawns , real dawns...  ^POSITIVE^   
993  a big-budget/all-star movie as unblinkingly pu...  ^POSITIVE^   
994  the refugees able to look ahead and resist liv...  ^POSITIVE^   
995  big stars and high production values are stand...  ^POSITIVE^   

                                           token_level  \
0    {'tokens': [{'token': 'the', 'type': 'input', ...   
1    {'tokens': [{'token': 'puts', 'type': 

In [71]:
# Show the column labels of the combined frame.
print(big_df.columns)

Index(['prompt', 'real_output', 'token_level', 'word_level', 'label',
       'component_level', 'instruction', 'query', 'component_range',
       'instruction_weight', 'query_weight', 'instructions_tokens',
       'query_tokens', 'instruction_token_top_0.2_peturbed',
       'instruction_token_bottom_0.2_peturbed', 'query_token_top_0.2_peturbed',
       'query_token_bottom_0.2_peturbed', 'top_reconstructed_instruction_0.2',
       'top_reconstructed_query_0.2', 'bottom_reconstructed_instruction_0.2',
       'bottom_reconstructed_query_0.2',
       'top_reconstructed_instruction_0.2_result',
       'top_reconstructed_query_0.2_result',
       'bottom_reconstructed_instruction_0.2_result',
       'bottom_reconstructed_query_0.2_result'],
      dtype='object')


In [72]:
# Inspect the 'word_level' entry of the fifth row (positional index 4).
print(big_df['word_level'].iloc[4])

{'tokens': [{'token': 'i', 'type': 'input', 'value': 0.021453715780720754, 'position': 0}, {'token': "'ve", 'type': 'input', 'value': 0.05163423586147464, 'position': 1}, {'token': 'seen', 'type': 'input', 'value': 0.03017845061310582, 'position': 2}, {'token': 'in', 'type': 'input', 'value': 0.030178468056911916, 'position': 3}, {'token': 'a', 'type': 'input', 'value': 0.021454850957790275, 'position': 4}, {'token': 'while', 'type': 'input', 'value': 0.030178468056911916, 'position': 5}, {'token': ',', 'type': 'input', 'value': 0.021453715780720754, 'position': 6}, {'token': 'a', 'type': 'input', 'value': 0.02145576780456273, 'position': 7}, {'token': 'meander', 'type': 'input', 'value': 0.05163423586147464, 'position': 8}, {'token': 'through', 'type': 'input', 'value': 0.02145576780456273, 'position': 9}, {'token': 'worn-out', 'type': 'input', 'value': 0.06035693611382383, 'position': 10}, {'token': 'material', 'type': 'input', 'value': 0.030178468056911916, 'position': 11}, {'token'

In [73]:
# Peek at the first 20 values of the bottom-query perturbation result column.
print(big_df['bottom_reconstructed_query_0.2_result'].head(20))


0     ^POSITIVE^
1     ^POSITIVE^
2     ^POSITIVE^
3     ^POSITIVE^
4     ^NEGATIVE^
5     ^POSITIVE^
6     ^POSITIVE^
7     ^NEGATIVE^
8     ^NEGATIVE^
9     ^POSITIVE^
10    ^POSITIVE^
11    ^POSITIVE^
12    ^NEGATIVE^
13    ^NEGATIVE^
14    ^POSITIVE^
15    ^NEGATIVE^
16    ^NEGATIVE^
17    ^POSITIVE^
18    ^NEGATIVE^
19    ^POSITIVE^
Name: bottom_reconstructed_query_0.2_result, dtype: object


In [74]:
# Boolean mask: True where the bottom-query perturbation result differs
# from the original output.
differences = big_df['real_output'].ne(big_df['bottom_reconstructed_query_0.2_result'])
# The mean of a boolean mask is the fraction of True entries.
print(differences.mean())

0.21887550200803213


In [75]:
# Boolean mask: True where the top-query perturbation result differs
# from the original output.
differences = big_df['real_output'].ne(big_df['top_reconstructed_query_0.2_result'])
# The mean of a boolean mask is the fraction of True entries.
print(differences.mean())

0.1576305220883534


In [46]:
# NOTE(review): this cell's execution count (46) is lower than the loading
# cell's (70), so the recorded output may come from an earlier kernel
# state. Re-run the notebook top to bottom to confirm the number.
# Boolean mask: True where the bottom-instruction perturbation result differs
# from the original output.
differences = big_df['real_output'].ne(big_df['bottom_reconstructed_instruction_0.2_result'])
# The mean of a boolean mask is the fraction of True entries.
print(differences.mean())

0.1891348088531187


In [43]:
# NOTE(review): this cell's execution count (43) is lower than the loading
# cell's (70), so the recorded output may come from an earlier kernel
# state. Re-run the notebook top to bottom to confirm the number.
# Boolean mask: True where the top-instruction perturbation result differs
# from the original output.
differences = big_df['real_output'].ne(big_df['top_reconstructed_instruction_0.2_result'])
# The mean of a boolean mask is the fraction of True entries.
print(differences.mean())

0.9989939637826962


In [30]:
# NOTE(review): execution count (30) is lower than the loading cell's (70),
# so the recorded output may be stale. Re-run top to bottom to confirm.
# Average of the 'instruction_weight' column over all rows.
print(big_df.loc[:, 'instruction_weight'].mean())

0.7873167049738102


In [31]:
# NOTE(review): execution count (31) is lower than the loading cell's (70),
# so the recorded output may be stale. Re-run top to bottom to confirm.
# Average of the 'query_weight' column over all rows.
print(big_df.loc[:, 'query_weight'].mean())

0.2114628815611635
