Analysis for ChatGPT synonym replacement (with context)

In [1]:
! pip install matplotlib
! pip install seaborn
! pip install tabulate
! pip install altair
! pip install plotly



In [2]:
import matplotlib.pyplot as plt
from data import load_data, save_to_json, load_from_json
from matplotlib.gridspec import GridSpec
import seaborn as sns
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from tabulate import tabulate
import pandas as pd
from itertools import zip_longest
from pandas import MultiIndex
import altair as alt
import os

In [3]:
from math import log
# from scipy.stats import kendalltau

COLORS = ['red', 'blue', 'green', 'orange', 'purple', 'cyan', 'magenta', 'yellow', 'black', 'grey', 'pink']

def avg(l):
    return sum(l) / len(l)

def brier_score(pred):
    return avg([(x - 1) ** 2 for x in pred])

def log_loss(pred):
    return -1 * avg([log(x) for x in pred])

# def kendall_tau_for_results(model_results):
#     detection_scores = [i['detection_score'] for i in model_results] 
#     self_preferences = [i['self_preference'] for i in model_results]
    
#     return kendalltau(detection_scores, self_preferences).correlation

# def kendall_tau(x, y):
#     return kendalltau(x, y).correlation



In [4]:
models = results_to_load = ["gpt4_results_2_ChatGPT_with_context_50_sentence.json", "gpt4_results_5_ChatGPT_with_context_50_sentence.json", 
                            'gpt4_results_2_ChatGPT_with_context_bothsentences_50_sentence.json', 'gpt4_results_5_ChatGPT_with_context_bothsentences_50_sentence.json']
data_path = os.path.join("results","cnn","synonym")


# xsum_responses, xsum_articles, xsum_keys = load_data('xsum')
cnn_responses, cnn_articles, cnn_keys = load_data('cnn')

# xsum_results = {}
cnn_results = {}
for model in results_to_load:
    # xsum_results[model] = load_from_json(f'results/xsum/{model}_results.json')
    cnn_results[model] = load_from_json(os.path.join(data_path, model))
    


In [8]:
# loading old results for compairson for the selected keys
# all relevant keys
keys_list = [d['key'] for d in cnn_results['gpt4_results_5_ChatGPT_with_context_50_sentence.json']]
keys_list = set(keys_list)

# filter old results to keep only the 50 sentences
old_result = load_from_json(os.path.join("results","cnn","gpt4_results.json")) 
filtered_list = []
# Iterate over each dictionary in list_of_dicts
for d in old_result:
    # Check if any key in the current dictionary is in the keys_set
    if d['key'] in keys_list:
        filtered_list.append(d)

print(len(filtered_list))
print(len(cnn_results['gpt4_results_5_ChatGPT_with_context_50_sentence.json']))

# add to results
cnn_results['gpt4'] = filtered_list


200
200


In [9]:
models = models + ["gpt4"]

In [10]:
def print_zapped_table():
    table = [[model, 
                        avg([result['detection_score'] for result in cnn_results[model]]),
                        avg([0.5 if result['detection_score'] == 0.5 else 1 if result['detection_score'] > 0.5 else 0 for result in cnn_results[model]]),


    ] for model in models]

    print(tabulate(sorted(table, key = lambda x: x[-1]), headers = ['Model', 'Self-Rec (CNN)', 'Zapped' ]))
    # print(tabulate(sorted())

print_zapped_table()

Model                                                                 Self-Rec (CNN)    Zapped
------------------------------------------------------------------  ----------------  --------
gpt4_results_5_ChatGPT_with_context_50_sentence.json                        0.52802      0.505
gpt4_results_2_ChatGPT_with_context_50_sentence.json                        0.516993     0.51
gpt4_results_2_ChatGPT_with_context_bothsentences_50_sentence.json          0.70805      0.815
gpt4_results_5_ChatGPT_with_context_bothsentences_50_sentence.json          0.709777     0.83
gpt4                                                                        0.74366      0.885


In [11]:
# Main pairwise results
table = [[model, 
                    avg([result['detection_score'] for result in cnn_results[model]])
] for model in models]

table = [[row[0]] + [round(i, 3) for i in row[1:]] for row in table]
print(tabulate(table, headers = ['Model', 'Self-Rec']))

Model                                                                 Self-Rec
------------------------------------------------------------------  ----------
gpt4_results_2_ChatGPT_with_context_50_sentence.json                     0.517
gpt4_results_5_ChatGPT_with_context_50_sentence.json                     0.528
gpt4_results_2_ChatGPT_with_context_bothsentences_50_sentence.json       0.708
gpt4_results_5_ChatGPT_with_context_bothsentences_50_sentence.json       0.71
gpt4                                                                     0.744


In [12]:
results = cnn_results
print(tabulate(sorted([[model, avg([result['detection_score'] for result in results[model]])]])))

------------------------------------------------------------------  --------
gpt4_results_5_ChatGPT_with_context_bothsentences_50_sentence.json  0.709777
------------------------------------------------------------------  --------


In [12]:
cnn_results['gpt4_results_5_ChatGPT_no_context_50_sentence.json']

[{'key': '8af047c2d83a91e8b745adfcaa7c282dfe1030a2',
  'model': 'human',
  'forward_detection': '1',
  'forward_detection_probability': 0.700344557529218,
  'backward_detection': '1',
  'backward_detection_probability': 0.9210745696900526,
  'detection_score': 0.38963403468795804},
 {'key': '8af047c2d83a91e8b745adfcaa7c282dfe1030a2',
  'model': 'claude',
  'forward_detection': '1',
  'forward_detection_probability': 0.9443956177776911,
  'backward_detection': '1',
  'backward_detection_probability': 0.7264802656015464,
  'detection_score': 0.6089572117482724},
 {'key': '8af047c2d83a91e8b745adfcaa7c282dfe1030a2',
  'model': 'gpt35',
  'forward_detection': '1',
  'forward_detection_probability': 0.9641864887703017,
  'backward_detection': '1',
  'backward_detection_probability': 0.9147887140297144,
  'detection_score': 0.5246982861867379},
 {'key': '8af047c2d83a91e8b745adfcaa7c282dfe1030a2',
  'model': 'llama',
  'forward_detection': '1',
  'forward_detection_probability': 0.990309119174

In [13]:
keys_list = [d['key'] for d in cnn_results['gpt4_results_5_ChatGPT_no_context_50_sentence.json']]
print(keys_list)

['8af047c2d83a91e8b745adfcaa7c282dfe1030a2', '8af047c2d83a91e8b745adfcaa7c282dfe1030a2', '8af047c2d83a91e8b745adfcaa7c282dfe1030a2', '8af047c2d83a91e8b745adfcaa7c282dfe1030a2', 'dfcacd955a9168b2cf599c57a539dbfce43ea2a9', 'dfcacd955a9168b2cf599c57a539dbfce43ea2a9', 'dfcacd955a9168b2cf599c57a539dbfce43ea2a9', 'dfcacd955a9168b2cf599c57a539dbfce43ea2a9', 'f2463954bc7215d8a1f98cf0d85f280fd2affa58', 'f2463954bc7215d8a1f98cf0d85f280fd2affa58', 'f2463954bc7215d8a1f98cf0d85f280fd2affa58', 'f2463954bc7215d8a1f98cf0d85f280fd2affa58', '0e7a92a6f06e94cdd613860d4a3f4b92a06a2b43', '0e7a92a6f06e94cdd613860d4a3f4b92a06a2b43', '0e7a92a6f06e94cdd613860d4a3f4b92a06a2b43', '0e7a92a6f06e94cdd613860d4a3f4b92a06a2b43', '2905a15c5947042e42f4b52dc201d24822af20f1', '2905a15c5947042e42f4b52dc201d24822af20f1', '2905a15c5947042e42f4b52dc201d24822af20f1', '2905a15c5947042e42f4b52dc201d24822af20f1', '808a3d788317af05686ee71a71d2fbc5c19267bc', '808a3d788317af05686ee71a71d2fbc5c19267bc', '808a3d788317af05686ee71a71d2fb

In [None]:
# Print table showing the kendall_tau_for_result for cn and xsum on each main model
print(tabulate(sorted([[model, avg([result['detection_score'] for result in xsum_results[model]]), avg([result['self_preference'] for result in xsum_results[model]]), avg([result['detection_score'] for result in cnn_results[model]]), avg([result['self_preference'] for result in cnn_results[model]]),kendall_tau_for_results(xsum_results[model]), kendall_tau_for_results(cnn_results[model])] for model in models if 'llama' in model], key = lambda x:x[0]), headers=['Model', 'Self-Rec [X]', 'Self-Pref [X]', 'Self-Rec [C]', 'Self-Pref [C]', 'XSUM T', 'CNN T']))

In [None]:
def show_scatterplots(results, include_ambiguous=True):
    num_models = len(results.keys())

    plt.figure(figsize=(num_models * 6, num_models * 2))
    colors = ['blue', 'green', 'red']

    for i, model in enumerate(results.keys()):
        if not include_ambiguous:
            detection_scores = [i['detection_score'] for i in results[model] if i['forward_comparison'] != i['backward_comparison']]
            self_preferences = [i['self_preference'] for i in results[model] if i['forward_comparison'] != i['backward_comparison']]
        else:
            detection_scores = [i['detection_score'] for i in results[model]]
            self_preferences = [i['self_preference'] for i in results[model]]
        
        plt.subplot(1, 3, i+1)
        plt.scatter(detection_scores, self_preferences, color=colors[i])
        plt.xlabel('Detection Score')
        plt.ylabel('Self-Preference')
        plt.title(MODEL_TO_STRING[model])

    plt.suptitle('Detection Score vs Self-Preference (Token Probability)', fontsize=16, y=1) 
    plt.tight_layout()
    plt.show()

In [None]:
def show_scatterplots(results, include_ambiguous=True):
    num_models = len(results.keys())

    plt.figure(figsize=(num_models * 6, num_models * 2))
    colors = ['blue', 'green', 'red']

    for i, model in enumerate(results.keys()):
        if not include_ambiguous:
            detection_scores = [i['detection_score'] for i in results[model] if i['forward_comparison'] != i['backward_comparison']]
            self_preferences = [i['self_preference'] for i in results[model] if i['forward_comparison'] != i['backward_comparison']]
        else:
            detection_scores = [i['detection_score'] for i in results[model]]
            self_preferences = [i['self_preference'] for i in results[model]]
        
        plt.subplot(1, 3, i+1)
        plt.scatter(detection_scores, self_preferences, color=colors[i])
        plt.xlabel('Detection Score')
        plt.ylabel('Self-Preference')
        plt.title(MODEL_TO_STRING[model])

    plt.suptitle('Detection Score vs Self-Preference (Token Probability)', fontsize=16, y=1) 
    plt.tight_layout()
    plt.show()

In [None]:
def show_scatterplots_with_marginals(results, include_ambiguous=True):
    def is_valid(item):
        return 'detection_score' in item and 'self_preference' in item 

    for i, model in enumerate(results.keys()):
        if not include_ambiguous:
            detection_scores = [item['detection_score'] for item in results[model] if is_valid(item) and item['forward_comparison'] != item['backward_comparison'] and item['forward_detection'] != item['backward_detection']]
            self_preferences = [item['self_preference'] for item in results[model] if is_valid(item) and item['forward_comparison'] != item['backward_comparison'] and item['forward_detection'] != item['backward_detection']]
        else:
            detection_scores = [item['detection_score'] for item in results[model] if is_valid(item)]
            self_preferences = [item['self_preference'] for item in results[model] if is_valid(item)]

        # Create a jointplot for each model
        joint_plot = sns.jointplot(x=detection_scores, y=self_preferences, kind="scatter", color=COLORS[i % len(COLORS)], marginal_kws=dict(bins=15, fill=True))

        joint_plot.ax_joint.set_xlim(0, 1.0)
        joint_plot.ax_joint.set_ylim(0, 1.0)

        # Adjust the title position and font size
        joint_plot.fig.suptitle(f'{MODEL_TO_STRING[model]}', fontsize=14, y=1.05)
        # joint_plot.fig.suptitle(f'Detection Score vs Self-Preference (Token Probability) for {MODEL_TO_STRING[model]}', fontsize=14, y=1.05)

        # Adjust axis labels font size
        joint_plot.set_axis_labels('Detection Score', 'Self-Preference', fontsize=12)

        # Show the plot
        plt.savefig(f'plots/scatterplots/xsum/{model}.png', bbox_inches='tight')
        plt.show()


In [None]:
show_scatterplots(results)
show_scatterplots(results, include_ambiguous=False)

In [None]:
def show_heatmaps(results, include_ambiguous=True):
    num_models = len(results.keys())

    plt.figure(figsize=(num_models * 6, num_models * 2))
    colors = ['blue', 'green', 'red', 'orange']

    for i, model in enumerate(results.keys()):
        if not include_ambiguous:
            detection_scores = [i['detection_score'] for i in results[model] if i['forward_comparison'] != i['backward_comparison']]
            self_preferences = [i['self_preference'] for i in results[model] if i['forward_comparison'] != i['backward_comparison']]
        else:
            detection_scores = [i['detection_score'] for i in results[model]]
            self_preferences = [i['self_preference'] for i in results[model]]
        
        plt.subplot(1, num_models, i+1)
        plt.hexbin(detection_scores, self_preferences, gridsize=30, cmap='Blues')
        plt.colorbar(label='Density')
        plt.xlabel('Detection Score')
        plt.ylabel('Self-Preference')
        plt.title(MODEL_TO_STRING[model])

    plt.suptitle('Detection Score vs Self-Preference (Token Probability)', fontsize=16, y=1) 
    plt.tight_layout()
    plt.show()

In [None]:
show_heatmaps(results)
show_heatmaps(results, include_ambiguous=False)

In [71]:
def plot_detection_score_vs_correlation(*results_dicts):
    # Extract 'a' and 'b' values
    x_values = []
    y_values = []
    plot_models = []
    d = {}
    for results in results_dicts:
        keys = [model for model in models if any(s in model for s in ['_2_ft_', '_10_ft_', '_500_ft_'])] #list(results.keys())
        plot_models += keys
        x_values += [avg([i['detection_score'] for i in results[key] if 'detection_score' in i]) for key in keys]
        y_values += [avg([i['self_preference'] for i in results[key] if 'self_preference' in i]) for key in keys]
        y_values += [kendall_tau_for_results(results[key]) for key in keys]
        for key in keys:
            d[key] = [(i['detection_score'], i['self_preference']) for i in results[key] if 'detection_score' in i and 'self_preference' in i]

    save_to_json(d, 'xsum_plot_data.json')
    # Create a scatter plot
    plt.figure(figsize=(8, 8))

    # Generate a color map with a unique color for each point
    markers = (['o'] * 3 + ['^'] * 3) * 4
    colors = ['red'] * 12 + ['blue'] * 12
    # colors = ['red', 'blue', 'green', 'orange', 'yellow', 'purple', 'black', 'pink', 'grey'][:len(plot_models)]
    
    # Plot each point
    print(plot_models)
    for i, (a, b, color, marker) in enumerate(zip_longest(x_values, y_values, colors, markers)):
        plt.scatter(a, b, color=color, marker=marker, label=MODEL_TO_STRING[plot_models[i]])

    plt.xlim(0, 1)
    plt.ylim(0, 1)
    plt.gca().set_aspect('equal', adjustable='box')
    
    # Create a legend below the plot in a vertical column
    plt.legend(title="Key", loc='upper center', bbox_to_anchor=(0.5, -0.15), fancybox=True, shadow=True, ncol=1)

    # Add grid, title, and axis labels
    plt.grid(True)
    # plt.title('Detection Score vs. Self-Preference')
    plt.xlabel('Self-Recognition Score')
    plt.ylabel("Self-Preference")

    # Show the plot
    plt.savefig(f'plots/xsum_scaling_law.png', bbox_inches='tight')
    plt.show()
    return zip(plot_models, x_values, y_values)

# Looking at what's replaced

In [8]:
from tqdm import tqdm
import time
import random
import nltk
from nltk.corpus import wordnet
from nltk.tokenize import word_tokenize
import difflib
import pandas as pd

In [7]:


# Set the seed for reproducibility
random.seed(123)

all_words = []
all_syn = []

def get_synonyms(word):
    """
    Finds and returns synonyms for a given word using WordNet.
    This function takes a word as input, searches for its synonyms 
    using the WordNet synsets, and returns a list of synonyms.

    Args:
        word (str): The word for which to find synonyms.
    Returns:
        list: A list of synonyms for the input word. Returns an 
              empty list if no synonyms are found.
    """
    synonyms = []
    for syn in wordnet.synsets(word):
        for name in syn.lemma_names():
            # Exclude the original word to avoid replacing it with itself
            name = name.replace('_',' ')
            if name.lower() != word.lower():
                synonyms.append(name)
    return synonyms

def replace_with_synonyms(sentence, num_words_to_replace):
    """
    Replaces a specified number of words in a sentence with their synonyms.

    This function takes a sentence and an integer specifying the number of words 
    to replace with synonyms. It randomly samples 2x the required number of words 
    to ensure replacements are possible even if some words do not have synonyms.
    It uses the `get_synonyms` function to find synonyms for each sampled word,
    and replaces words in the sentence until the specified number is reached.

    Args:
        sentence (str): The input sentence from which words will be replaced.
        num_words_to_replace (int): The number of words in the sentence to be replaced by synonyms.

    Returns:
        str: The modified sentence with the specified number of words replaced by synonyms.
    """
    # Tokenize the sentence
    words = word_tokenize(sentence)
    # Filter out non-alphabetic tokens (like punctuation)
    words_alpha = [word for word in words if word.isalpha()]
    
    # Randomly sample words to replace - i use 2x words just to account for words without synonym
    words_to_replace = random.sample(words_alpha, min(2*num_words_to_replace, len(words_alpha)))
    
    # Create a new sentence with synonyms replaced
    words_replaced = 0
    new_sentence = []
    for word in words:
        if word in words_to_replace:
            synonyms = get_synonyms(word)
            if synonyms and words_replaced < num_words_to_replace:
                # Replace with a random synonym
                new_word = random.choice(synonyms)
                new_sentence.append(new_word)
                #operational
                all_words.append(word)
                all_syn.append(synonyms)
                words_replaced +=1
            else:
                # If no synonym is found, keep the original word
                new_sentence.append(word)
        else:
            new_sentence.append(word)
    
    return ' '.join(new_sentence)



In [9]:
# Only suitable for GPT models
def explore_synonym(
    dataset,
    model,
    starting_idx=0,
    detection_type="detection",
    replace_synonym = False,
    num_words_to_replace = 0
):

    # For retrieving summaries, the specific fine-tuning version isn't needed
    exact_model = model
    model = "gpt35" if model.endswith("gpt35") else model

    responses, articles, keys = load_data(dataset)
    all_original = []
    all_modified = []

    for key in tqdm(keys[starting_idx:], desc="Processing keys"):
        article = articles[key]

        source_summary = responses[model][key]
        all_original.append(source_summary)
        # replace synonym
        if replace_synonym:
            source_summary = replace_with_synonyms(source_summary, num_words_to_replace)
            all_modified.append(source_summary)



    return all_original, all_modified


In [10]:
num_synonym = 2
all_original, all_modified = explore_synonym(
    "cnn", "gpt4",replace_synonym=True, num_words_to_replace=num_synonym, starting_idx=950
)


Processing keys: 100%|██████████| 50/50 [00:01<00:00, 26.89it/s]


In [23]:
# Function to remove space before commas
def remove_space_before_comma(sentences):
    return [sentence.replace(" ,", ",") for sentence in sentences]

# Cleaned sentences
all_modified = remove_space_before_comma(all_modified)

In [24]:
def find_difference_context(sentence1, sentence2):
    # Tokenize the sentences into words
    words1 = sentence1.split()
    words2 = sentence2.split()

    # Use difflib to find matching blocks and differences
    matcher = difflib.SequenceMatcher(None, words1, words2)
    diffs = []
    
    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
        if tag != 'equal':  # Capture the differing segments
            # Extract two words before and after the difference (if they exist)
            context1 = ' '.join(words1[max(0, i1-2):i2+2])
            context2 = ' '.join(words2[max(0, j1-2):j2+2])
            diffs.append((context1, context2))
    
    return diffs

def generate_report(list1, list2):
    data = {'Sentence1': [], 'Sentence2': [], 'Difference Context 1': [], 'Difference Context 2': []}

    # Ensure both lists are of the same length
    for sentence1, sentence2 in zip(list1, list2):
        differences = find_difference_context(sentence1, sentence2)
        
        if differences:
            for diff1, diff2 in differences:
                data['Sentence1'].append(sentence1)
                data['Sentence2'].append(sentence2)
                data['Difference Context 1'].append(diff1)
                data['Difference Context 2'].append(diff2)
        else:
            data['Sentence1'].append(sentence1)
            data['Sentence2'].append(sentence2)
            data['Difference Context 1'].append("No differences")
            data['Difference Context 2'].append("No differences")
    
    # Create a pandas DataFrame
    df = pd.DataFrame(data)
    
    return df



In [26]:
# Generate the report
report = generate_report(all_original, all_modified)

# Print the DataFrame or save it as a report
print(report.head())

report.to_csv('sentence_comparison_report.csv', index=False)

                                           Sentence1  \
0  Hurricane Ike devastates Texas, leaving reside...   
1  Hurricane Ike devastates Texas, leaving reside...   
2  Susana Trimarco's daughter, Marita Veron, vani...   
3  Susana Trimarco's daughter, Marita Veron, vani...   
4  Susana Trimarco's daughter, Marita Veron, vani...   

                                           Sentence2  \
0  Hurricane Ike devastates TX, leaving occupier ...   
1  Hurricane Ike devastates TX, leaving occupier ...   
2  Susana Trimarco 's daughter, Marita Veron, van...   
3  Susana Trimarco 's daughter, Marita Veron, van...   
4  Susana Trimarco 's daughter, Marita Veron, van...   

                                Difference Context 1  \
0            Ike devastates Texas, leaving residents   
1           Texas, leaving residents struggling with   
2                 Susana Trimarco's daughter, Marita   
3                    forced into a human trafficking   
4  soap opera "Vidas Robadas," highlighting th