In [None]:
# ! pip install nltk
# import nltk
# nltk.download('punkt')
# nltk.download('wordnet')
# nltk.download('omw-1.4')
# nltk.download('punkt_tab')
# ! pip install scikit-learn
# ! pip install sentence-transformers scikit-learn

In [24]:
from data import load_data, SOURCES, save_to_json, load_from_json
from models import (
    get_gpt_recognition_logprobs,
    get_model_choice,
    get_logprobs_choice_with_sources,
    get_gpt_score,
    get_gpt_summary_similarity,
    get_gpt_paraphrase,
    get_claude_paraphrase
)

from math import exp
from pprint import pprint
from random import shuffle

from tqdm import tqdm
import time
import os

import random
import nltk
from nltk.corpus import wordnet
from nltk.tokenize import word_tokenize

import re
import json

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

from sentence_transformers import SentenceTransformer, util
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np



# Synonym Wordnet Generator

In [1]:
# Set the seed for reproducibility
random.seed(123)

all_words = []
all_syn = []

def get_synonyms(word):
    """
    Finds and returns synonyms for a given word using WordNet.
    This function takes a word as input, searches for its synonyms 
    using the WordNet synsets, and returns a list of synonyms.

    Args:
        word (str): The word for which to find synonyms.
    Returns:
        list: A list of synonyms for the input word. Returns an 
              empty list if no synonyms are found.
    """
    synonyms = []
    for syn in wordnet.synsets(word):
        for name in syn.lemma_names():
            # Exclude the original word to avoid replacing it with itself
            name = name.replace('_',' ')
            if name.lower() != word.lower():
                synonyms.append(name)
    return synonyms

def replace_with_synonyms(sentence, num_words_to_replace):
    """
    Replaces a specified number of words in a sentence with their synonyms.

    This function takes a sentence and an integer specifying the number of words 
    to replace with synonyms. It randomly samples 2x the required number of words 
    to ensure replacements are possible even if some words do not have synonyms.
    It uses the `get_synonyms` function to find synonyms for each sampled word,
    and replaces words in the sentence until the specified number is reached.

    Args:
        sentence (str): The input sentence from which words will be replaced.
        num_words_to_replace (int): The number of words in the sentence to be replaced by synonyms.

    Returns:
        str: The modified sentence with the specified number of words replaced by synonyms.
    """
    # Tokenize the sentence
    words = word_tokenize(sentence)
    # Filter out non-alphabetic tokens (like punctuation)
    words_alpha = [word for word in words if word.isalpha()]
    
    # Randomly sample words to replace - i use 2x words just to account for words without synonym
    words_to_replace = random.sample(words_alpha, min(2*num_words_to_replace, len(words_alpha)))
    
    # Create a new sentence with synonyms replaced
    words_replaced = 0
    new_sentence = []
    for word in words:
        if word in words_to_replace:
            synonyms = get_synonyms(word)
            if synonyms and words_replaced < num_words_to_replace:
                # Replace with a random synonym
                new_word = random.choice(synonyms)
                new_sentence.append(new_word)
                #operational
                all_words.append(word)
                all_syn.append(synonyms)
                words_replaced +=1
            else:
                # If no synonym is found, keep the original word
                new_sentence.append(word)
        else:
            new_sentence.append(word)
    
    return ' '.join(new_sentence)



### Examine example

In [3]:
from data import load_data, SOURCES, save_to_json, load_from_json
responses, articles, keys = load_data("cnn")

  from .autonotebook import tqdm as notebook_tqdm


In [9]:
key = keys[22]
random_summary = responses["gpt4"][key]
print(random_summary)

Former government contractor indicted for stealing nuclear materials
Roy Lynn Oakley accused of attempting to sell restricted uranium enrichment components
Oakley faces up to 10 years in prison and a $250,000 fine per count
FBI sting operation prevented the materials from reaching foreign entities


In [13]:
new_summary = replace_with_synonyms(random_summary, 5)  # Replace 5of the words
print(new_summary)


Former government declarer indict for thievery atomic materials Roy Lynn Oakley accused of attempting to sell restricted atomic number 92 enrichment components Oakley faces up to 10 years in prison and a $ 250,000 fine per count FBI sting operation prevented the materials from reaching foreign entities


In [None]:
for i in range(len(all_words)):
    print('word:', all_words[i])
    print(all_syn[i])

# Apply to Dataset and Check Recognition

In [5]:
# Only suitable for GPT models
def generate_gpt_detect_recognition_synonym(
    dataset,
    model,
    starting_idx=0,
    detection_type="detection",
    replace_synonym = False,
    num_words_to_replace = 0
):
    """
    Generates detection scores for GPT model outputs compared to other summaries.

    This function takes a dataset name, a base model for inference, a starting index 
    from which to begin enumeration of the dataset, and various options for detection 
    and synonym replacement. It makes API calls to GPT models using the OpenAI API key 
    to evaluate the similarity of each summary against all other summaries. If synonym 
    replacement is enabled, a specified number of words are replaced before comparison.
    
    The function performs inference using the base model, compares generated summaries 
    in forward and backward order, and returns a JSON object containing detection results.

    Args:
        dataset (str): The name of the dataset (e.g., "cnn") containing the articles.
        model (str): The base model on which inference will be performed.
        starting_idx (int, optional): The index to start processing articles from. Defaults to 0.
        detection_type (str, optional): The type of detection to perform. Defaults to "detection".
        replace_synonym (bool, optional): Whether to replace words in the summaries with synonyms. Defaults to False.
        num_words_to_replace (int, optional): The number of words to replace with synonyms if `replace_synonym` is True. Defaults to 0.

    Returns:
        dict: A JSON object containing information about:
            - Model compared against
            - Key of the article
            - Forward detection + probability
            - Backward detection + probability
            - Overall detection score

    """
    # For retrieving summaries, the specific fine-tuning version isn't needed
    exact_model = model
    model = "gpt35" if model.endswith("gpt35") else model

    responses, articles, keys = load_data(dataset)
    results = []  # load_from_json(f"results/{model}_results.json")

    for key in tqdm(keys[starting_idx:], desc="Processing keys"):
        article = articles[key]

        source_summary = responses[model][key]

        # replace synonym
        if replace_synonym:
            source_summary = replace_with_synonyms(source_summary, num_words_to_replace)

        for other in [s for s in SOURCES if s != model]:
            result = {"key": key, "model": other}
            other_summary = responses[other][key]

            # Detection
            forward_result = get_model_choice(
                source_summary,
                other_summary,
                article,
                detection_type,
                exact_model,
                return_logprobs=True,
            )
            backward_result = get_model_choice(
                other_summary,
                source_summary,
                article,
                detection_type,
                exact_model,
                return_logprobs=True,
            )

            forward_choice = forward_result[0].token
            backward_choice = backward_result[0].token

            result["forward_detection"] = forward_choice
            result["forward_detection_probability"] = exp(forward_result[0].logprob)
            result["backward_detection"] = backward_choice
            result["backward_detection_probability"] = exp(backward_result[0].logprob)

            match (forward_choice, backward_choice):
                case ("1", "2"):
                    result["detection_score"] = 0.5 * (
                        exp(forward_result[0].logprob) + exp(backward_result[0].logprob)
                    )
                case ("2", "1"):
                    result["detection_score"] = 0.5 * (
                        exp(forward_result[1].logprob) + exp(backward_result[1].logprob)
                    )
                case ("1", "1"):
                    result["detection_score"] = 0.5 * (
                        exp(forward_result[0].logprob) + exp(backward_result[1].logprob)
                    )
                case ("2", "2"):
                    result["detection_score"] = 0.5 * (
                        exp(forward_result[1].logprob) + exp(backward_result[0].logprob)
                    )

            results.append(result)
    return results


In [None]:
for model in ["gpt4"]:
    print(SOURCES)
    print(f"Starting {model}")
    num_synonym = 5
    results = generate_gpt_detect_recognition_synonym(
        "cnn", model,replace_synonym=True, num_words_to_replace=num_synonym, starting_idx=950
    )
    #Save results
    file_name = f"{model}_results_{num_synonym}_replace_50_sentence.json"
    path = os.path.join("results","cnn","synonym",file_name)
    save_to_json(results,path)

In [8]:
# FOR THE OTHER 450 data points
def generate_gpt_detect_recognition_synonym(
    dataset,
    model,
    starting_idx=0,
    ending_idx=1000,
    detection_type="detection",
    replace_synonym = False,
    num_words_to_replace = 0
):

    # For retrieving summaries, the specific fine-tuning version isn't needed
    exact_model = model
    model = "gpt35" if model.endswith("gpt35") else model

    responses, articles, keys = load_data(dataset)
    results = []  # load_from_json(f"results/{model}_results.json")

    for key in tqdm(keys[starting_idx:ending_idx], desc="Processing keys"):
        article = articles[key]

        source_summary = responses[model][key]

        # replace synonym
        if replace_synonym:
            source_summary = replace_with_synonyms(source_summary, num_words_to_replace)

        for other in [s for s in SOURCES if s != model]:
            result = {"key": key, "model": other}
            other_summary = responses[other][key]

            # Detection
            forward_result = get_model_choice(
                source_summary,
                other_summary,
                article,
                detection_type,
                exact_model,
                return_logprobs=True,
            )
            backward_result = get_model_choice(
                other_summary,
                source_summary,
                article,
                detection_type,
                exact_model,
                return_logprobs=True,
            )

            forward_choice = forward_result[0].token
            backward_choice = backward_result[0].token

            result["forward_detection"] = forward_choice
            result["forward_detection_probability"] = exp(forward_result[0].logprob)
            result["backward_detection"] = backward_choice
            result["backward_detection_probability"] = exp(backward_result[0].logprob)

            match (forward_choice, backward_choice):
                case ("1", "2"):
                    result["detection_score"] = 0.5 * (
                        exp(forward_result[0].logprob) + exp(backward_result[0].logprob)
                    )
                case ("2", "1"):
                    result["detection_score"] = 0.5 * (
                        exp(forward_result[1].logprob) + exp(backward_result[1].logprob)
                    )
                case ("1", "1"):
                    result["detection_score"] = 0.5 * (
                        exp(forward_result[0].logprob) + exp(backward_result[1].logprob)
                    )
                case ("2", "2"):
                    result["detection_score"] = 0.5 * (
                        exp(forward_result[1].logprob) + exp(backward_result[0].logprob)
                    )

            results.append(result)
    return results


In [9]:
for model in ["gpt4"]:
    print(SOURCES)
    print(f"Starting {model}")
    num_synonym = 2
    results = generate_gpt_detect_recognition_synonym(
        "cnn", model,replace_synonym=True, num_words_to_replace=num_synonym, starting_idx=500, ending_idx=950
    )
    #Save results
    file_name = f"{model}_results_{num_synonym}_replace_450_sentence.json"
    path = os.path.join("results","cnn","synonym",file_name)
    save_to_json(results,path)

['human', 'claude', 'gpt35', 'gpt4', 'llama']
Starting gpt4


Processing keys: 100%|██████████| 450/450 [1:59:31<00:00, 15.94s/it]  


# Apply synonym to 'other' example as well

In [4]:
# Only suitable for GPT models
def generate_gpt_detect_recognition_dual_synonym(
    dataset,
    model,
    starting_idx=0,
    detection_type="detection",
    replace_synonym = False,
    num_words_to_replace = 0
):
    """
    Generates detection scores for GPT model outputs compared to other summaries using a dual synyonym replacement strategy.

    Args:
        dataset (str): The name of the dataset (e.g., "cnn") containing the articles.
        model (str): The base model on which inference will be performed.
        starting_idx (int, optional): The index to start processing articles from. Defaults to 0.
        detection_type (str, optional): The type of detection to perform. Defaults to "detection".
        replace_synonym (bool, optional): Whether to replace words in the summaries with synonyms. Defaults to False.
        num_words_to_replace (int, optional): The number of words to replace with synonyms if `replace_synonym` is True. Defaults to 0. Replaces the same number in both sentences being compared.



    """
    # For retrieving summaries, the specific fine-tuning version isn't needed
    exact_model = model
    model = "gpt35" if model.endswith("gpt35") else model

    responses, articles, keys = load_data(dataset)
    results = []  # load_from_json(f"results/{model}_results.json")

    for key in tqdm(keys[starting_idx:], desc="Processing keys"):
        article = articles[key]

        source_summary = responses[model][key]

        # replace synonym
        if replace_synonym:
            source_summary = replace_with_synonyms(source_summary, num_words_to_replace)

        for other in [s for s in SOURCES if s != model]:
            result = {"key": key, "model": other}
            other_summary = responses[other][key]

            # replace synonym
            if replace_synonym:
                other_summary = replace_with_synonyms(other_summary, num_words_to_replace)

            # Detection
            forward_result = get_model_choice(
                source_summary,
                other_summary,
                article,
                detection_type,
                exact_model,
                return_logprobs=True,
            )
            backward_result = get_model_choice(
                other_summary,
                source_summary,
                article,
                detection_type,
                exact_model,
                return_logprobs=True,
            )

            forward_choice = forward_result[0].token
            backward_choice = backward_result[0].token

            result["forward_detection"] = forward_choice
            result["forward_detection_probability"] = exp(forward_result[0].logprob)
            result["backward_detection"] = backward_choice
            result["backward_detection_probability"] = exp(backward_result[0].logprob)

            match (forward_choice, backward_choice):
                case ("1", "2"):
                    result["detection_score"] = 0.5 * (
                        exp(forward_result[0].logprob) + exp(backward_result[0].logprob)
                    )
                case ("2", "1"):
                    result["detection_score"] = 0.5 * (
                        exp(forward_result[1].logprob) + exp(backward_result[1].logprob)
                    )
                case ("1", "1"):
                    result["detection_score"] = 0.5 * (
                        exp(forward_result[0].logprob) + exp(backward_result[1].logprob)
                    )
                case ("2", "2"):
                    result["detection_score"] = 0.5 * (
                        exp(forward_result[1].logprob) + exp(backward_result[0].logprob)
                    )

            results.append(result)
    return results


In [7]:
for model in ["gpt4"]:
    print(SOURCES)
    print(f"Starting {model}")
    num_synonym = 5
    results = generate_gpt_detect_recognition_dual_synonym(
        "cnn", model,replace_synonym=True, num_words_to_replace=num_synonym, starting_idx=950
    )
    #Save results
    file_name = f"{model}_results_{num_synonym}_replace_bothsentences_50_sentence.json"
    path = os.path.join("results","cnn","synonym",file_name)
    save_to_json(results,path)

['human', 'claude', 'gpt35', 'gpt4', 'llama']
Starting gpt4


Processing keys: 100%|██████████| 50/50 [13:51<00:00, 16.63s/it]


# Finding semantically similar indexes in the summaries

In [2]:
# Only suitable for GPT models
def get_similar_sentence_index(
    dataset,
    model,
    starting_idx=0,
):
    exact_model = model
    model = "gpt35" if model.endswith("gpt35") else model

    responses, articles, keys = load_data(dataset)
    results = []  # load_from_json(f"results/{model}_results.json")

    for key in tqdm(keys[starting_idx:], desc="Processing keys"):
        summaries = []
        result = {"key": key}
        for other in [s for s in SOURCES]:
            summaries.append(responses[other][key])
            
        result_json = get_gpt_summary_similarity(summaries[0],summaries[1],summaries[2],summaries[3],summaries[4], index=True)
        result["indexes"]= result_json 
        results.append(result)
    return results


In [None]:
for model in ["gpt4"]:
    print(SOURCES)
    num_synonym = 2
    results = get_similar_sentence_index("cnn", model, starting_idx=998)
    print(results)


In [5]:
results

[{'key': '9177e5ac94f038749e8d4eb526a65461e0f6df4c',
  'indexes': '["Summary1:0", "Summary2:0", "Summary3:0", "Summary4:0", "Summary5:1"]'},
 {'key': 'f12e4bbb07211de7d43b4e331dc73404aa804562',
  'indexes': '["Summary1:2", "Summary2:1", "Summary3:2", "Summary4:1", "Summary5:0"]'}]

In [4]:
results = [{'key': '9177e5ac94f038749e8d4eb526a65461e0f6df4c',
  'indexes': '["Summary1:0", "Summary2:0", "Summary3:0", "Summary4:0", "Summary5:1"]'},
 {'key': 'f12e4bbb07211de7d43b4e331dc73404aa804562',
  'indexes': '["Summary1:2", "Summary2:1", "Summary3:2", "Summary4:1", "Summary5:0"]'}]

## Visualize what is similar

In [6]:
import json

In [9]:
# Function to extract a record by key
def get_record_by_key(results, search_key):
    for record in results:
        if record['key'] == search_key:
            return record
    return None  # Return None if key is not found

In [10]:
# Function to extract indexes from the record
def extract_indexes(record):
    if record and 'indexes' in record:
        indexes_str = record['indexes']
        # Convert the indexes string to a list using json.loads
        indexes_list = json.loads(indexes_str)
        return indexes_list
    return []

In [13]:
# Only suitable for GPT models
def visualize_similar(
    dataset,
    model,
    starting_idx=0,
):
    exact_model = model
    model = "gpt35" if model.endswith("gpt35") else model

    responses, articles, keys = load_data(dataset)

    for key in tqdm(keys[starting_idx:], desc="Processing keys"):
        summaries = []
        result = get_record_by_key(results,key)
        count = 1
        for idx, other in enumerate([s for s in SOURCES]):
            print(other)
            summary = responses[other][key]
            summary = summary.split('\n')
            indexes = extract_indexes(result)
            item = indexes[idx]
            summary_value = int(item.split(":")[1])
            summary = summary[summary_value]
            print(summary)
            
    return results


In [29]:
for model in ["gpt4"]:
    print(SOURCES)
    visualize_similar("cnn", model, starting_idx=998)

['human', 'claude', 'gpt35', 'gpt4', 'llama']


Processing keys: 100%|██████████| 2/2 [00:00<?, ?it/s]

human
Judge on Heather Mills: Level of premarital wealth "exaggerated"
claude
Judge rejects Mills' claim that she was wealthy before meeting McCartney in 1999
gpt35
Judge rejects Heather Mills' claim of wealth before marriage to Paul McCartney
gpt4
Judge finds Heather Mills' claims of wealth in 1999 exaggerated and rejects her portrayal as Paul McCartney's business partner
llama
Judge finds Mills' wealth exaggerated and her living style unrealistic
human
President Taylor's daughter married future president of an enemy power
claude
Elizabeth Harrison Walker, daughter of President Benjamin Harrison, was an economic expert who appeared on radio and TV shows
gpt35
Elizabeth Harrison Walker: Accomplished woman, lawyer, economist, and media personality
gpt4
Elizabeth Harrison Walker, daughter of President Benjamin Harrison, became a lawyer and economic expert, appearing on radio and TV
llama
Sarah Knox Taylor Davis - Died at 21 after falling ill with malaria while visiting her husband's rela




### Split of sentences

In [14]:
# What are the splits for each sentence

['human', 'claude', 'gpt35', 'gpt4', 'llama']


Processing keys:   0%|          | 0/2 [00:00<?, ?it/s]

Processing keys: 100%|██████████| 2/2 [00:00<?, ?it/s]

human
Judge on Heather Mills: Level of premarital wealth "exaggerated"
McCartney gave Mills several hundred thousand dollars each year
Judge: Mills' case "boils down to ... 'if he has it, I want it too' "
claude
Judge rejects Mills' claim that she was wealthy before meeting McCartney in 1999
Judge finds McCartney's account more credible regarding Mills' role in his life
Judge rules the 30 paintings in their home were lent by McCartney, not gifts to Mills
Judge believes Mills has future earning capacity despite her claim it is now zero
gpt35
Judge rejects Heather Mills' claim of wealth before marriage to Paul McCartney
Mills' portrayal as McCartney's business partner is deemed "make-belief" by judge
McCartney's total wealth estimated at approximately £400 million ($800 million)
gpt4
Judge finds Heather Mills' claims of wealth in 1999 exaggerated and rejects her portrayal as Paul McCartney's business partner
McCartney was generous, giving Mills substantial capital and gifts, but did not 




# Synonym - return full sentence via GPT

In [2]:
# Only suitable for GPT models
def get_similar_sentence(
    dataset,
    model,
    starting_idx=0,
):
    exact_model = model
    model = "gpt35" if model.endswith("gpt35") else model

    responses, articles, keys = load_data(dataset)
    results = []  

    for key in tqdm(keys[starting_idx:], desc="Processing keys"):
        summaries = []
        result = {"key": key}
        for other in [s for s in SOURCES]:
            summaries.append(responses[other][key])
            
        result_json = get_gpt_summary_similarity(summaries[0],summaries[1],summaries[2],summaries[3],summaries[4])
        result["sentences"]= result_json 
        results.append(result)
    return results



In [3]:
for model in ["gpt4"]:
    print(SOURCES)
    results = get_similar_sentence("cnn", model, starting_idx=997)
    print(results)


['human', 'claude', 'gpt35', 'gpt4', 'llama']


Processing keys: 100%|██████████| 3/3 [00:07<00:00,  2.52s/it]

[{'key': '5f02aa32bd1dc95e47355755398e31550b232f8a', 'sentences': '["Eight Florida teens to be tried as adults in videotaped beating case", "Eight Florida teenagers face kidnapping and battery charges for videotaped group beating of a 16-year-old girl.", "Eight Florida teens face life in prison for assaulting another teen", "Eight Florida teens charged as adults for videotaped beating of another teen, facing life in prison", "Eight Florida teens aged 14-18 will be tried as adults for beating another teenager in a viral video"]'}, {'key': '9177e5ac94f038749e8d4eb526a65461e0f6df4c', 'sentences': '["Judge on Heather Mills: Level of premarital wealth", "Judge rejects Mills\' claim that she was wealthy", "Judge rejects Heather Mills\' claim of wealth before", "Judge finds Heather Mills\' claims of wealth in", "Judge finds Mills\' wealth exaggerated and her living"]'}, {'key': 'f12e4bbb07211de7d43b4e331dc73404aa804562', 'sentences': '["President Taylor\'s daughter married future", "Sarah Kno




In [4]:
results[0]['sentences']

'["Eight Florida teens to be tried as adults in videotaped beating case", "Eight Florida teenagers face kidnapping and battery charges for videotaped group beating of a 16-year-old girl.", "Eight Florida teens face life in prison for assaulting another teen", "Eight Florida teens charged as adults for videotaped beating of another teen, facing life in prison", "Eight Florida teens aged 14-18 will be tried as adults for beating another teenager in a viral video"]'

In [4]:
results[1]['sentences']

NameError: name 'results' is not defined

In [10]:
# Example string
input_string = results[2]['sentences'] 

# # Use regular expressions to find all text between double quotes
# extracted_list = re.findall(r'"(.*?)"', input_string)

extracted_list = json.loads(input_string)

# Print the result
print(extracted_list)


["President Taylor's daughter married future", 'Sarah Knox Taylor Davis married future', 'Sarah Knox Taylor Davis: Tragic life,', 'Sarah Knox Taylor Davis, daughter of', 'Sarah Knox Taylor Davis - Died']


In [15]:
extracted_list[0]

'Judge on Heather Mills: Level of premarital wealth "exaggerated"'

In [11]:
len(extracted_list)

5

## What is similar?


In [13]:
# Function to extract a record by key
def get_record_by_key(results, search_key):
    for record in results:
        if record['key'] == search_key:
            return record
    return None  # Return None if key is not found

def find_most_similar_sentence(sentences, reference_sentence):
    # Combine all sentences
    all_sentences = sentences + [reference_sentence]
    
    # Initialize the TF-IDF Vectorizer
    vectorizer = TfidfVectorizer().fit_transform(all_sentences)
    
    # Compute cosine similarity between the reference and all other sentences
    similarity_matrix = cosine_similarity(vectorizer[-1], vectorizer[:-1])
    
    # Get the index of the most similar sentence
    most_similar_index = similarity_matrix.argsort()[0][-1]
    
    return sentences[most_similar_index]



In [16]:
# Only suitable for GPT models
def visualize_similar(
    dataset,
    model,
    results,
    starting_idx=0
):
    exact_model = model
    model = "gpt35" if model.endswith("gpt35") else model

    responses, articles, keys = load_data(dataset)

    for key in tqdm(keys[starting_idx:], desc="Processing keys"):
        summaries = []
        result = get_record_by_key(results,key)
        input_string = result['sentences']
        extracted_list = json.loads(input_string)

        for idx, other in enumerate([s for s in SOURCES]):
            print(other)
            summary = responses[other][key]
            summary = summary.split('\n')
           
            reference_sentence = extracted_list[idx]
            print('ref', reference_sentence)
            similar_in_key = find_most_similar_sentence(summary, reference_sentence)
            print(similar_in_key)
            
    return results


In [18]:
for model in ["gpt4"]:
    print(SOURCES)
    visualize_similar("cnn", model, results, starting_idx=997)

['human', 'claude', 'gpt35', 'gpt4', 'llama']


Processing keys: 100%|██████████| 3/3 [00:00<00:00, 221.25it/s]

human
ref Eight Florida teens to be tried as adults in videotaped beating case
Eight Florida teens to be tried as adults in videotaped beating case
claude
ref Eight Florida teenagers face kidnapping and battery charges for videotaped group beating of a 16-year-old girl.
Eight Florida teenagers face kidnapping and battery charges for videotaped group beating of a 16-year-old girl. They could be sentenced to life in prison if convicted
gpt35
ref Eight Florida teens face life in prison for assaulting another teen
Eight Florida teens face life in prison for assaulting another teen
gpt4
ref Eight Florida teens charged as adults for videotaped beating of another teen, facing life in prison
Eight Florida teens charged as adults for videotaped beating of another teen, facing life in prison
llama
ref Eight Florida teens aged 14-18 will be tried as adults for beating another teenager in a viral video
Eight Florida teens aged 14-18 will be tried as adults for beating another teenager in a viral v




# Paraphrase - Using Sentence Transformer and Replacing using GPT4

In [9]:
# Load a pre-trained Sentence-BERT model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Function to find semantically similar sentences across multiple sources
def find_similar_sentence_from_each_source(sources):
    # Create an empty list to hold all candidate sentences and keep track of their source
    all_sentences = []
    sentence_source_mapping = []

    for idx, source in enumerate(sources):
        all_sentences.extend(source)
        sentence_source_mapping.extend([idx] * len(source))
    
    # Generate embeddings for all sentences
    embeddings = model.encode(all_sentences, convert_to_tensor=True)

    # Calculate pairwise cosine similarity
    similarity_matrix = cosine_similarity(embeddings.cpu().numpy())

    # Find the maximum similarity while ensuring at least one sentence from each source
    best_sentences = []
    used_sources = set()
    
    # Iterate over all sentences and try to find one from each source
    for idx, sentence in enumerate(all_sentences):
        current_source = sentence_source_mapping[idx]
        if current_source not in used_sources:
            # Find the most similar sentence from this source to any other sentence from other sources
            best_match_score = -1
            best_match_sentence = None

            for jdx in range(len(all_sentences)):
                if idx != jdx and sentence_source_mapping[jdx] != current_source:
                    similarity_score = similarity_matrix[idx][jdx]
                    if similarity_score > best_match_score:
                        best_match_score = similarity_score
                        best_match_sentence = sentence
            
            if best_match_sentence:
                best_sentences.append(best_match_sentence)
                used_sources.add(current_source)

            if len(used_sources) == len(sources):
                break

    return best_sentences




modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [10]:
def get_similar_sentence_transformer(
    dataset,
    starting_idx=0,
):

    responses, articles, keys = load_data(dataset)
    result = {}

    for key in tqdm(keys[starting_idx:], desc="Processing keys"):
        summaries = []
        result[key]= {}
        for idx, other in enumerate([s for s in SOURCES]):
            summary = responses[other][key]
            summary = summary.split('\n')
            summaries.append(summary)

        similar_sentences = find_similar_sentence_from_each_source(summaries)
        for idx, other in enumerate([s for s in SOURCES]):
            result[key][other] = similar_sentences[idx]

            
    return result


In [11]:
similar_sentences_cnn = get_similar_sentence_transformer("cnn", starting_idx=500)

Processing keys:   0%|          | 0/500 [00:00<?, ?it/s]

Processing keys: 100%|██████████| 500/500 [00:18<00:00, 26.69it/s]


In [12]:
similar_sentences_cnn['ffb817ce85d7c19720ebbf0b43b01d0da61e9c06']

{'human': 'The two-hour interview takes place in the Netherlands',
 'claude': "Aruban authorities questioned Joran van der Sloot about Natalee Holloway's disappearance",
 'gpt35': "Joran van der Sloot questioned in Netherlands about Natalee Holloway's disappearance",
 'gpt4': "Joran van der Sloot was questioned in the Netherlands about Natalee Holloway's disappearance",
 'llama': "Aruban authorities questioned Joran van der Sloot in the Netherlands about Natalee Holloway's disappearance"}

In [13]:
def replace_gpt_paraphraser(
    dataset,
    starting_idx=0,
):

    responses, articles, keys = load_data(dataset)

    for key in tqdm(keys[starting_idx:], desc="Processing keys"):
        for idx, other in enumerate([s for s in SOURCES]):
            summary = responses[other][key]
            sentence_to_paraphrase = similar_sentences_cnn[key][other]
            alternate = get_gpt_paraphrase(sentence_to_paraphrase)
            summary = summary.replace(sentence_to_paraphrase, alternate)
            responses[other][key] = summary

    return responses


In [14]:
def replace_claude_paraphraser(
    dataset,
    starting_idx=0,
):

    responses, articles, keys = load_data(dataset)

    for key in tqdm(keys[starting_idx:], desc="Processing keys"):
        for idx, other in enumerate([s for s in SOURCES]):
            summary = responses[other][key]
            sentence_to_paraphrase = similar_sentences_cnn[key][other]
            alternate = get_claude_paraphrase(sentence_to_paraphrase)
            summary = summary.replace(sentence_to_paraphrase, alternate)
            responses[other][key] = summary

    return responses

In [15]:
modified_responses = replace_gpt_paraphraser("cnn", 500)
file_name = "cnn_gpt4_paraphrased_responses_1.json"
path = os.path.join("summaries","cnn",file_name)
save_to_json(modified_responses,path)

Processing keys: 100%|██████████| 500/500 [52:34<00:00,  6.31s/it]   


In [16]:
modified_responses['human']['9177e5ac94f038749e8d4eb526a65461e0f6df4c']

'Court on Heather Mills: Extent of wealth before marriage "overstated"\nMcCartney gave Mills several hundred thousand dollars each year\nJudge: Mills\' case "boils down to ... \'if he has it, I want it too\' "'

In [17]:
responses['human']['9177e5ac94f038749e8d4eb526a65461e0f6df4c']

NameError: name 'responses' is not defined

In [18]:
responses_modified_by_claude = replace_claude_paraphraser("cnn", 500)
file_name = "cnn_claude3-5_paraphrased_responses.json"
path = os.path.join("summaries","cnn",file_name)
save_to_json(responses_modified_by_claude,path)

Processing keys: 100%|██████████| 500/500 [1:04:28<00:00,  7.74s/it]   


In [19]:
responses_modified_by_claude['human']['9177e5ac94f038749e8d4eb526a65461e0f6df4c']

'The court determined that Heather Mills had overstated her financial status before marriage.\nMcCartney gave Mills several hundred thousand dollars each year\nJudge: Mills\' case "boils down to ... \'if he has it, I want it too\' "'

In [21]:
responses['human']['9177e5ac94f038749e8d4eb526a65461e0f6df4c']

NameError: name 'responses' is not defined

## Get Self Recognition Results on modified

In [12]:
def load_modified_results():
    file_name = "cnn_claude3-5_paraphrased_responses.json"
    path = os.path.join("summaries","cnn",file_name)
    loaded = load_from_json(path)
    return loaded

In [13]:
def generate_gpt_detect_recognition_paraphrase(
    dataset,
    model,
    starting_idx=0,
    ending_idx=1000,
    detection_type="detection",
    paraphrase_source=False,
    paraphrase_other=False
):
    # For retrieving summaries, the specific fine-tuning version isn't needed
    exact_model = model
    model = "gpt35" if model.endswith("gpt35") else model

    responses, articles, keys = load_data(dataset)
    modified_responses = load_modified_results()
    results = []  # load_from_json(f"results/{model}_results.json")

    for key in tqdm(keys[starting_idx:ending_idx], desc="Processing keys"):
        article = articles[key]

        source_summary = responses[model][key]

        # replace synonym
        if paraphrase_source:
            source_summary = modified_responses[model][key]

        for other in [s for s in SOURCES if s != model]:
            result = {"key": key, "model": other}
            other_summary = responses[other][key]
            if paraphrase_other:
                other_summary = modified_responses[other][key]
            # Detection
            try:
                forward_result = get_model_choice(
                    source_summary,
                    other_summary,
                    article,
                    detection_type,
                    exact_model,
                    return_logprobs=True,
                )
                backward_result = get_model_choice(
                    other_summary,
                    source_summary,
                    article,
                    detection_type,
                    exact_model,
                    return_logprobs=True,
                )

                forward_choice = forward_result[0].token
                backward_choice = backward_result[0].token

                result["forward_detection"] = forward_choice
                result["forward_detection_probability"] = exp(forward_result[0].logprob)
                result["backward_detection"] = backward_choice
                result["backward_detection_probability"] = exp(backward_result[0].logprob)

                match (forward_choice, backward_choice):
                    case ("1", "2"):
                        result["detection_score"] = 0.5 * (
                            exp(forward_result[0].logprob) + exp(backward_result[0].logprob)
                        )
                    case ("2", "1"):
                        result["detection_score"] = 0.5 * (
                            exp(forward_result[1].logprob) + exp(backward_result[1].logprob)
                        )
                    case ("1", "1"):
                        result["detection_score"] = 0.5 * (
                            exp(forward_result[0].logprob) + exp(backward_result[1].logprob)
                        )
                    case ("2", "2"):
                        result["detection_score"] = 0.5 * (
                            exp(forward_result[1].logprob) + exp(backward_result[0].logprob)
                        )
            except ValueError:
                print('Error:', key, other)
            results.append(result)
    return results



In [14]:
for model in ["gpt4"]:
    #print(SOURCES)
    print(f"Starting {model}")
    results = generate_gpt_detect_recognition_paraphrase(
        "cnn", model, starting_idx=500, ending_idx=950, paraphrase_source=True, paraphrase_other=True
    )
    #Save results
    file_name = f"{model}_results_claude_paraphrased_450.json"
    path = os.path.join("results","cnn","synonym",file_name)
    save_to_json(results,path)

Starting gpt4


Processing keys: 100%|██████████| 450/450 [43:14<00:00,  5.77s/it]   


In [16]:
for model in ["gpt4"]:
    print(f"Starting {model}")
    results = generate_gpt_detect_recognition_paraphrase(
        "cnn", model, starting_idx=500, ending_idx=950, paraphrase_source=True, paraphrase_other=False
    )
    #Save results
    file_name = f"{model}_results_claude_paraphrased_source_only_450.json"
    path = os.path.join("results","cnn","synonym",file_name)
    save_to_json(results,path)

Starting gpt4


Processing keys: 100%|██████████| 450/450 [35:44<00:00,  4.77s/it]


In [18]:
for model in ["gpt4"]:
    print(f"Starting {model}")
    results = generate_gpt_detect_recognition_paraphrase(
        "cnn", model,  starting_idx=500, ending_idx=950, paraphrase_source=False, paraphrase_other=True
    )
    #Save results
    file_name = f"{model}_results_claude_paraphrased_other_only_450.json"
    path = os.path.join("results","cnn","synonym",file_name)
    save_to_json(results,path)

Starting gpt4


Processing keys: 100%|██████████| 450/450 [39:52<00:00,  5.32s/it]  


## Get Self Preference Results

In [29]:
def load_modified_results():
    file_name = "cnn_gpt4_paraphrased_responses.json"
    path = os.path.join("summaries","cnn",file_name)
    loaded = load_from_json(path)
    return loaded

# Only suitable for GPT models
def generate_gpt_preference_paraphrase(
    dataset,
    model,
    starting_idx=0,
    ending_idx=1000,
    detection_type="detection",
    comparison_type="comparison",
    paraphrase_source=False,
    paraphrase_other=False
    
):
    # For retrieving summaries, the specific fine-tuning version isn't needed
    exact_model = model
    model = "gpt35" if model.endswith("gpt35") else model

    responses, articles, keys = load_data(dataset)
    modified_responses = load_modified_results()
    results = []  # load_from_json(f"results/{model}_results.json")

    for key in tqdm(keys[starting_idx:], desc="Processing keys"):
        article = articles[key]

        # replace synonym
        if paraphrase_source:
            source_summary = modified_responses[model][key]

        for other in [s for s in SOURCES if s != model]:
            result = {"key": key, "model": other}
            other_summary = responses[other][key]

            # Detection
            forward_result = get_model_choice(
                source_summary,
                other_summary,
                article,
                detection_type,
                exact_model,
                return_logprobs=True,
            )
            backward_result = get_model_choice(
                other_summary,
                source_summary,
                article,
                detection_type,
                exact_model,
                return_logprobs=True,
            )

            forward_choice = forward_result[0].token
            backward_choice = backward_result[0].token

            result["forward_detection"] = forward_choice
            result["forward_detection_probability"] = exp(forward_result[0].logprob)
            result["backward_detection"] = backward_choice
            result["backward_detection_probability"] = exp(backward_result[0].logprob)

            match (forward_choice, backward_choice):
                case ("1", "2"):
                    result["detection_score"] = 0.5 * (
                        exp(forward_result[0].logprob) + exp(backward_result[0].logprob)
                    )
                case ("2", "1"):
                    result["detection_score"] = 0.5 * (
                        exp(forward_result[1].logprob) + exp(backward_result[1].logprob)
                    )
                case ("1", "1"):
                    result["detection_score"] = 0.5 * (
                        exp(forward_result[0].logprob) + exp(backward_result[1].logprob)
                    )
                case ("2", "2"):
                    result["detection_score"] = 0.5 * (
                        exp(forward_result[1].logprob) + exp(backward_result[0].logprob)
                    )

            # Comparison
            forward_result = get_model_choice(
                source_summary,
                other_summary,
                article,
                comparison_type,
                exact_model,
                return_logprobs=True,
            )
            backward_result = get_model_choice(
                other_summary,
                source_summary,
                article,
                comparison_type,
                exact_model,
                return_logprobs=True,
            )

            forward_choice = forward_result[0].token
            backward_choice = backward_result[0].token

            # If the comparison asked "Which is worse?" then reverse the options
            if comparison_type == "comparison_with_worse":
                forward_choice = "1" if forward_choice == "2" else "2"
                backward_choice = "1" if backward_choice == "2" else "2"

            result["forward_comparison"] = forward_choice
            result["forward_comparison_probability"] = exp(forward_result[0].logprob)
            result["backward_comparison"] = backward_choice
            result["backward_comparison_probability"] = exp(backward_result[0].logprob)

            match (forward_choice, backward_choice):
                case ("1", "2"):
                    result["self_preference"] = 0.5 * (
                        exp(forward_result[0].logprob) + exp(backward_result[0].logprob)
                    )
                case ("2", "1"):
                    result["self_preference"] = 0.5 * (
                        exp(forward_result[1].logprob) + exp(backward_result[1].logprob)
                    )
                case ("1", "1"):
                    result["self_preference"] = 0.5 * (
                        exp(forward_result[0].logprob) + exp(backward_result[1].logprob)
                    )
                case ("2", "2"):
                    result["self_preference"] = 0.5 * (
                        exp(forward_result[1].logprob) + exp(backward_result[0].logprob)
                    )

            results.append(result)
    return results

In [33]:
for model in ["gpt4"]:
    #print(SOURCES)
    print(f"Starting {model}")
    results = generate_gpt_preference_paraphrase(
        "cnn", model, starting_idx=500, ending_idx=950, paraphrase_source=True, paraphrase_other=True
    )
    #Save results
    file_name = f"{model}_preference_results_gpt_paraphrased_450.json"
    path = os.path.join("results","cnn","synonym",file_name)
    save_to_json(results,path)

Starting gpt4


Processing keys:   2%|▏         | 8/500 [01:31<1:34:11, 11.49s/it]


KeyboardInterrupt: 

In [None]:
for model in ["gpt4"]:
    print(f"Starting {model}")
    results = generate_gpt_detect_recognition_paraphrase(
        "cnn", model, starting_idx=500, ending_idx=950, paraphrase_source=True, paraphrase_other=False
    )
    #Save results
    file_name = f"{model}_preference_results_gpt_paraphrased_source_only_450.json"
    path = os.path.join("results","cnn","synonym",file_name)
    save_to_json(results,path)

In [None]:
for model in ["gpt4"]:
    print(f"Starting {model}")
    results = generate_gpt_detect_recognition_paraphrase(
        "cnn", model,  starting_idx=500, ending_idx=950, paraphrase_source=False, paraphrase_other=True
    )
    #Save results
    file_name = f"{model}_preference_results_gpt_paraphrased_other_only_450.json"
    path = os.path.join("results","cnn","synonym",file_name)
    save_to_json(results,path)

## Find Percentage of Sentence Modified

In [19]:
def load_modified_results():
    file_name = "cnn_gpt4_paraphrased_responses_1.json"
    path = os.path.join("summaries","cnn",file_name)
    loaded = load_from_json(path)
    return loaded

###
def generate_percentage_modifed(
    dataset,
    starting_idx=0,
    ending_idx=1000
):
    responses, articles, keys = load_data(dataset)
    modified_responses = load_modified_results()
    results = []  # load_from_json(f"results/{model}_results.json")

    for key in tqdm(keys[starting_idx:ending_idx], desc="Processing keys"):
        for idx, other in enumerate([s for s in SOURCES]):
            source_summary = responses[other][key]
            sentence_to_paraphrase = similar_sentences_cnn[key][other]
            
            main_length = len(source_summary)
            substring_length = len(sentence_to_paraphrase)
            # Calculate percentage
            percentage = (substring_length / main_length) * 100
            result = {"key": key, "model": other, "paraphrase":percentage}
            results.append(result)

    return results



In [20]:
modified_percentages = generate_percentage_modifed("cnn", starting_idx=500)

Processing keys:   0%|          | 0/500 [00:00<?, ?it/s]




NameError: name 'similar_sentences_cnn' is not defined

In [None]:
file_name = "cnn_gpt4_modified_percentage_1.json"
path = os.path.join("summaries","cnn",file_name)
save_to_json(modified_percentages,path)

In [5]:
###
def generate_length_modifed(
    dataset,
    starting_idx=0,
    ending_idx=1000
):
    responses, articles, keys = load_data(dataset)
    modified_responses = load_modified_results()
    results = []  # load_from_json(f"results/{model}_results.json")

    for key in tqdm(keys[starting_idx:ending_idx], desc="Processing keys"):
        for idx, other in enumerate([s for s in SOURCES]):
            sentence_to_paraphrase = similar_sentences_cnn[key][other]
            substring_length = len(sentence_to_paraphrase)
            result = {"key": key, "model": other, "paraphrase_length":substring_length}
            results.append(result)

    return results

In [14]:
length_modified = generate_length_modifed("cnn", starting_idx=500)

Processing keys: 100%|██████████| 500/500 [00:00<00:00, 500274.81it/s]


In [None]:
file_name = "cnn_gpt4_modified_length_1.json"
path = os.path.join("summaries","cnn",file_name)
save_to_json(length_modified,path)