In [None]:
# ! pip install nltk
# import nltk
# nltk.download('punkt')
# nltk.download('wordnet')
# nltk.download('omw-1.4')
# nltk.download('punkt_tab')

In [1]:
import os
# Show the kernel's working directory; later cells build relative paths
# (e.g. os.path.join("results", ...)) that resolve against it.
os.getcwd()

'c:\\Users\\G25971483\\Desktop\\Projects\\LLM\\self_recognition'

In [2]:
import random
import nltk
from nltk.corpus import wordnet
from nltk.tokenize import word_tokenize

# Set the seed for reproducibility
random.seed(123)

# Module-level logs appended to by replace_with_synonyms on every successful
# replacement: all_words[i] is the original token and all_syn[i] is the full
# synonym list it was drawn from. They are never cleared, so they accumulate
# across calls until this cell is re-run.
all_words = []
all_syn = []

def get_synonyms(word):
    """
    Collect WordNet synonyms for a word.

    Every lemma name of every synset that WordNet returns for ``word`` is
    gathered in lookup order, with underscores turned into spaces
    (multi-word lemmas such as ``atomic_number_92``). Lemmas equal to
    ``word`` ignoring case are dropped so a word is never offered as its
    own synonym. Duplicates are kept: a lemma that occurs in several
    synsets appears several times in the result.

    Args:
        word (str): The word for which to find synonyms.
    Returns:
        list: Synonym strings for the input word; an empty list if
              WordNet yields none.
    """
    # Lazily walk synsets -> lemma names, normalising underscores on the way.
    spaced_lemmas = (
        lemma.replace('_', ' ')
        for synset in wordnet.synsets(word)
        for lemma in synset.lemma_names()
    )
    # Keep everything except case-insensitive matches of the original word.
    return [lemma for lemma in spaced_lemmas if lemma.lower() != word.lower()]

def replace_with_synonyms(sentence, num_words_to_replace):
    """
    Replaces up to a specified number of words in a sentence with synonyms.

    The sentence is tokenized with ``word_tokenize``; purely alphabetic
    tokens are eligible for replacement. Twice the requested number of
    eligible tokens (or all of them, if fewer) are sampled at random as
    candidates, to leave room for candidates that have no synonym. Tokens
    are then visited in sentence order: each token whose text matches a
    sampled candidate is looked up with ``get_synonyms`` and, if synonyms
    exist, replaced by a randomly chosen one until the requested count is
    reached. Because matching is by text, every occurrence of a sampled
    word counts as a candidate, and when more candidates have synonyms
    than needed the earliest ones in the sentence are the ones replaced.

    Side effects: each replacement appends the original token to the
    module-level ``all_words`` list and its synonym list to ``all_syn``.

    Note: the result is the token list joined with single spaces, so the
    original whitespace (including newlines) is not preserved and
    punctuation tokens end up space-separated.

    Args:
        sentence (str): The input sentence from which words will be replaced.
        num_words_to_replace (int): The maximum number of words to replace.
            Zero or a negative value replaces nothing.

    Returns:
        str: The space-joined tokens with up to ``num_words_to_replace``
             words replaced by synonyms.
    """
    # A negative request means "replace nothing"; passed through unchanged it
    # would become a negative sample size and make random.sample raise.
    quota = max(num_words_to_replace, 0)

    # Tokenize the sentence
    words = word_tokenize(sentence)
    # Filter out non-alphabetic tokens (like punctuation)
    words_alpha = [word for word in words if word.isalpha()]

    # Randomly sample 2x the quota to account for words without synonyms.
    # A set gives constant-time membership checks in the loop below.
    candidates = set(
        random.sample(words_alpha, min(2 * quota, len(words_alpha)))
    )

    words_replaced = 0
    new_sentence = []
    for word in words:
        # Only consult WordNet while replacements are still owed; once the
        # quota is met the remaining tokens are copied through untouched.
        if words_replaced < quota and word in candidates:
            synonyms = get_synonyms(word)
            if synonyms:
                # Replace with a random synonym
                new_sentence.append(random.choice(synonyms))
                # Operational log of what was replaced and from which pool
                all_words.append(word)
                all_syn.append(synonyms)
                words_replaced += 1
                continue
        # Not a candidate, no synonym found, or quota reached: keep the token
        new_sentence.append(word)

    return ' '.join(new_sentence)



### Examine example

In [3]:
from data import load_data, SOURCES, save_to_json, load_from_json
# load_data returns three objects; the cells below index them as
# responses[source][key] and keys[i].
responses, articles, keys = load_data("cnn")

  from .autonotebook import tqdm as notebook_tqdm


In [9]:
# Pick one fixed article key and show the stored "gpt4" summary for it,
# to use as the worked example below.
key = keys[22]
random_summary = responses["gpt4"][key]
print(random_summary)

Former government contractor indicted for stealing nuclear materials
Roy Lynn Oakley accused of attempting to sell restricted uranium enrichment components
Oakley faces up to 10 years in prison and a $250,000 fine per count
FBI sting operation prevented the materials from reaching foreign entities


In [13]:
new_summary = replace_with_synonyms(random_summary, 5)  # Replace up to 5 of the words
print(new_summary)


Former government declarer indict for thievery atomic materials Roy Lynn Oakley accused of attempting to sell restricted atomic number 92 enrichment components Oakley faces up to 10 years in prison and a $ 250,000 fine per count FBI sting operation prevented the materials from reaching foreign entities


In [None]:
# Dump the replacement log: each replaced word followed by the synonym list
# it was drawn from (both lists are filled by replace_with_synonyms).
for i in range(len(all_words)):
    print('word:', all_words[i])
    print(all_syn[i])

# Apply to Dataset and Check Recognition

In [3]:
from data import load_data, SOURCES, save_to_json, load_from_json
from models import (
    get_gpt_recognition_logprobs,
    get_model_choice,
    get_logprobs_choice_with_sources,
    get_gpt_score,
)

from math import exp
from pprint import pprint
from random import shuffle

from tqdm import tqdm
import time


  from .autonotebook import tqdm as notebook_tqdm


In [22]:
# Only suitable for GPT models
def generate_gpt_detect_recognition_synonym(
    dataset,
    model,
    starting_idx=0,
    detection_type="detection",
    replace_synonym = False,
    num_words_to_replace = 0
):
    """
    Runs pairwise detection of a model's own summary against every other source.

    For each article key from ``starting_idx`` onward, the summary stored for
    ``model`` is fetched (optionally perturbed once per key with
    ``replace_with_synonyms``) and paired with the summary of every other
    entry in ``SOURCES``. ``get_model_choice`` is called twice per pair: once
    with the model's summary first ("forward") and once with it second
    ("backward"). Entry 0 of each returned sequence is recorded as the
    chosen answer together with ``exp`` of its logprob.

    ``detection_score`` is the mean of two probabilities: from the forward
    call, entry 0 if its token is "1" and entry 1 otherwise; from the
    backward call, entry 0 if its token is "2" and entry 1 otherwise.
    This presumably tracks the probability assigned to the model's own
    summary in each ordering, assuming entry 1 is the alternative answer;
    confirm against ``get_model_choice``. When the two chosen tokens are
    not both drawn from "1"/"2", no ``detection_score`` key is written.

    Args:
        dataset (str): Dataset name passed to ``load_data`` (e.g., "cnn").
        model (str): Model identifier passed to ``get_model_choice``. Names
            ending in "gpt35" use the "gpt35" summaries.
        starting_idx (int, optional): Index of the first key to process. Defaults to 0.
        detection_type (str, optional): Forwarded to ``get_model_choice``. Defaults to "detection".
        replace_synonym (bool, optional): Whether to perturb the model's own summary. Defaults to False.
        num_words_to_replace (int, optional): Word count passed to ``replace_with_synonyms``. Defaults to 0.

    Returns:
        list: One dict per (key, other source) with the fields
            - "key", "model" (the other source)
            - "forward_detection", "forward_detection_probability"
            - "backward_detection", "backward_detection_probability"
            - "detection_score" (only for the four "1"/"2" combinations)
    """
    # (forward token, backward token) -> which entry of the forward and of the
    # backward result feeds the score (0 = chosen entry, 1 = the next one).
    score_entries = {
        ("1", "2"): (0, 0),
        ("2", "1"): (1, 1),
        ("1", "1"): (0, 1),
        ("2", "2"): (1, 0),
    }

    # For retrieving summaries, the specific fine-tuning version isn't needed
    exact_model = model
    if model.endswith("gpt35"):
        model = "gpt35"

    responses, articles, keys = load_data(dataset)
    results = []

    for key in tqdm(keys[starting_idx:], desc="Processing keys"):
        article = articles[key]

        source_summary = responses[model][key]
        if replace_synonym:
            source_summary = replace_with_synonyms(source_summary, num_words_to_replace)

        rivals = [name for name in SOURCES if name != model]
        for other in rivals:
            other_summary = responses[other][key]

            # Ask in both presentation orders.
            forward_result = get_model_choice(
                source_summary,
                other_summary,
                article,
                detection_type,
                exact_model,
                return_logprobs=True,
            )
            backward_result = get_model_choice(
                other_summary,
                source_summary,
                article,
                detection_type,
                exact_model,
                return_logprobs=True,
            )

            forward_choice = forward_result[0].token
            backward_choice = backward_result[0].token

            result = {
                "key": key,
                "model": other,
                "forward_detection": forward_choice,
                "forward_detection_probability": exp(forward_result[0].logprob),
                "backward_detection": backward_choice,
                "backward_detection_probability": exp(backward_result[0].logprob),
            }

            # Entry 1 is only touched for combinations that need it.
            entries = score_entries.get((forward_choice, backward_choice))
            if entries is not None:
                forward_entry, backward_entry = entries
                result["detection_score"] = 0.5 * (
                    exp(forward_result[forward_entry].logprob)
                    + exp(backward_result[backward_entry].logprob)
                )

            results.append(result)
    return results


In [25]:
# Run the source-only synonym experiment for each listed model and save the
# results. starting_idx=950 restricts the run to keys[950:] (50 keys in the
# recorded output), which is what the "50_sentence" in the file name refers to.
for model in ["gpt4"]:
    print(SOURCES)
    print(f"Starting {model}")
    num_synonym = 5
    results = generate_gpt_detect_recognition_synonym(
        "cnn", model,replace_synonym=True, num_words_to_replace=num_synonym, starting_idx=950
    )
    # Save results under results/cnn/synonym/ (relative to the working directory)
    file_name = f"{model}_results_{num_synonym}_replace_50_sentence.json"
    path = os.path.join("results","cnn","synonym",file_name)
    save_to_json(results,path)

['human', 'claude', 'gpt35', 'gpt4', 'llama']
Starting gpt4


Processing keys: 100%|██████████| 50/50 [13:35<00:00, 16.32s/it]


In [None]:
# FOR THE OTHER 450 data points
def generate_gpt_detect_recognition_synonym(
    dataset,
    model,
    starting_idx=0,
    ending_idx=1000,
    detection_type="detection",
    replace_synonym = False,
    num_words_to_replace = 0
):
    """
    Runs pairwise detection over a bounded slice of article keys.

    Same procedure as the earlier cell's function of the same name, which
    this definition replaces once the cell is run, but processes
    ``keys[starting_idx:ending_idx]`` instead of everything from
    ``starting_idx`` onward. Note that with the default ``ending_idx`` of
    1000, keys at index 1000 and beyond are not processed.

    For each key, the summary stored for ``model`` is fetched (optionally
    perturbed once per key with ``replace_with_synonyms``) and paired with
    the summary of every other entry in ``SOURCES``. ``get_model_choice`` is
    called with the model's summary first ("forward") and second
    ("backward"); entry 0 of each result is recorded as the chosen answer
    with ``exp`` of its logprob. ``detection_score`` averages two
    probabilities: forward entry 0 if its token is "1", else entry 1, and
    backward entry 0 if its token is "2", else entry 1. It is omitted when
    the chosen tokens are not both drawn from "1"/"2".

    Args:
        dataset (str): Dataset name passed to ``load_data``.
        model (str): Model identifier passed to ``get_model_choice``. Names
            ending in "gpt35" use the "gpt35" summaries.
        starting_idx (int, optional): Index of the first key to process. Defaults to 0.
        ending_idx (int, optional): Index one past the last key to process. Defaults to 1000.
        detection_type (str, optional): Forwarded to ``get_model_choice``. Defaults to "detection".
        replace_synonym (bool, optional): Whether to perturb the model's own summary. Defaults to False.
        num_words_to_replace (int, optional): Word count passed to ``replace_with_synonyms``. Defaults to 0.

    Returns:
        list: One dict per (key, other source) holding "key", "model",
            "forward_detection", "forward_detection_probability",
            "backward_detection", "backward_detection_probability" and,
            for the four "1"/"2" combinations, "detection_score".
    """
    # (forward token, backward token) -> entry index to read from the forward
    # and from the backward result when computing the score.
    entry_lookup = {
        ("1", "2"): (0, 0),
        ("2", "1"): (1, 1),
        ("1", "1"): (0, 1),
        ("2", "2"): (1, 0),
    }

    # For retrieving summaries, the specific fine-tuning version isn't needed
    exact_model = model
    if model.endswith("gpt35"):
        model = "gpt35"

    responses, articles, keys = load_data(dataset)
    results = []

    selected_keys = keys[starting_idx:ending_idx]
    for key in tqdm(selected_keys, desc="Processing keys"):
        article = articles[key]

        source_summary = responses[model][key]
        if replace_synonym:
            source_summary = replace_with_synonyms(source_summary, num_words_to_replace)

        other_sources = [name for name in SOURCES if name != model]
        for other in other_sources:
            other_summary = responses[other][key]

            # Query once per presentation order.
            forward_result = get_model_choice(
                source_summary,
                other_summary,
                article,
                detection_type,
                exact_model,
                return_logprobs=True,
            )
            backward_result = get_model_choice(
                other_summary,
                source_summary,
                article,
                detection_type,
                exact_model,
                return_logprobs=True,
            )

            forward_choice = forward_result[0].token
            backward_choice = backward_result[0].token

            result = {
                "key": key,
                "model": other,
                "forward_detection": forward_choice,
                "forward_detection_probability": exp(forward_result[0].logprob),
                "backward_detection": backward_choice,
                "backward_detection_probability": exp(backward_result[0].logprob),
            }

            # Unlisted token combinations leave the score out entirely.
            picked = entry_lookup.get((forward_choice, backward_choice))
            if picked is not None:
                forward_entry, backward_entry = picked
                result["detection_score"] = 0.5 * (
                    exp(forward_result[forward_entry].logprob)
                    + exp(backward_result[backward_entry].logprob)
                )

            results.append(result)
    return results


# Apply synonym replacement to the 'other' summary as well

In [4]:
# Only suitable for GPT models
def generate_gpt_detect_recognition_dual_synonym(
    dataset,
    model,
    starting_idx=0,
    detection_type="detection",
    replace_synonym = False,
    num_words_to_replace = 0
):
    """
    Runs pairwise detection with synonym replacement applied to both summaries.

    For each article key from ``starting_idx`` onward, the summary stored for
    ``model`` is fetched and paired with the summary of every other entry in
    ``SOURCES``. When ``replace_synonym`` is set, the model's own summary is
    perturbed once per key and each other summary is perturbed once per
    pair, both with the same ``num_words_to_replace``.

    ``get_model_choice`` is called with the model's summary first
    ("forward") and second ("backward"); entry 0 of each result is recorded
    as the chosen answer with ``exp`` of its logprob. ``detection_score``
    averages two probabilities: forward entry 0 if its token is "1", else
    entry 1, and backward entry 0 if its token is "2", else entry 1. It is
    omitted when the chosen tokens are not both drawn from "1"/"2".

    Args:
        dataset (str): Dataset name passed to ``load_data`` (e.g., "cnn").
        model (str): Model identifier passed to ``get_model_choice``. Names
            ending in "gpt35" use the "gpt35" summaries.
        starting_idx (int, optional): Index of the first key to process. Defaults to 0.
        detection_type (str, optional): Forwarded to ``get_model_choice``. Defaults to "detection".
        replace_synonym (bool, optional): Whether to perturb both summaries. Defaults to False.
        num_words_to_replace (int, optional): Word count passed to
            ``replace_with_synonyms`` for each of the two summaries. Defaults to 0.

    Returns:
        list: One dict per (key, other source) holding "key", "model",
            "forward_detection", "forward_detection_probability",
            "backward_detection", "backward_detection_probability" and,
            for the four "1"/"2" combinations, "detection_score".
    """
    # (forward token, backward token) -> entry index to read from the forward
    # and from the backward result when computing the score.
    entry_by_choice = {
        ("1", "2"): (0, 0),
        ("2", "1"): (1, 1),
        ("1", "1"): (0, 1),
        ("2", "2"): (1, 0),
    }

    # For retrieving summaries, the specific fine-tuning version isn't needed
    exact_model = model
    if model.endswith("gpt35"):
        model = "gpt35"

    responses, articles, keys = load_data(dataset)
    results = []

    for key in tqdm(keys[starting_idx:], desc="Processing keys"):
        article = articles[key]

        # The model's own summary is perturbed once and reused for all pairs.
        source_summary = responses[model][key]
        if replace_synonym:
            source_summary = replace_with_synonyms(source_summary, num_words_to_replace)

        opponents = [name for name in SOURCES if name != model]
        for other in opponents:
            # The competing summary gets its own perturbation for each pair.
            other_summary = responses[other][key]
            if replace_synonym:
                other_summary = replace_with_synonyms(other_summary, num_words_to_replace)

            forward_result = get_model_choice(
                source_summary,
                other_summary,
                article,
                detection_type,
                exact_model,
                return_logprobs=True,
            )
            backward_result = get_model_choice(
                other_summary,
                source_summary,
                article,
                detection_type,
                exact_model,
                return_logprobs=True,
            )

            forward_choice = forward_result[0].token
            backward_choice = backward_result[0].token

            result = {
                "key": key,
                "model": other,
                "forward_detection": forward_choice,
                "forward_detection_probability": exp(forward_result[0].logprob),
                "backward_detection": backward_choice,
                "backward_detection_probability": exp(backward_result[0].logprob),
            }

            # Unlisted token combinations leave the score out entirely.
            chosen_entries = entry_by_choice.get((forward_choice, backward_choice))
            if chosen_entries is not None:
                forward_entry, backward_entry = chosen_entries
                result["detection_score"] = 0.5 * (
                    exp(forward_result[forward_entry].logprob)
                    + exp(backward_result[backward_entry].logprob)
                )

            results.append(result)
    return results


In [7]:
# Run the dual (both-summaries) synonym experiment for each listed model and
# save the results. starting_idx=950 restricts the run to keys[950:] (50 keys
# in the recorded output), matching the "50_sentence" in the file name.
for model in ["gpt4"]:
    print(SOURCES)
    print(f"Starting {model}")
    num_synonym = 5
    results = generate_gpt_detect_recognition_dual_synonym(
        "cnn", model,replace_synonym=True, num_words_to_replace=num_synonym, starting_idx=950
    )
    # Save results under results/cnn/synonym/ (relative to the working directory)
    file_name = f"{model}_results_{num_synonym}_replace_bothsentences_50_sentence.json"
    path = os.path.join("results","cnn","synonym",file_name)
    save_to_json(results,path)

['human', 'claude', 'gpt35', 'gpt4', 'llama']
Starting gpt4


Processing keys: 100%|██████████| 50/50 [13:51<00:00, 16.63s/it]
