# Initializing the setup

In [None]:
!pip install openai==0.28
import pandas as pd
import openai

In [None]:
from IPython.display import clear_output
import time

In [None]:
# Placeholder: replace the marker after `=` with your OpenAI API key string before running this cell.
openai.api_key = ### OPENAI API KEY ###

# Define annotation function

In [None]:
import requests
import json

In [None]:
def generate_tweet(claim, task, model, temperature=1, max_tokens=256, reversed=False):
    """Generate a tweet that entails, contradicts, or is neutral to a claim.

    Args:
        claim: Claim text; inserted into the user prompt after "CLAIM:".
        task: 'E'/'entailment'/'entail', 'C'/'contradiction'/'contradict',
            or 'N'/'neutral'.
        model: Model name. Names starting with 'gpt' go to the OpenAI chat
            completion API. Names starting with 'llama' (any case) go to the
            Hugging Face hosted inference API; the Llama-2 chat size is picked
            from '70b', '13b' or '7b' in the name, defaulting to 70b.
        temperature: Sampling temperature passed to the backend.
        max_tokens: Maximum number of generated tokens. For Llama the value is
            capped at 250.
        reversed: If true, the instruction swaps the roles of CLAIM and TWEET
            (the relation is stated from TWEET to CLAIM).

    Returns:
        The generated text with surrounding whitespace stripped. For Llama
        only the first line of the generation is returned.

    Raises:
        ValueError: If `task` or the model family is not recognised.
        RuntimeError: If the Hugging Face token is missing or the Hugging Face
            API answers with an error payload.
    """
    # The statement whose truth is assumed, and the one being judged against it.
    given, target = ("TWEET", "CLAIM") if reversed else ("CLAIM", "TWEET")

    # Set the instruction based on the task
    if task in ('E', 'entailment', 'entail'):
        instruction = f"Generate TWEET so that if {given} is true, then {target} is also true. Be brief. Do not start a sentence with 'Just'."
    elif task in ('C', 'contradiction', 'contradict'):
        instruction = f"Generate TWEET so that if {given} is true, {target} is false. Be brief. Do not start a sentence with 'Just'."
    elif task in ('N', 'neutral'):
        instruction = f"Generate TWEET so that even if {given} is true, {target} cannot be said to be true or false. Be brief. Do not start a sentence with 'Just'. Use keywords from CLAIM."
    else:
        raise ValueError("Invalid task value.")

    # The user prompt is the same for every task and direction.
    prompt = f"CLAIM: {claim}\nTWEET:"

    if model.startswith('gpt'):
        # Generate the response using OpenAI's API
        response = openai.ChatCompletion.create(
            model=model,
            temperature=temperature,
            max_tokens=max_tokens,
            messages=[
                {"role": "system", "content": instruction},
                {"role": "user", "content": prompt},
            ]
        )
        return response.choices[0].message.content.strip()

    model_lower = model.lower()
    if model_lower.startswith('llama'):
        import os

        # Check '70b' before '7b' for readability; neither is a substring of the other.
        size = next((s for s in ('70b', '13b', '7b') if s in model_lower), None)
        if size is None:
            print('No model size found. Defaults to Llama-2-70b-chat-hf')
            size = '70b'  # actually apply the announced default
        llama = f"meta-llama/Llama-2-{size}-chat-hf"

        # Read the key from the environment instead of hard-coding it in the notebook.
        TOKEN = os.environ.get("HF_TOKEN")  ### HUGGINGFACE API KEY ###
        if not TOKEN:
            raise RuntimeError("Set the HF_TOKEN environment variable to your Hugging Face API key.")

        # The original hard-coded 250 new tokens here; it is kept as a cap so
        # default calls behave as before.
        # NOTE(review): the hosted API documents 0-250 for max_new_tokens - confirm for this model.
        new_tokens = min(max_tokens, 250)
        llama_prompt = f"""<s>[INST] <<SYS>> {instruction} <</SYS>> {prompt} [/INST]"""

        url = f'https://api-inference.huggingface.co/models/{llama}'
        headers = {
            "Content-type": "application/json",
            "Authorization": f'Bearer {TOKEN}',
        }
        body = {
            "inputs": llama_prompt,
            "parameters": {
                "temperature": temperature,
                "max_new_tokens": new_tokens,
                "return_full_text": False,
            },
        }

        response = requests.post(url, headers=headers, data=json.dumps(body))
        payload = response.json()
        # Error responses are a dict rather than a list of generations; report
        # the message instead of failing with an opaque KeyError on index 0.
        if isinstance(payload, dict) and 'error' in payload:
            raise RuntimeError(f"Hugging Face API error: {payload['error']}")
        return payload[0]['generated_text'].strip().split('\n')[0]

    # Neither backend matched: fail loudly instead of returning None.
    raise ValueError(f"Unsupported model: {model!r}")

# Testing with prompt

In [None]:
# Sanity check for the size matching used in generate_tweet: '7b' is not a
# substring of 'llama-70b', so this find() evaluates to -1.
model = 'llama-70b'
model.find('7b')

In [None]:
import time
start_time = time.time()

claim = "Vaccininated people emit Bluetooth signals."

# Original-order instruction: the relation is stated from CLAIM to TWEET.
entail = generate_tweet(claim, 'E', 'gpt-4')
print(entail)

# Reversed instruction: must pass reversed=True, otherwise this repeats the
# original-order call above.
entail_reversed = generate_tweet(claim, 'E', 'gpt-4', reversed=True)
print(entail_reversed)

# Report how long the two generations took.
end_time = time.time()
runtime = end_time - start_time
print(runtime)

# Open dataframe

In [None]:
# File the data is read from, and the separate file results are written to.
load_path, save_path = 'data/data_final.csv', 'data/data.csv'

# Load the input CSV into the working DataFrame.
df = pd.read_csv(filepath_or_buffer=load_path)

# Annotation loop (original order)

### gpt-4 generation (original order)

In [None]:
start_time = time.time()

# (column to fill, task code, model name) for every original-order generation,
# in the order they are attempted for each row.
generation_jobs = [
    ('generated_entail_tweet_gpt-4', 'E', 'gpt-4'),
    ('generated_contradict_tweet_gpt-4', 'C', 'gpt-4'),
    ('generated_neutral_tweet_gpt-4', 'N', 'gpt-4'),
    ('generated_entail_tweet_70b', 'E', 'llama_2_70b'),
    ('generated_contradict_tweet_70b', 'C', 'llama_2_70b'),
    ('generated_neutral_tweet_70b', 'N', 'llama_2_70b'),
    ('generated_entail_tweet_gpt-3_5', 'E', 'gpt-3.5-turbo'),
    ('generated_contradict_tweet_gpt-3_5', 'C', 'gpt-3.5-turbo'),
    ('generated_neutral_tweet_gpt-3_5', 'N', 'gpt-3.5-turbo'),
]
max_retries = 50
retry_wait_seconds = 10

# Create missing target columns up front; otherwise the null check below would
# raise KeyError on every attempt and burn all retries for each row.
for column, _, _ in generation_jobs:
    if column not in df.columns:
        df[column] = None

# Iterate through the DataFrame
for i, row in df.iterrows():

    claim = row['claim']

    for _ in range(max_retries):
        try:
            for column, task, model_name in generation_jobs:
                # Check the live cell, not `row`: `row` is a snapshot taken when
                # the iteration started, so on a retry it would still look empty
                # and an already generated tweet would be regenerated.
                if pd.isnull(df.at[i, column]):
                    df.at[i, column] = generate_tweet(claim, task, model_name)
            break

        except Exception as e:
            # Any API failure: report it, wait, and retry the missing cells.
            print(e)
            time.sleep(retry_wait_seconds)

    # Save after every row so progress survives an interruption.
    df.to_csv(save_path)

    end_time = time.time()
    runtime = end_time - start_time

    clear_output(wait=True)

    print(f"Iteration: {i+1}, Runtime: {runtime} seconds")

### gpt-4 generation (reverse order)

In [None]:
start_time = time.time()

# (column to fill, task code, model name) for every reverse-order generation,
# in the order they are attempted for each row.
reversed_generation_jobs = [
    ('generated_entail_tweet_reversed_gpt-4', 'E', 'gpt-4'),
    ('generated_contradict_tweet_reversed_gpt-4', 'C', 'gpt-4'),
    ('generated_neutral_tweet_reversed_gpt-4', 'N', 'gpt-4'),
    ('generated_entail_tweet_reversed_70b', 'E', 'llama_2_70b'),
    ('generated_contradict_tweet_reversed_70b', 'C', 'llama_2_70b'),
    ('generated_neutral_tweet_reversed_70b', 'N', 'llama_2_70b'),
    ('generated_entail_tweet_reversed_gpt-3_5', 'E', 'gpt-3.5-turbo'),
    ('generated_contradict_tweet_reversed_gpt-3_5', 'C', 'gpt-3.5-turbo'),
    ('generated_neutral_tweet_reversed_gpt-3_5', 'N', 'gpt-3.5-turbo'),
]
max_retries = 50
retry_wait_seconds = 10

# Create missing target columns up front; otherwise the null check below would
# raise KeyError on every attempt and burn all retries for each row.
for column, _, _ in reversed_generation_jobs:
    if column not in df.columns:
        df[column] = None

# Iterate through the DataFrame
for i, row in df.iterrows():

    claim = row['claim']

    for _ in range(max_retries):
        try:
            for column, task, model_name in reversed_generation_jobs:
                # Check the live cell, not `row`: `row` is a snapshot taken when
                # the iteration started, so on a retry it would still look empty
                # and an already generated tweet would be regenerated.
                if pd.isnull(df.at[i, column]):
                    # reversed=True is what makes these the reverse-order
                    # prompts; without it the *_reversed_* columns would hold
                    # original-order generations.
                    df.at[i, column] = generate_tweet(claim, task, model_name, reversed=True)
            break

        except Exception as e:
            # Any API failure: report it, wait, and retry the missing cells.
            print(e)
            time.sleep(retry_wait_seconds)

    # Save after every row so progress survives an interruption.
    df.to_csv(save_path)

    end_time = time.time()
    runtime = end_time - start_time

    clear_output(wait=True)

    print(f"Iteration: {i+1}, Runtime: {runtime} seconds")

# Additional index

In [None]:
# Display the number of rows in the DataFrame.
len(df)

In [None]:
import random

# Shuffle the row positions 0..a-1 with a fixed seed so the selection is
# reproducible across runs.
a = len(df)
x = list(range(a))
rng = random.Random(42)
rng.shuffle(x)

# Fractional sizes of the two extra groups, truncated to ints only when slicing:
# (1/2 - 1/3) of the rows for entailment, (35/100 - 1/3) for neutral.
extra_entail_size = a * 1/2 - a * 1/3
extra_neutral_size = a * 35/100 - a * 1/3
entail_stop = int(extra_entail_size)
neutral_stop = int(extra_entail_size + extra_neutral_size)

# Consecutive, non-overlapping slices of the shuffled positions.
entailment_add_index = x[:entail_stop]
neutral_add_index = x[entail_stop:neutral_stop]

# Additional annotation for unbalanced train set

In [None]:
start_time = time.time()

# (column to fill, task code, model name, reversed flag) for the extra
# entailment and neutral generations, in the order they are attempted.
entail_added_jobs = [
    ('generated_entail_tweet_gpt-4_added', 'E', 'gpt-4', False),
    ('generated_entail_tweet_reversed_gpt-4_added', 'E', 'gpt-4', True),
    ('generated_entail_tweet_gpt-3_5_added', 'E', 'gpt-3.5-turbo', False),
    ('generated_entail_tweet_reversed_gpt-3_5_added', 'E', 'gpt-3.5-turbo', True),
    ('generated_entail_tweet_llama_2_70b_added', 'E', 'llama_2_70b', False),
    ('generated_entail_tweet_reversed_llama_2_70b_added', 'E', 'llama_2_70b', True),
]
neutral_added_jobs = [
    ('generated_neutral_tweet_gpt-4_added', 'N', 'gpt-4', False),
    ('generated_neutral_tweet_reversed_gpt-4_added', 'N', 'gpt-4', True),
    ('generated_neutral_tweet_gpt-3_5_added', 'N', 'gpt-3.5-turbo', False),
    ('generated_neutral_tweet_reversed_gpt-3_5_added', 'N', 'gpt-3.5-turbo', True),
    ('generated_neutral_tweet_llama_2_70b_added', 'N', 'llama_2_70b', False),
    ('generated_neutral_tweet_reversed_llama_2_70b_added', 'N', 'llama_2_70b', True),
]
max_retries = 50
retry_wait_seconds = 10

# Sets give constant-time membership tests inside the row loop.
# NOTE(review): the selections hold row positions, so this assumes df has the
# default 0..n-1 index - confirm.
entail_rows = set(entailment_add_index)
neutral_rows = set(neutral_add_index)

# Create missing target columns up front; otherwise the null check below would
# raise KeyError on every attempt and burn all retries for each selected row.
for column, _, _, _ in entail_added_jobs + neutral_added_jobs:
    if column not in df.columns:
        df[column] = None

# Iterate through the DataFrame
for i, row in df.iterrows():
    claim = row['claim']

    # A row gets extra entailment tweets, extra neutral tweets, or nothing.
    if i in entail_rows:
        jobs = entail_added_jobs
    elif i in neutral_rows:
        jobs = neutral_added_jobs
    else:
        jobs = []

    for _ in range(max_retries):
        try:
            for column, task, model_name, use_reversed in jobs:
                # Check the live cell, not `row`: `row` is a snapshot taken when
                # the iteration started, so on a retry it would still look empty
                # and an already generated tweet would be regenerated.
                if pd.isnull(df.at[i, column]):
                    df.at[i, column] = generate_tweet(claim, task, model_name, reversed=use_reversed)
            break

        except Exception as e:
            # Any API failure: report it, wait, and retry the missing cells.
            print(e)
            time.sleep(retry_wait_seconds)

    # Save after every row so progress survives an interruption.
    df.to_csv(save_path)

    end_time = time.time()
    runtime = end_time - start_time

    clear_output(wait=True)

    print(f"Iteration: {i+1}, Runtime: {runtime} seconds")