# Initializing the setup

In [None]:
import pandas as pd
import openai

In [None]:
from IPython.display import clear_output
import time

In [None]:
# Load the API credentials from local text files.
# The contents are stripped because key files usually end with a newline, and
# a stray newline would corrupt the "Authorization: Bearer ..." header built
# from TOKEN as well as the OpenAI key.
with open("OPENAI_API_KEY.txt", "r", encoding="utf-8") as file:
    openai.api_key = file.read().strip()

with open("HUGGINGFACE_API_KEY.txt", "r", encoding="utf-8") as file:
    TOKEN = file.read().strip()

# Define tweet generation function

In [None]:
import requests
import json

In [None]:
def generate_tweet(claim, task, model, temperature=1, max_tokens=2048):
    """Generate a tweet that entails, contradicts, or is neutral to a claim.

    Args:
        claim: Text of the claim the tweet is written against.
        task: Relation to produce. One of 'E'/'entailment'/'entail',
            'C'/'contradiction'/'contradict', or 'N'/'neutral'.
        model: Model name. Names starting with 'gpt' are sent to the OpenAI
            chat completion endpoint. Names starting with 'llama' (any case)
            are sent to the Hugging Face Inference API as a Llama-2 chat
            model whose size is picked from a '70b', '13b' or '7b' substring
            (70b when none is present).
        temperature: Sampling temperature forwarded to the selected backend.
        max_tokens: Maximum number of tokens to generate, forwarded to the
            selected backend.

    Returns:
        The generated tweet with surrounding whitespace removed. For Llama
        models only the first line of the generation is returned.

    Raises:
        ValueError: If ``task`` or ``model`` is not recognised.
        RuntimeError: If the Hugging Face API returns a payload without a
            generated text (for example an error message).
        requests.HTTPError: If the Hugging Face API answers with an HTTP
            error status.
    """
    # Pick the system instruction for the requested relation.
    if task in ('E', 'entailment', 'entail'):
        instruction = "Generate TWEET so that if TWEET is true, then CLAIM is also true. Be brief. Do not start a sentence with 'Just'."
    elif task in ('C', 'contradiction', 'contradict'):
        instruction = "Generate TWEET so that if TWEET is true, CLAIM is false. Be brief. Do not start a sentence with 'Just'."
    elif task in ('N', 'neutral'):
        instruction = "Generate TWEET so that even if TWEET is true, CLAIM cannot be said to be true or false. Be brief. Do not start a sentence with 'Just'. Use keywords from CLAIM."
    else:
        raise ValueError("Invalid task value.")

    # The user prompt is identical for every task; only the instruction varies.
    prompt = f"CLAIM: {claim}\nTWEET:"

    if model.startswith('gpt'):
        # Generate the response using OpenAI's API
        response = openai.ChatCompletion.create(
            model=model,
            temperature=temperature,
            max_tokens=max_tokens,
            messages=[
                {"role": "system", "content": instruction},
                {"role": "user", "content": prompt},
            ],
        )
        return response.choices[0].message.content.strip()

    model_name = model.lower()
    if model_name.startswith('llama'):
        # Resolve the Hugging Face repository from the size tag in the name.
        # The 70b default is assigned up front so the fallback announced in
        # the message below actually takes effect.
        llama = "meta-llama/Llama-2-70b-chat-hf"
        for size in ('70b', '13b', '7b'):
            if size in model_name:
                llama = f"meta-llama/Llama-2-{size}-chat-hf"
                break
        else:
            print('No model size found. Defaults to Llama-2-70b-chat-hf')

        # Llama-2 chat template: system instruction wrapped in <<SYS>> tags.
        llama_prompt = f"""<s>[INST] <<SYS>> {instruction} <</SYS>> {prompt} [/INST]"""

        url = f'https://api-inference.huggingface.co/models/{llama}'
        headers = {
            "Content-type": "application/json",
            "Authorization": f'Bearer {TOKEN}',
        }
        body = {
            "inputs": llama_prompt,
            "parameters": {
                # Honour the caller's sampling settings instead of fixed values.
                "temperature": temperature,
                "max_new_tokens": max_tokens,
                "return_full_text": False,
            },
        }

        response = requests.post(url, headers=headers, data=json.dumps(body))
        response.raise_for_status()
        payload = response.json()

        # A successful generation is a non-empty list of dicts; anything else
        # is reported verbatim instead of failing with an opaque KeyError.
        if (
            not isinstance(payload, list)
            or not payload
            or not isinstance(payload[0], dict)
            or 'generated_text' not in payload[0]
        ):
            raise RuntimeError(f"Unexpected response from Hugging Face API: {payload!r}")

        # Keep only the first line of the generation.
        return payload[0]['generated_text'].strip().split('\n')[0]

    # Fail loudly rather than silently returning None for an unknown backend.
    raise ValueError(f"Unsupported model: {model!r}")

# Testing with prompt

In [None]:
import time
# Smoke-test generate_tweet with GPT-4: one tweet per relation for a sample
# claim, followed by the total elapsed time in seconds.
# perf_counter is used because it is monotonic, so the measured duration is
# not affected by system clock adjustments.
start_time = time.perf_counter()

claim = "Vaccinated people emit Bluetooth signals."

entail = generate_tweet(claim, 'E', 'gpt-4')
print(entail)

contradict = generate_tweet(claim, 'C', 'gpt-4')
print(contradict)

neutral = generate_tweet(claim, 'N', 'gpt-4')
print(neutral)

end_time = time.perf_counter()
runtime = end_time - start_time
print(runtime)

In [None]:
import time
# Smoke-test generate_tweet with Llama-2 70b: one tweet per relation for a
# sample claim, followed by the total elapsed time in seconds.
# perf_counter is used because it is monotonic, so the measured duration is
# not affected by system clock adjustments.
start_time = time.perf_counter()

claim = "Vaccinated people emit Bluetooth signals."

entail = generate_tweet(claim, 'E', 'llama-2-70b')
print(entail)

contradict = generate_tweet(claim, 'C', 'llama-2-70b')
print(contradict)

neutral = generate_tweet(claim, 'N', 'llama-2-70b')
print(neutral)

end_time = time.perf_counter()
runtime = end_time - start_time
print(runtime)

# Open dataframe

In [None]:
# Read the dataset CSV, using its first column as the row index.
df = pd.read_csv(filepath_or_buffer='FACT-GPT dataset.csv', index_col=0)
# Kept as the last expression so the notebook displays the column names.
df.columns

# Generation loop

### gpt-4 generation

In [None]:
# Fill in the missing GPT-4 tweets for every claim in df, checkpointing the
# CSV after each row that received new text so an interrupted run can resume.

# Output column -> task code understood by generate_tweet.
GPT4_COLUMNS = {
    'generated_entail_tweet_gpt-4': 'E',
    'generated_contradict_tweet_gpt-4': 'C',
    'generated_neutral_tweet_gpt-4': 'N',
}
MAX_RETRIES = 50          # failed API calls tolerated per row
RETRY_DELAY_SECONDS = 10  # pause between retries

# Create any output column that does not exist yet. Otherwise the lookup
# below would raise KeyError on every row (the original retried that 50 times
# per row before silently moving on).
for column in GPT4_COLUMNS:
    if column not in df.columns:
        df[column] = None

failed_rows = []  # index labels of rows abandoned after MAX_RETRIES failures
start_time = time.perf_counter()

# Iterate through the DataFrame
for i, row in df.iterrows():

    claim = row['claim']

    # `row` is a snapshot taken by iterrows, so decide once which tweets are
    # missing and drop each from the list as soon as it succeeds. A retry then
    # never regenerates (and pays for) a tweet that was already produced.
    pending = [
        (column, task)
        for column, task in GPT4_COLUMNS.items()
        if pd.isnull(row[column])
    ]
    generated_any = False
    retry = 0

    while pending:
        column, task = pending[0]

        try:
            df.at[i, column] = generate_tweet(claim, task, 'gpt-4')
        except Exception as e:
            # Broad on purpose: any API or network failure is retried.
            print(e)
            retry += 1
            if retry >= MAX_RETRIES:
                failed_rows.append(i)
                break
            time.sleep(RETRY_DELAY_SECONDS)
        else:
            pending.pop(0)
            generated_any = True

    # Rewrite the CSV only when this row actually changed; rows that were
    # already complete (e.g. on a resumed run) need no checkpoint.
    if generated_any:
        df.to_csv('FACT-GPT dataset.csv')

    end_time = time.perf_counter()
    runtime = end_time - start_time

    clear_output(wait=True)

    print(f"Iteration: {i+1}, Runtime: {runtime} seconds")

# clear_output erases the per-row error messages, so report the rows that
# were given up on once the loop is finished.
if failed_rows:
    print(f"Rows skipped after {MAX_RETRIES} failed attempts: {failed_rows}")

# Unassign runtime

In [None]:
# Call the Colab runtime's unassign() once the cells above have finished.
# NOTE(review): google.colab is only importable inside Google Colab, and
# unassign() presumably disconnects and releases the VM — confirm that all
# results have been saved before running this cell.
from google.colab import runtime
runtime.unassign()