# Initializing the setup

In [None]:
!pip install openai==0.28
import pandas as pd
import openai

In [None]:
from IPython.display import clear_output
import time

from sklearn.model_selection import train_test_split
from sklearn.utils import resample
import json

In [None]:
import os

# Read the key from the environment instead of pasting it into the notebook,
# so the secret is never saved alongside the code.
# Raises KeyError right here if OPENAI_API_KEY is not set.
openai.api_key = os.environ["OPENAI_API_KEY"]

# Load the dataframe

In [None]:
# CSV holding the claims and the generated tweets used to build the datasets.
load_path = 'data/data_final.csv'

# The first CSV column becomes the DataFrame index.
original_df = pd.read_csv(filepath_or_buffer=load_path, index_col=[0])

In [None]:
# Build the train/validation JSONL files; the unbalanced variant resamples the classes to roughly 50/35/15
def get_train_and_validation_sets(original_df, model_name, balanced, reversed=False):
    """Build fine-tuning train/validation sets and write them as JSONL files.

    Every row of ``original_df`` yields three labelled examples
    (CONTRADICTION, ENTAILMENT, NEUTRAL), one per generated-tweet column of
    the requested ``model_name``. The examples are optionally resampled to an
    unbalanced class distribution, shuffled, split 80:20 and converted to the
    chat ``messages`` layout.

    Args:
        original_df: DataFrame with the columns ``claim_number``, ``claim``
            and ``generated_{entail,neutral,contradict}_tweet[_reversed]_<model_name>``.
        model_name: Suffix selecting which generated-tweet columns to read;
            it is also embedded in the output file names.
        balanced: If truthy, keep the three classes equally sized. Otherwise
            resample to 50% ENTAILMENT and 35% NEUTRAL (both drawn with
            replacement) plus the remaining ~15% CONTRADICTION (drawn without
            replacement).
        reversed: If truthy, read the ``_reversed`` tweet columns, put CLAIM
            before TWEET in the user message and use the CLAIM -> TWEET
            system prompt.

    Returns:
        ``(train_df, val_df, train_dataset, val_dataset)``: the two split
        DataFrames and the corresponding lists of ``{"messages": [...]}``
        dicts.

    Raises:
        ValueError: If ``original_df`` has no rows.
        KeyError: If an expected generated-tweet column is missing.

    Side effects:
        Writes ``train_<model_name><suffixes>.jsonl`` and
        ``val_<model_name><suffixes>.jsonl`` into ``data/openai_train_json``,
        creating the directory if needed.
    """
    import os  # local import: the notebook's own ``import os`` lives in a later cell

    if len(original_df) == 0:
        raise ValueError("original_df contains no rows")

    # Plain truthiness, so the suffixes are always bound (the former
    # ``== True`` / ``== False`` chains left them undefined for other values).
    b_suffix = '_balanced' if balanced else '_unbalanced'
    suffix = '_reversed' if reversed else ''

    # (label, source column) pairs in the order the examples are emitted for
    # each row; the order matters for the seeded shuffle and split below.
    label_columns = [
        ('CONTRADICTION', f'generated_contradict_tweet{suffix}_{model_name}'),
        ('ENTAILMENT', f'generated_entail_tweet{suffix}_{model_name}'),
        ('NEUTRAL', f'generated_neutral_tweet{suffix}_{model_name}'),
    ]
    missing = [column for _, column in label_columns if column not in original_df.columns]
    if missing:
        raise KeyError(f"original_df is missing expected column(s): {missing}")

    training_list = [
        {
            'old_index': index,
            'claim_number': row['claim_number'],
            'claim': row['claim'],
            'generated_tweet': row[column],
            'ground_truth': label,
        }
        for index, row in original_df.iterrows()
        for label, column in label_columns
    ]
    df = pd.DataFrame(training_list)

    if not balanced:
        # Separate the classes
        df_entailment = df[df.ground_truth == 'ENTAILMENT']
        df_neutral = df[df.ground_truth == 'NEUTRAL']
        df_contradiction = df[df.ground_truth == 'CONTRADICTION']

        # Target class sizes; the total number of examples is unchanged.
        n_total = len(df)
        n_entailment = int(0.5 * n_total)
        n_neutral = int(0.35 * n_total)
        n_contradiction = n_total - n_entailment - n_neutral

        # Over-represented classes are drawn with replacement, the
        # under-represented one without.
        df = pd.concat([
            resample(df_entailment, replace=True, n_samples=n_entailment, random_state=42),
            resample(df_neutral, replace=True, n_samples=n_neutral, random_state=42),
            resample(df_contradiction, replace=False, n_samples=n_contradiction, random_state=42),
        ])

    # Shuffle, then split into training and validation sets (80:20).
    df = df.sample(frac=1, random_state=42).reset_index(drop=True)
    train_df, val_df = train_test_split(df, test_size=0.2, random_state=42)

    # The reversed data must be trained with the same CLAIM -> TWEET system
    # prompt that the reversed models are queried with at inference time;
    # previously both orders shared the TWEET -> CLAIM prompt.
    if reversed:
        system_prompt = (
            "Which of the following best describes the relationship between CLAIM and TWEET?\n"
            "\n"
            "If CLAIM is true:\n"
            "(ENTAILMENT) then TWEET is also true.\n"
            "(NEUTRAL) TWEET cannot be said to be true or false.\n"
            "(CONTRADICTION) then TWEET is false."
        )
    else:
        system_prompt = (
            "Which of the following best describes the relationship between TWEET and CLAIM?\n"
            "\n"
            "If TWEET is true:\n"
            "(ENTAILMENT) then CLAIM is also true.\n"
            "(NEUTRAL) CLAIM cannot be said to be true or false.\n"
            "(CONTRADICTION) then CLAIM is false."
        )

    def to_chat_example(row):
        """Convert one labelled row into a chat-format fine-tuning example."""
        tweet = row['generated_tweet']
        claim = row['claim']
        if reversed:
            user_content = f"CLAIM: {claim}\nTWEET: {tweet}\nANSWER:"
        else:
            user_content = f"TWEET: {tweet}\nCLAIM: {claim}\nANSWER:"
        return {
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_content},
                {"role": "assistant", "content": str(row['ground_truth'])},
            ]
        }

    train_dataset = [to_chat_example(row) for _, row in train_df.iterrows()]
    val_dataset = [to_chat_example(row) for _, row in val_df.iterrows()]

    # Save the datasets as JSON Lines: one JSON object per line.
    output_dir = 'data/openai_train_json'
    os.makedirs(output_dir, exist_ok=True)
    for split_name, records in (('train', train_dataset), ('val', val_dataset)):
        path = f'{output_dir}/{split_name}_{model_name}{b_suffix}{suffix}.jsonl'
        with open(path, 'w', encoding='utf-8') as f:
            f.writelines(json.dumps(record) + "\n" for record in records)

    return train_df, val_df, train_dataset, val_dataset


In [None]:
# Write the JSONL files for every source model x class balance x prompt order.
# The four names are rebound on each call, so after the loops they hold the
# result of the last combination ('70b', balanced=False, reversed=True).
for source_model in ('gpt-4', 'gpt-3_5', '70b'):
    for is_balanced in (True, False):
        for is_reversed in (False, True):
            train_df, val_df, train_dataset, val_dataset = get_train_and_validation_sets(
                original_df, source_model, balanced=is_balanced, reversed=is_reversed)

# List and upload the JSONL files

In [None]:
import os

# Directory the train/validation JSONL files were written to.
json_dir = 'data/openai_train_json'

# Everything in that directory, files and sub-directories alike.
files_and_dirs = os.listdir(json_dir)

# Keep regular files only, sorted by name.
file_list = sorted(
    entry for entry in files_and_dirs
    if os.path.isfile(os.path.join(json_dir, entry))
)
file_list

In [None]:
import os

# ``file_list`` holds bare file names, so each one has to be joined with the
# directory it was listed from; opening the bare name would look in the
# current working directory instead.
upload_dir = 'data/openai_train_json'

for file_name in file_list:
    # The context manager closes the handle once the upload call returns.
    with open(os.path.join(upload_dir, file_name), "rb") as jsonl_file:
        uploaded = openai.File.create(
            file=jsonl_file,
            purpose='fine-tune'
        )
    # Show the assigned id so it can be copied into ``openai_file_dict``.
    print(f"{file_name}: {uploaded['id']}")

In [None]:
# Maps each generated JSONL file name to the OpenAI file id used for it when
# creating fine-tuning jobs. Replace the ids below with the ones returned by
# your own uploads.
openai_file_dict = {
 # Files built from the 'gpt-4' generated tweets.
 'train_gpt-4_balanced.jsonl': "file-iNiTfeZr4A1lAar9D1uzotNl",
 'val_gpt-4_balanced.jsonl': "file-cMVJ8kld1MU6GAT7O8NmaZxN",
 'train_gpt-4_balanced_reversed.jsonl': "file-uE5gNiscctDcP4fIv6dBz6tP",
 'val_gpt-4_balanced_reversed.jsonl': "file-oQQ7bWhJq0mq5PGPhDqCsieu",
 'train_gpt-4_unbalanced.jsonl': "file-b3ZvVW9JI91Tmv4T176yU4Ww",
 'val_gpt-4_unbalanced.jsonl': "file-uyEjHXKGZ26gnXumJAlLfIvf",
 'train_gpt-4_unbalanced_reversed.jsonl': "file-Tvi3kzyboICJALdQeD0eXUKR",
 'val_gpt-4_unbalanced_reversed.jsonl': "file-xmfPLk0ACVB6DiLiboynOLlS",

 # Files built from the 'gpt-3_5' generated tweets.
 'train_gpt-3_5_balanced.jsonl': "file-E4k8BKSzRBiOEpWCFd8N8Mth",
 'val_gpt-3_5_balanced.jsonl': "file-kNN2mhw9WrNwn3F9VWDE6SNh",
 'train_gpt-3_5_balanced_reversed.jsonl': "file-FRTImyXjC1iUGg4g8DLHufHY",
 'val_gpt-3_5_balanced_reversed.jsonl': "file-2a0tRyecNNimoagAbJDn7p26",
 'train_gpt-3_5_unbalanced.jsonl': "file-1ZDla4LrJJtxjwpXnAirsCAq",
 'val_gpt-3_5_unbalanced.jsonl': "file-xmcEqNAGcI0tTtaRfzTbErzr",
 'train_gpt-3_5_unbalanced_reversed.jsonl': "file-2caz60Dy2ETA2p0zejA2UV7w",
 'val_gpt-3_5_unbalanced_reversed.jsonl': "file-JDsy3WletAxpWiU4JBqxTmIG",

 # Files built from the '70b' generated tweets.
 # NOTE(review): 'train_70b_balanced.jsonl' and
 # 'train_70b_balanced_reversed.jsonl' carry the same file id, and so do
 # 'val_70b_balanced_reversed.jsonl' and 'val_70b_unbalanced.jsonl'. Distinct
 # files would normally have distinct ids, so these look like copy-paste
 # slips; confirm against the actual upload results.
 'train_70b_balanced.jsonl': "file-l7qFQBf8ePTRs0hkK7bed8Si",
 'val_70b_balanced.jsonl': "file-fTdCf2W4atKGt01nLcrtBtxv",
 'train_70b_balanced_reversed.jsonl': "file-l7qFQBf8ePTRs0hkK7bed8Si",
 'val_70b_balanced_reversed.jsonl': "file-eEd9R78BGuxCjhoNiJTiNw41",
 'train_70b_unbalanced.jsonl': "file-oPpnttKYUo7UhXhc2mpHdn4W",
 'val_70b_unbalanced.jsonl': "file-eEd9R78BGuxCjhoNiJTiNw41",
 'train_70b_unbalanced_reversed.jsonl': "file-lhqTqG3OyREo6OWuZY7H090W",
 'val_70b_unbalanced_reversed.jsonl': "file-haKXibiYdKwDiobOYwmPVRo8"
}

# Fine-tuning (gpt-3.5-turbo)

In [None]:
def create_finetuning_job(train_key, val_key, base_model="gpt-3.5-turbo",
                          n_epochs=3, retry_wait=300, max_attempts=None):
    """Create one fine-tuning job, retrying while the API refuses it.

    Args:
        train_key: Key of the training file in ``openai_file_dict``.
        val_key: Key of the validation file in ``openai_file_dict``.
        base_model: Name of the model to fine-tune.
        n_epochs: Number of training epochs passed as a hyperparameter.
        retry_wait: Seconds to sleep between attempts.
        max_attempts: Give up after this many attempts; ``None`` retries
            indefinitely.

    Returns:
        The object returned by ``openai.FineTuningJob.create``.

    Raises:
        KeyError: If ``train_key`` or ``val_key`` is not in ``openai_file_dict``.
        openai.error.AuthenticationError, openai.error.PermissionError:
            Immediately, since waiting cannot fix bad credentials.
        openai.error.OpenAIError: The last error, once ``max_attempts`` is
            exhausted.
    """
    # Resolve the file ids before the retry loop: a mistyped key raises
    # KeyError, which must not be retried every ``retry_wait`` seconds forever.
    training_file = openai_file_dict[train_key]
    validation_file = openai_file_dict[val_key]

    attempt = 0
    while True:
        attempt += 1
        try:
            return openai.FineTuningJob.create(training_file=training_file,
                                               validation_file=validation_file,
                                               model=base_model,
                                               hyperparameters={"n_epochs": n_epochs})
        except (openai.error.AuthenticationError, openai.error.PermissionError):
            raise
        except openai.error.OpenAIError as e:
            # Presumably the usual cause is a limit on simultaneously active
            # jobs, hence the long wait before trying again.
            print(e)
            if max_attempts is not None and attempt >= max_attempts:
                raise
            time.sleep(retry_wait)


# One job per dataset variant and tweet source, submitted in this order:
# for each variant, first the 'gpt-3_5' data, then the '70b' data.
for variant in ('balanced', 'balanced_reversed', 'unbalanced', 'unbalanced_reversed'):
    for data_source in ('gpt-3_5', '70b'):
        create_finetuning_job(f'train_{data_source}_{variant}.jsonl',
                              f'val_{data_source}_{variant}.jsonl')

In [None]:
# Display the first twelve entries of the fine-tuning job listing.
openai.FineTuningJob.list()['data'][:12]

In [None]:
# Names of the fine-tuned models, one dictionary per source of training
# tweets. Replace the entries with the names of your own fine-tuned models.
model_dict_gpt_4 = {
    # no models recorded here
    }

# Keyed by training-data variant; the annotation loops look models up by
# these keys.
model_dict_gpt_3_5 = {
    'balanced': "ft:gpt-3.5-turbo-0613:usc-annenberg-school::87bgUHHx",
    'balanced_reversed': "ft:gpt-3.5-turbo-0613:usc-annenberg-school::87bmrCtf",
    'unbalanced': "ft:gpt-3.5-turbo-0613:usc-annenberg-school::87eYpT1F",
    'unbalanced_reversed': "ft:gpt-3.5-turbo-0613:usc-annenberg-school::87iD3Nru"
}

model_dict_70b = {
    # no models recorded here
}

# Test

In [None]:
# The data to annotate is read from ``load_path``; the first CSV column is
# used as the index.
test_df = pd.read_csv(load_path, index_col=[0])

# Where the annotated DataFrame is written. Kept separate from ``load_path``
# so the input file is never overwritten.
# TODO: change this if the predictions should be stored somewhere else.
save_path = 'data/data_final_annotated.csv'

# Annotation loop (gpt-3.5-turbo example)

In [None]:
# System prompt used with the models queried in TWEET -> CLAIM order.
TWEET_FIRST_SYSTEM_PROMPT = """Which of the following best describes the relationship between TWEET and CLAIM?

If TWEET is true:
(ENTAILMENT) then CLAIM is also true.
(NEUTRAL) CLAIM cannot be said to be true or false.
(CONTRADICTION) then CLAIM is false."""

# System prompt used with the "reversed" models (CLAIM -> TWEET order).
CLAIM_FIRST_SYSTEM_PROMPT = """Which of the following best describes the relationship between CLAIM and TWEET?

If CLAIM is true:
(ENTAILMENT) then TWEET is also true.
(NEUTRAL) TWEET cannot be said to be true or false.
(CONTRADICTION) then TWEET is false."""


def annotate_with_finetuned_model(df, column, model, output_path,
                                  reversed_order=False, max_retries=3, retry_wait=5):
    """Fill ``df[column]`` with the model's answer for every unannotated row.

    Rows that already hold a non-null value in ``column`` are skipped, so an
    interrupted run can be resumed. ``df`` is written to ``output_path`` after
    every new prediction and once more when the loop finishes.

    Args:
        df: DataFrame with ``tweet`` and ``claim`` columns; modified in place.
        column: Name of the prediction column; created if it does not exist.
        model: Name of the fine-tuned chat model to query.
        output_path: CSV path used for checkpointing.
        reversed_order: If true, send CLAIM before TWEET together with the
            CLAIM -> TWEET system prompt.
        max_retries: Attempts per row before giving up on that row.
        retry_wait: Seconds to wait between attempts.

    Returns:
        A list of ``(index_label, error)`` pairs for rows that could not be
        annotated.
    """
    # Without the column every row lookup raised a KeyError that was swallowed,
    # so nothing was ever predicted. An object dtype is needed because a
    # column read from CSV as all-NaN is float and cannot hold strings cleanly.
    if column not in df.columns:
        df[column] = None
    df[column] = df[column].astype(object)

    system_prompt = CLAIM_FIRST_SYSTEM_PROMPT if reversed_order else TWEET_FIRST_SYSTEM_PROMPT
    failures = []
    start_time = time.time()

    # ``position`` counts rows from 1 so the progress line does not depend on
    # the index labels being integers.
    for position, (i, row) in enumerate(df.iterrows(), start=1):
        tweet = row['tweet']
        claim = row['claim']

        if pd.isnull(row[column]):
            if reversed_order:
                user_prompt = f"CLAIM: {claim}\nTWEET: {tweet}\nANSWER:"
            else:
                user_prompt = f"TWEET: {tweet}\nCLAIM: {claim}\nANSWER:"

            predicted = False
            last_error = None
            for attempt in range(1, max_retries + 1):
                try:
                    completion = openai.ChatCompletion.create(
                        model=model,
                        messages=[
                            {"role": "system", "content": system_prompt},
                            {"role": "user", "content": user_prompt},
                        ],
                        temperature=0,
                    )
                    df.at[i, column] = completion.choices[0].message['content']
                    predicted = True
                    break
                except openai.error.OpenAIError as e:
                    last_error = e
                    print(f"Error: {e}")
                    if attempt < max_retries:
                        time.sleep(retry_wait)  # wait before the next attempt
                except Exception as e:
                    # Best effort: one bad row must not abort the whole run,
                    # but it is recorded rather than silently dropped.
                    last_error = e
                    break

            if predicted:
                # Checkpoint so finished work survives an interruption.
                df.to_csv(output_path)
            else:
                failures.append((i, last_error))

        runtime = time.time() - start_time
        clear_output(wait=True)
        print(f"Iteration: {position}, Runtime: {runtime} seconds")

    df.to_csv(output_path)

    # Reported after the loop because ``clear_output`` wipes anything printed
    # while it is running.
    if failures:
        print(f"{len(failures)} row(s) could not be annotated for '{column}':")
        for index_label, error in failures:
            print(f"  {index_label}: {error}")
    return failures


# (prediction column, key into ``model_dict_gpt_3_5``, reversed prompt order)
for column, model_key, reversed_order in [
    ('prediction_finetuned_gpt_3_5', 'balanced', False),
    ('prediction_finetuned_gpt_3_5_unbalanced', 'unbalanced', False),
    ('prediction_finetuned_gpt_3_5_reversed', 'balanced_reversed', True),
    ('prediction_finetuned_gpt_3_5_unbalanced_reversed', 'unbalanced_reversed', True),
]:
    annotate_with_finetuned_model(test_df, column, model_dict_gpt_3_5[model_key],
                                  save_path, reversed_order=reversed_order)