This code is largely based on the OpenAI fine-tuning documentation and guide (https://platform.openai.com/docs/guides/fine-tuning).

Dataset used: https://github.com/hate-alert/Countering_Hate_Speech_ICWSM2019/blob/master/Data/Counterspeech_Dataset.json

The following code blocks convert the JSON dataset into a format that can be used for fine-tuning the GPT-3.5 model.

In [None]:
import json

# Load the dataset.
# The encoding is passed explicitly: JSON text is UTF-8, whereas open()
# otherwise falls back to the platform's locale encoding, which can fail on
# (or silently mis-decode) any non-ASCII characters in the file.
with open('Counterspeech_Dataset.json', 'r', encoding='utf-8') as file:
    dataset = json.load(file)

# Create a new transformed dataset (filled in by the loop further down)
transformed_dataset = []

def community_mapper(community):
    """Translate a raw community tag into the name used in the prompt text.

    Known tags ("jews", "black", "lgbt") are replaced by their display form;
    any other value is handed back unchanged.
    """
    display_names = {"jews": "Jewish", "black": "Black", "lgbt": "LGBT"}
    if community in display_names:
        return display_names[community]
    # Unknown tag: fall back to whatever was passed in
    return community

# Fields every raw entry must provide to be usable
required_keys = ("Community", "commentText", "CounterSpeech")

for entry in dataset:
    # Skip entries that lack any of the required fields
    if any(key not in entry for key in required_keys):
        continue

    community_name = community_mapper(entry["Community"])
    prompt_text = f'A YouTube Video, which contains hateful content that targets the {community_name} community, has the following comment: {entry["commentText"]}'
    # The label is carried over exactly as it appears in the source entry
    transformed_dataset.append({
        "Prompt": prompt_text,
        "CounterSpeech": entry["CounterSpeech"],
    })

# Write the prompt/label records to a new, indented JSON file
with open('Transformed_Counterspeech_Dataset.json', 'w') as out_file:
    json.dump(transformed_dataset, out_file, indent=4)

In [None]:
# Read back the prompt/label records written by the previous step
with open("Transformed_Counterspeech_Dataset.json", "r") as src:
    data = json.load(src)

# Instruction sent as the system message of every example
system_message = "You are an assistant that is meant to determine whether the comment mentioned in the user message is an example of counterspeech. Counterspeech is defined as a response or comment that counters hateful or harmful speech/ideas. Respond with true or false."

# Wrap each record as one chat example: system prompt, user prompt, assistant label
transformed_data = [
    {
        "messages": [
            {"role": "system", "content": system_message},
            {"role": "user", "content": record["Prompt"]},
            # The label comes from the truthiness of the "CounterSpeech" value.
            # NOTE(review): assumes that value is a JSON boolean; a non-empty
            # string such as "false" would be labelled "true" - confirm against the dataset.
            {"role": "assistant", "content": "true" if record["CounterSpeech"] else "false"},
        ]
    }
    for record in data
]

# Save the examples as JSONL: one JSON object per line
with open("transformed_data.jsonl", "w") as dst:
    dst.writelines(json.dumps(item) + "\n" for item in transformed_data)

In [None]:
import json
from sklearn.model_selection import train_test_split

# Load the data from jsonl file
with open('transformed_data.jsonl', 'r') as file:
    data = [json.loads(line) for line in file]

# Extract labels from data (the assistant reply is the third message of each example)
labels = [entry['messages'][2]['content'] for entry in data]

# Fixed seed so that re-running this cell reproduces the same split. Without it
# every run reshuffles, and examples the model was already fine-tuned on can
# end up in the regenerated validation/test files.
SPLIT_SEED = 42

# Split the data using stratified sampling to maintain the class balance
# 80% training data, 10% test data, 10% validation data
train_data, temp_data, train_labels, temp_labels = train_test_split(
    data, labels, test_size=0.2, stratify=labels, random_state=SPLIT_SEED
)
valid_data, test_data, valid_labels, test_labels = train_test_split(
    temp_data, temp_labels, test_size=0.5, stratify=temp_labels, random_state=SPLIT_SEED
)

# Save split data to separate jsonl files (one JSON object per line)
for path, split in (
    ('train_data.jsonl', train_data),
    ('valid_data.jsonl', valid_data),
    ('test_data.jsonl', test_data),
):
    with open(path, 'w') as file:
        file.writelines(json.dumps(item) + '\n' for item in split)

Estimate training cost

In [None]:
# We start by importing the required packages

import json
import os
import tiktoken
import numpy as np
from collections import defaultdict

# Next, we specify the data path and open the JSONL file

data_path = "train_data.jsonl"

# Load dataset: each line of the JSONL file is parsed as one JSON document
with open(data_path) as f:
    dataset = list(map(json.loads, f))

# Quick sanity check: how many examples there are and what the first one looks like

# Initial dataset stats
print("Num examples:", len(dataset))
print("First example:")
first_example = dataset[0]
for first_message in first_example["messages"]:
    print(first_message)

# Now that we have a sense of the data, we need to go through all the different examples and check to make sure the formatting is correct and matches the Chat completions message structure

# Format error checks
# Tally of every kind of formatting problem found, keyed by a short error name
format_errors = defaultdict(int)

for example in dataset:
    # Each example has to be a JSON object ...
    if not isinstance(example, dict):
        format_errors["data_type"] += 1
        continue

    # ... that carries a non-empty "messages" list
    messages = example.get("messages", None)
    if not messages:
        format_errors["missing_messages_list"] += 1
        continue

    has_assistant_message = False
    for message in messages:
        # Both "role" and "content" are mandatory
        if not ("role" in message and "content" in message):
            format_errors["message_missing_key"] += 1

        # Only "role", "content" and "name" are accepted as keys
        if not all(key in ("role", "content", "name") for key in message):
            format_errors["message_unrecognized_key"] += 1

        role = message.get("role", None)
        if role not in ("system", "user", "assistant"):
            format_errors["unrecognized_role"] += 1
        if role == "assistant":
            has_assistant_message = True

        # Content must be a non-empty string
        content = message.get("content", None)
        if not (content and isinstance(content, str)):
            format_errors["missing_content"] += 1

    # Every example needs at least one assistant message
    if not has_assistant_message:
        format_errors["example_missing_assistant_message"] += 1

if not format_errors:
    print("No errors found")
else:
    print("Found errors:")
    for error_name, count in format_errors.items():
        print(f"{error_name}: {count}")

# Beyond the structure of the message, we also need to ensure that the length does not exceed the 4096 token limit.

# Token counting functions
# Module-level tiktoken encoder ("cl100k_base"); the counting helpers defined
# below call encoding.encode(...) and take the length of the result.
encoding = tiktoken.get_encoding("cl100k_base")

# not exact!
# simplified from https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
def num_tokens_from_messages(messages, tokens_per_message=3, tokens_per_name=1):
    """Estimate the token count of one conversation (approximate, not exact).

    Args:
        messages: iterable of message dicts; every value in each dict is
            encoded with the module-level ``encoding`` and its tokens counted.
        tokens_per_message: fixed overhead added for each message.
        tokens_per_name: extra overhead added for a message that has a "name" key.

    Returns:
        The summed estimate, including a constant 3 tokens added once per
        conversation.
    """
    total = 3  # constant per-conversation overhead
    for message in messages:
        total += tokens_per_message
        total += sum(len(encoding.encode(value)) for value in message.values())
        if "name" in message:
            total += tokens_per_name
    return total

def num_assistant_tokens_from_messages(messages):
    """Count the tokens in the content of all assistant messages.

    Messages whose "role" is not "assistant" are ignored; the content of the
    remaining ones is encoded with the module-level ``encoding``.
    """
    return sum(
        len(encoding.encode(message["content"]))
        for message in messages
        if message["role"] == "assistant"
    )

def print_distribution(values, name):
    """Print summary statistics for a sequence of numbers.

    Prints a header containing ``name``, then min/max, mean/median and the
    5th/95th percentiles of ``values``.

    Args:
        values: non-empty sequence of numbers.
        name: label shown in the header line.

    Raises:
        ValueError: if ``values`` is empty (raised by ``min``).
    """
    print(f"\n#### Distribution of {name}:")
    print(f"min / max: {min(values)}, {max(values)}")
    print(f"mean / median: {np.mean(values)}, {np.median(values)}")
    # The label promises p5 / p95, so request the 0.05 and 0.95 quantiles
    # (the previous 0.1 / 0.9 were actually p10 / p90).
    print(f"p5 / p95: {np.quantile(values, 0.05)}, {np.quantile(values, 0.95)}")

# Last, we can look at the results of the different formatting operations before proceeding with creating a fine-tuning job:

# Warnings and tokens counts
# Number of examples with no system message / no user message at all
n_missing_system = sum(
    1 for ex in dataset
    if all(message["role"] != "system" for message in ex["messages"])
)
n_missing_user = sum(
    1 for ex in dataset
    if all(message["role"] != "user" for message in ex["messages"])
)

# Per-example message count, estimated total tokens and assistant-only tokens
n_messages = [len(ex["messages"]) for ex in dataset]
convo_lens = [num_tokens_from_messages(ex["messages"]) for ex in dataset]
assistant_message_lens = [
    num_assistant_tokens_from_messages(ex["messages"]) for ex in dataset
]

print("Num examples missing system message:", n_missing_system)
print("Num examples missing user message:", n_missing_user)
print_distribution(n_messages, "num_messages_per_example")
print_distribution(convo_lens, "num_total_tokens_per_example")
print_distribution(assistant_message_lens, "num_assistant_tokens_per_example")

# Examples whose estimated length exceeds the 4096-token limit
n_too_long = len([length for length in convo_lens if length > 4096])
print(f"\n{n_too_long} examples may be over the 4096 token limit, they will be truncated during fine-tuning")

# Pricing and default n_epochs estimate
MAX_TOKENS_PER_EXAMPLE = 4096

MIN_TARGET_EXAMPLES = 100
MAX_TARGET_EXAMPLES = 25000
TARGET_EPOCHS = 3
MIN_EPOCHS = 1
MAX_EPOCHS = 25

n_train_examples = len(dataset)
# Number of examples processed if we trained for the target number of epochs
projected_examples = n_train_examples * TARGET_EPOCHS

if projected_examples < MIN_TARGET_EXAMPLES:
    # Small dataset: use more epochs, capped at MAX_EPOCHS
    n_epochs = min(MAX_EPOCHS, MIN_TARGET_EXAMPLES // n_train_examples)
elif projected_examples > MAX_TARGET_EXAMPLES:
    # Large dataset: use fewer epochs, but never fewer than MIN_EPOCHS
    n_epochs = max(MIN_EPOCHS, MAX_TARGET_EXAMPLES // n_train_examples)
else:
    n_epochs = TARGET_EPOCHS

# Each example counts for at most MAX_TOKENS_PER_EXAMPLE tokens
n_billing_tokens_in_dataset = 0
for length in convo_lens:
    n_billing_tokens_in_dataset += min(MAX_TOKENS_PER_EXAMPLE, length)

print(f"Dataset has ~{n_billing_tokens_in_dataset} tokens that will be charged for during training")
print(f"By default, you'll train for {n_epochs} epochs on this dataset")
print(f"By default, you'll be charged for ~{n_epochs * n_billing_tokens_in_dataset} tokens")
print("See pricing page to estimate total costs")

In [None]:
!pip install openai

In [None]:
import os
from getpass import getpass

# Ask for the key at run time instead of pasting it into the notebook, so the
# secret is never saved with the file. A key that is already present in the
# environment is left untouched.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

In [None]:
# Create fine-tuning file
import openai
import os
openai.api_key = os.environ.get("OPENAI_API_KEY")

# Open the training file in a context manager so the handle is closed once the
# upload call returns.
with open("train_data.jsonl", "rb") as training_file:
    upload_response = openai.File.create(
        file=training_file,
        purpose='fine-tune'
    )

# Print the response so the file id needed for the fine-tuning job is visible
print(upload_response)

In [3]:
# Create fine-tuning job using the file id that is printed when you run the above code
# 5 epochs were the most effective when we tried testing the fine-tuned model against the validation data
# Run the code below
#openai.FineTuningJob.create(training_file=<insert file-id>, model="gpt-3.5-turbo", hyperparameters={"n_epochs": 5})

#Store the fine-tuning job id, starts with "ftjob-"

In [2]:
# Wait a while, then run the line below to check on the job:
#openai.FineTuningJob.retrieve(<insert fine-tuning job id>)
# Re-run it from time to time; eventually the "status" should say succeeded.
# The response should then contain a fine-tuned model, which you should store for the evaluation below.

The following code evaluates the fine-tuned model on the test data and reports the results and metrics.

In [None]:
import openai
import json
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Load the held-out test data
with open("test_data.jsonl", "r") as f:
    test_data = [json.loads(line) for line in f]

# The only two answers the model is supposed to give
VALID_LABELS = ("true", "false")

# Initialize the lists for true labels and predictions
true_labels = []
predicted_labels = []
n_unparseable = 0

# Get predictions for each example in the test data
for example in test_data:
    completion = openai.ChatCompletion.create(
        model="ft:gpt-3.5-turbo-0613:personal::7tptzfbo", #replace this with your model
        # Re-send the system and user messages stored with the example, so the
        # prompt is exactly the one used in the fine-tuning data.
        messages=example["messages"][:2],
        # Ask for the most likely answer so that repeated runs are as
        # repeatable as the API allows.
        temperature=0,
    )

    true = example["messages"][2]["content"].lower()

    # Normalise the reply: drop surrounding whitespace and a trailing full stop
    predicted = completion.choices[0].message["content"].strip().rstrip(".").lower()
    if predicted not in VALID_LABELS:
        # An unexpected reply would turn the labels into a multiclass target
        # and make the binary precision/recall/F1 calls raise. Count it as a
        # wrong answer instead.
        n_unparseable += 1
        predicted = "false" if true == "true" else "true"

    predicted_labels.append(predicted)
    true_labels.append(true)

# Calculate evaluation metrics
accuracy = accuracy_score(true_labels, predicted_labels)
precision = precision_score(true_labels, predicted_labels, pos_label="true")
recall = recall_score(true_labels, predicted_labels, pos_label="true")
f1 = f1_score(true_labels, predicted_labels, pos_label="true")

# Separate the examples the model got wrong from the ones it got right
incorrect_examples = [
    example
    for example, true, predicted in zip(test_data, true_labels, predicted_labels)
    if true != predicted
]
correct_examples = [
    example
    for example, true, predicted in zip(test_data, true_labels, predicted_labels)
    if true == predicted
]

# Print the results
print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-score: {f1:.2f}")
if n_unparseable:
    print(f"Responses that were neither true nor false (counted as incorrect): {n_unparseable}")
print("\nIncorrect Examples:")
for example in incorrect_examples:
    print(example)
print("\nCorrect Examples:")
for example in correct_examples:
    print(example)
