In [None]:
!pip install openai

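Reformatting test_data.jsonl (originally laid out for fine-tuning) for two-shot prompting: every three consecutive records become one prompt containing two labelled examples followed by the comment to classify.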

In [None]:
import json

# Step 1: Read the JSONL data.
# UTF-8 is requested explicitly so non-ASCII comment text decodes the same way on
# every platform, and blank lines are skipped instead of crashing json.loads.
with open('test_data.jsonl', 'r', encoding='utf-8') as file:
    data = [json.loads(line) for line in file if line.strip()]

# Step 2: Transform the data.
# Records are consumed in consecutive, non-overlapping groups of three: the first
# two supply the worked examples (text + answer) and the third is the comment to
# classify. Trailing records that do not fill a complete group are ignored.
# NOTE(review): indexing assumes each record's "messages" list is
# [system, user, assistant] — confirm against the fine-tuning data.

# The system prompt is identical for every group, so it is built once.
system_prompt = (
    "You are an assistant that is meant to determine whether the "
    "last comment mentioned in the user message is an example of counterspeech. "
    "You have been given two comments before that as examples."
    " Counterspeech is defined as a "
    "response or comment that counters hateful or harmful speech/ideas. Respond with true or false."
)

grouped_data = []

for i in range(0, len(data) - 2, 3):
    first_shot, second_shot, target = data[i], data[i + 1], data[i + 2]

    # Two labelled examples followed by the (unlabelled) comment to classify.
    content_str = (
        'Example 1: ' + first_shot["messages"][1]["content"] + '\n' +
        'Answer 1: ' + first_shot["messages"][2]["content"] + '\n' +
        'Example 2: ' + second_shot["messages"][1]["content"] + '\n' +
        'Answer 2: ' + second_shot["messages"][2]["content"] + '\n' +
        target["messages"][1]["content"]
    )
    grouped_example = {
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": content_str},
            # Gold answer for the third record; kept so predictions can be scored later.
            {"role": "assistant", "content": target["messages"][2]["content"]}
        ]
    }
    grouped_data.append(grouped_example)

# Save transformed data (optional).
# Despite the .jsonl extension this file holds one indented JSON array; the next
# cell reads it back with json.load and rewrites it as true JSON Lines.
with open('two-shot test data.jsonl', 'w', encoding='utf-8') as file:
    json.dump(grouped_data, file, indent=4)


In [None]:
import json

# Source file (a single JSON array) and destination file (one JSON object per line).
input_filename = 'two-shot test data.jsonl'
output_filename = 'reformatted_two-shot test data.jsonl'

# Parse the whole input file as one JSON document.
with open(input_filename, 'r') as src:
    data = json.load(src)

# Serialise each element on its own line to produce JSON Lines output.
with open(output_filename, 'w') as dst:
    dst.writelines(json.dumps(record) + '\n' for record in data)

print(f"Data reformatted and saved to {output_filename}")

Metrics & Results

In [None]:
import openai
import os
import json
import time
from sklearn.metrics import classification_report, accuracy_score, recall_score, f1_score, precision_score

# Set API key.
# The key is never written into the notebook: it is taken from the
# OPENAI_API_KEY environment variable, and the user is prompted (input hidden)
# only when that variable is unset.
from getpass import getpass  # needed only for the prompt below

if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")
openai.api_key = os.environ["OPENAI_API_KEY"]

# Pause between consecutive API requests, in seconds.
REQUEST_DELAY_SECONDS = 1


def _to_label(text):
    """Collapse a free-form model reply into the label 'true' or 'false'.

    An exact 'true'/'false' (ignoring case and surrounding whitespace) is kept.
    Otherwise a reply containing 'true' maps to 'true', and every other reply —
    including one that mentions neither word — maps to 'false', so the
    predictions always stay binary for the metrics below.
    """
    cleaned = text.strip().lower()
    if cleaned in ('true', 'false'):
        return cleaned
    return 'true' if 'true' in cleaned else 'false'


# Read the .jsonl file (one grouped example per line; blank lines are skipped).
with open('reformatted_two-shot test data.jsonl', 'r') as file:
    data = [json.loads(line) for line in file if line.strip()]

# Parallel lists: gold label, predicted label, and the example they belong to.
# `evaluated` holds only examples that produced a prediction, so the three lists
# stay aligned even when some requests fail.
actual = []
predicted = []
evaluated = []

# Go through each example and predict
for example in data:
    # Extract messages without the assistant's message (the gold answer)
    messages = example["messages"][:-1]

    try:
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=messages,
            temperature=0
        )
        predicted_response = _to_label(response["choices"][0]["message"]["content"])
        print(predicted_response)

        actual_response = example["messages"][-1]["content"].strip().lower()
    except Exception as e:
        # Best effort: report the failure and move on; the example is left out of
        # the metrics rather than being scored with a made-up prediction.
        print("Error for example:", messages)
        print(e)
    else:
        # Record all three together only once both labels are available.
        predicted.append(predicted_response)
        actual.append(actual_response)
        evaluated.append(example)
    finally:
        # Wait before the next request
        time.sleep(REQUEST_DELAY_SECONDS)

# Metrics
if actual:
    print("Accuracy:", accuracy_score(actual, predicted))
    print("F1-score:", f1_score(actual, predicted, pos_label="true"))
    print("Precision:", precision_score(actual, predicted, pos_label="true"))
    print("Recall:", recall_score(actual, predicted, pos_label="true"))
    # Explicit labels keep the report's rows matched to target_names even when
    # one of the two classes happens to be absent.
    print(classification_report(actual, predicted,
                                labels=["false", "true"],
                                target_names=["false", "true"]))
else:
    print("No examples were evaluated successfully; skipping metrics.")

# Displaying examples
correct = []
incorrect = []

# Iterate over `evaluated`, not `data`: failed requests have no prediction.
for a, p, ex in zip(actual, predicted, evaluated):
    if a == p:
        correct.append(ex)
    else:
        incorrect.append(ex)

print("\nExamples GPT-3.5 got right:")
for c in correct:
    print(c)

print("\nExamples GPT-3.5 got wrong:")
for ic in incorrect:
    print(ic)
