In [1]:
from openai import OpenAI
import json
from tqdm import tqdm

# Credentials are deliberately NOT written into the notebook: with no
# ``api_key`` argument the OpenAI client reads the OPENAI_API_KEY environment
# variable, so export it in the environment that launches the kernel.
client = OpenAI()

# Load prompts from a JSONL file
def load_prompts(file_path):
    """Read a JSON Lines file and return its records as a list.

    Args:
        file_path: Path of a file holding one JSON document per line.

    Returns:
        A list with one parsed object per non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # JSON text is UTF-8; be explicit so the result does not depend on the
    # platform's locale encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        # Blank lines (typically a trailing newline at the end of the file)
        # carry no record and would make json.loads raise, so skip them.
        return [json.loads(line) for line in file if line.strip()]

# Test-set request payloads: one list for the single-post (op) task and one
# for the (op, reply 1, reply 2) pairs task.
op_prompts = load_prompts(file_path='op_test_gpt.jsonl')
pairs_prompts = load_prompts(file_path='pairs_test_gpt.jsonl')

In [2]:
# Function to send requests and gather responses
def get_responses(prompts, instruction):
    """Query the chat model once per prompt and collect the raw completions.

    Each prompt is a dict with a ``custom_id`` and a ``body`` holding
    ``messages`` and ``max_tokens``. Only the message at index 1 of
    ``body['messages']`` is forwarded; the system message is replaced by
    ``instruction``.

    Args:
        prompts: Iterable of prompt dicts as described above.
        instruction: Text sent as the system message of every request.

    Returns:
        A list with one entry per prompt, in order: the completion object
        returned by the client, or ``None`` when the request raised.
    """
    collected = []
    for item in tqdm(prompts, desc="Getting GPT3.5 Responses: "):
        try:
            completion = client.chat.completions.create(
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "system", "content": instruction},
                    item['body']['messages'][1],
                ],
                max_completion_tokens=item['body']['max_tokens'],
            )
        except Exception as e:
            # Best effort: report the failure and keep a placeholder so the
            # output stays aligned with the input prompts.
            print(f"Error processing prompt {item['custom_id']}: {e}")
            completion = None
        collected.append(completion)
    return collected

In [90]:
# Get responses of Original Posts from GPT3.5
# Three system prompts for the same resistant/malleable question, differing
# only in the requested output: label only, label followed by explanation,
# and explanation followed by label.
# NOTE(review): the names directly_predict / predict_then_explain /
# explain_then_predict are assigned again by the pairs cell further down, so
# their value depends on which cell ran last.
directly_predict = "You're a semantic analyst. Now I will show you a person's opinion statement. We know that the person publicly announced his/her argument and encouraged other people to challenge it. Judging from the speech style and lexical features, do you think he/she is resistant or malleable to persuasion? Answer with 'malleable' or 'resistant'."
predict_then_explain = "You're a semantic analyst. Now I will show you a person's opinion statement. We know that the person publicly announced his/her argument and encouraged other people to challenge it. Judging from the speech style and lexical features, do you think he/she is resistant or malleable to persuasion? Answer with 'malleable' or 'resistant' and explain your answer. Response with the following format: Prediction: resistant/malleable \n Explanation: briefly explain here."
explain_then_predict = "You're a semantic analyst. Now I will show you a person's opinion statement. We know that the person publicly announced his/her argument and encouraged other people to challenge it. Judging from the speech style and lexical features, do you think he/she is resistant or malleable to persuasion? First briefly explain your analysis and then give your answer with resistant/malleable. Response with the following format: Explanation: briefly explain here. \n  Prediction: resistant/malleable"

# One full pass over the op prompts per prompting strategy; each result list
# has one entry per prompt (None where the request failed).
op_responses_direct = get_responses(op_prompts, directly_predict)
op_responses_pred_explain = get_responses(op_prompts, predict_then_explain)
op_responses_explain_pred = get_responses(op_prompts, explain_then_predict)

In [3]:
# Get responses of Pairs(op, reply1, reply2) from GPT3.5
# Three system prompts for the "which reply persuaded the poster" question:
# label only, label followed by explanation, and explanation followed by label.
# The direct prompt previously read "Answer reply 1 or reply 1 only", which
# never offered "reply 2" as an answer; it now names both options. Responses
# collected with the old wording should be regenerated.
directly_predict = "This is a conversation from an online discussion community. The first was a poster who posted an opinion, and the next two replies were each trying to convince the poster to revise his opinion. The two responses were similar, but one managed to convince the poster and the other didn't. Now judge which response succeeded in persuading. Answer reply 1 or reply 2 only."
predict_then_explain = "This is a conversation from an online discussion community. The first was a poster who posted an opinion, and the next two replies were each trying to convince the poster to revise his opinion. The two responses were similar, but one managed to convince the poster and the other didn't. Now first judge which response succeeded in persuading and then explain your analysis. Response with the following format: Prediction: reply 1/reply 2 \n Explanation: briefly explain here."
explain_then_predict = "This is a conversation from an online discussion community. The first was a poster who posted an opinion, and the next two replies were each trying to convince the poster to revise his opinion. The two responses were similar, but one managed to convince the poster and the other didn't. Now first show your analysis and then judge which response succeeded in persuading. Response with the following format: Explanation: briefly explain here. \n Prediction: reply 1/reply 2"

# One full pass over the pairs prompts per prompting strategy; each result
# list has one entry per prompt (None where the request failed).
pairs_responses_direct = get_responses(pairs_prompts, directly_predict)
pairs_responses_pred_explain = get_responses(pairs_prompts, predict_then_explain)
pairs_responses_explain_pred = get_responses(pairs_prompts, explain_then_predict)

Getting GPT3.5 Responses: 100%|██████████| 200/200 [01:38<00:00,  2.03it/s]
Getting GPT3.5 Responses: 100%|██████████| 200/200 [06:20<00:00,  1.90s/it]
Getting GPT3.5 Responses: 100%|██████████| 200/200 [07:30<00:00,  2.25s/it]


In [4]:
def _normalise_answer(text):
    """Lower-case ``text`` and trim surrounding whitespace, quotes and full stops."""
    return text.lower().strip().strip("\"'.").strip()


def _response_matches(res, truth, method):
    """Return True when the model output held by ``res`` predicts ``truth``."""
    # A failed request (None), an empty label or an empty completion can never
    # count as a correct prediction.
    if res is None or not truth:
        return False
    content = res.choices[0].message.content
    if not content:
        return False
    if method == "direct":
        # The whole reply is expected to be the label itself.
        return _normalise_answer(content) == truth

    text = content.lower()
    marker = "prediction:"
    # With explain-then-predict the label is the last thing in the reply, so
    # search from the end; otherwise take the first marker.
    if method == "explain_then_predict":
        position = text.rfind(marker)
    else:
        position = text.find(marker)
    if position == -1:
        # The model ignored the requested format: no usable prediction.
        return False
    answer = text[position + len(marker):].lstrip().lstrip("\"'")
    # Prefix match instead of a fixed-width slice, so labels of any length
    # ("malleable", "reply 1", ...) are recognised.
    return answer.startswith(truth)


def prediction_accuracy(responses, file_path, method="direct"):
    """Score model responses against the gold labels stored in a JSONL file.

    Args:
        responses: Sequence of chat-completion objects, one per test item, in
            the same order as the label file. ``None`` entries (failed
            requests) are counted as wrong.
        file_path: JSON Lines file whose records carry the gold label under
            the ``"output"`` key.
        method: How the reply is formatted. ``"direct"`` means the reply is
            the label; ``"predict_then_explain"`` and
            ``"explain_then_predict"`` mean the label follows a
            ``Prediction:`` marker.

    Returns:
        The fraction of test items whose predicted label matches the gold
        label, as a float between 0 and 1.

    Raises:
        ValueError: If ``method`` is unknown, the label file holds no labels,
            or the number of responses differs from the number of labels.
    """
    if method not in ("direct", "predict_then_explain", "explain_then_predict"):
        raise ValueError(f"Unknown method: {method!r}")

    with open(file_path, 'r', encoding='utf-8') as f:
        truths = [
            _normalise_answer(str(json.loads(line)["output"]))
            for line in f
            if line.strip()
        ]
    if not truths:
        raise ValueError(f"No labels found in {file_path!r}")
    if len(responses) != len(truths):
        # A length mismatch means responses and labels are not aligned, so any
        # accuracy computed from them would be meaningless.
        raise ValueError(
            f"Got {len(responses)} responses but {len(truths)} labels in {file_path!r}"
        )

    correct = sum(
        1 for res, truth in zip(responses, truths)
        if _response_matches(res, truth, method)
    )
    return correct / len(truths)

def z_test(n_sample, accuracy_1, accuracy_2):
    """Two-sided two-proportion z-test for a difference between two accuracies.

    Both accuracies are treated as proportions measured on ``n_sample`` items
    each, and the pooled proportion is used for the standard error.

    NOTE(review): the callers in this notebook compare accuracies measured on
    the same test items; this test treats the two samples as independent.
    Confirm that is acceptable (a paired test may be more appropriate).

    Args:
        n_sample: Number of items behind each accuracy; must be positive.
        accuracy_1: First accuracy, in [0, 1].
        accuracy_2: Second accuracy, in [0, 1].

    Returns:
        The two-sided p-value as a plain ``float``. When both accuracies are 0
        or both are 1 there is no variance and no difference, so 1.0 is
        returned.

    Raises:
        ValueError: If ``n_sample`` is not positive or an accuracy lies
            outside [0, 1].
    """
    import numpy as np
    from scipy.stats import norm

    if n_sample <= 0:
        raise ValueError("n_sample must be a positive number of test items")
    if not (0 <= accuracy_1 <= 1 and 0 <= accuracy_2 <= 1):
        raise ValueError("accuracies must lie in the interval [0, 1]")

    pooled = (accuracy_1 * n_sample + accuracy_2 * n_sample) / (n_sample * 2)
    variance = pooled * (1 - pooled) * 2 / n_sample
    if variance <= 0:
        # pooled is 0 or 1, i.e. both accuracies are identical extremes; the
        # z statistic would be 0/0, so report "no evidence of a difference".
        return 1.0

    z_score = (accuracy_1 - accuracy_2) / np.sqrt(variance)
    p_value = norm.sf(abs(z_score)) * 2
    return float(p_value)

In [None]:
# Score the original-post (op) responses for each prompting strategy and test
# whether the two explanation strategies differ from the direct baseline.
# NOTE(review): this path names the *pairs* test set although the cell scores
# op responses — confirm this is the intended label file.
file_path = "finetune_llama3_1/pairs_test_alpaca.jsonl"
n_sample = len(op_responses_direct)

op_accuracy_direct = prediction_accuracy(op_responses_direct, file_path)
op_accuracy_pred_explain = prediction_accuracy(
    op_responses_pred_explain, file_path, method="predict_then_explain"
)
op_accuracy_explain_pred = prediction_accuracy(
    op_responses_explain_pred, file_path, method="explain_then_predict"
)

# p-values of each explanation strategy against the direct-prediction baseline.
p_2 = z_test(n_sample, op_accuracy_direct, op_accuracy_pred_explain)
p_3 = z_test(n_sample, op_accuracy_direct, op_accuracy_explain_pred)

print(f"Accuracy for direct prediction with GPT-3.5 turbo is {op_accuracy_direct:.3f}.")
print(f"Accuracy for predict-then-explain with GPT-3.5 turbo is {op_accuracy_pred_explain:.3f} with p-value: {p_2:.3f}")
print(f"Accuracy for explain-then-predict with GPT-3.5 turbo is {op_accuracy_explain_pred:.3f} with p-value: {p_3:.3f}")

In [None]:
# Score the pairs responses for each prompting strategy. The label file and
# sample size are defined here so the cell does not depend on values left
# behind by the op-evaluation cell.
pairs_file_path = "finetune_llama3_1/pairs_test_alpaca.jsonl"
pairs_n_sample = len(pairs_responses_direct)

pairs_accuracy_direct = prediction_accuracy(pairs_responses_direct, pairs_file_path)
pairs_accuracy_pred_explain = prediction_accuracy(pairs_responses_pred_explain,
                                                  pairs_file_path,
                                                  method="predict_then_explain")
pairs_accuracy_explain_pred = prediction_accuracy(pairs_responses_explain_pred,
                                                  pairs_file_path,
                                                  method="explain_then_predict")

# Compare each explanation strategy with the direct baseline of the SAME task.
# The p-values were previously computed against the op_accuracy_* values, which
# belong to the other task.
p_2 = z_test(pairs_n_sample, pairs_accuracy_direct, pairs_accuracy_pred_explain)
p_3 = z_test(pairs_n_sample, pairs_accuracy_direct, pairs_accuracy_explain_pred)

print(f"Accuracy for direct prediction with GPT-3.5 turbo is {pairs_accuracy_direct:.3f}.")
print(f"Accuracy for predict-then-explain with GPT-3.5 turbo is {pairs_accuracy_pred_explain:.3f} with p-value: {p_2:.3f}")
print(f"Accuracy for explain-then-predict with GPT-3.5 turbo is {pairs_accuracy_explain_pred:.3f} with p-value: {p_3:.3f}")

In [116]:
def get_explanation_for_finetuning(prompts, truths):
    """Ask the chat model to justify each known label, for use as training text.

    For every (prompt, truth) pair the system message tells the model the
    known outcome and asks how the poster's style suggests it. Only the
    message at index 1 of ``prompt['body']['messages']`` is forwarded.

    Args:
        prompts: Sequence of prompt dicts with ``custom_id`` and a ``body``
            holding ``messages`` and ``max_tokens``.
        truths: Gold labels aligned with ``prompts``; each must be
            ``"malleable"`` or ``"resistant"``.

    Returns:
        A list with one entry per prompt, in order: the completion object, or
        ``None`` when the label was not recognised or the request raised.

    Raises:
        ValueError: If ``prompts`` and ``truths`` differ in length.
    """
    # Label-specific sentence inserted into the system message.
    inserts = {
        "malleable": "We know that he/she did get persuaded by some commentators. How might his/her speeching style and lexical features suggest he/she is malleable to persuasion?",
        "resistant": "We know that he/she never get persuaded by others. How might his/her speeching style and lexical features suggest he/she is resistant to persuasion?",
    }

    truths = list(truths)
    if len(prompts) != len(truths):
        # zip() would silently drop the unmatched tail and misalign the output.
        raise ValueError(
            f"Got {len(prompts)} prompts but {len(truths)} labels"
        )

    responses = []
    for prompt, truth in tqdm(zip(prompts, truths),
                              desc="Getting GPT3.5 Responses: ",
                              total=len(prompts)):
        prompt_id = prompt.get('custom_id', '<unknown>') if isinstance(prompt, dict) else '<unknown>'
        insert = inserts.get(truth)
        if insert is None:
            # An unexpected label used to leave `insert` unbound (first item)
            # or stale from the previous item, producing an explanation for
            # the wrong label. Skip the request and keep the slot instead.
            print(f"Error processing prompt {prompt_id}: unexpected label {truth!r}")
            responses.append(None)
            continue

        instruction = f"You're a semantic analyst. Now I will show you a person's opinion statement, who publicly announced his/her argument and encouraged other people to challenge it. {insert} Very briefly explain your analysis with no more than 2 paragraphs."
        try:
            response = client.chat.completions.create(
                      model="gpt-3.5-turbo",
                      messages=[
                          {"role":"system", "content":instruction},
                          prompt['body']['messages'][1]
                      ],
                    max_completion_tokens=prompt['body']['max_tokens']
                    )
            responses.append(response)
        except Exception as e:
            # Best effort: report the failure and keep a placeholder so the
            # output stays aligned with the input prompts.
            print(f"Error processing prompt {prompt_id}: {e}")
            responses.append(None)
    return responses



# Gold labels are read from the "output" field of the fine-tuning training
# file; the request payloads come from a separate prompts file.
# NOTE(review): the two files are presumably aligned line by line — confirm.
explanation_prompts = load_prompts("finetune_llama3_1/op_train.jsonl")
truths = [record["output"] for record in explanation_prompts]
train_prompts = load_prompts("op_train_prompts.jsonl")
explanations = get_explanation_for_finetuning(train_prompts, truths)

Getting GPT3.5 Responses: 100%|██████████| 1000/1000 [43:23<00:00,  2.60s/it]


In [117]:
def create_jsonl(explanations, file_path):
    """Write the text of each completion to ``file_path``, one JSON value per line.

    Args:
        explanations: Iterable of chat-completion objects. ``None`` entries
            (failed requests) are written as JSON ``null`` so that line ``i``
            of the file still corresponds to item ``i`` of the input.
        file_path: Destination path; an existing file is overwritten.
    """
    # Extract everything before opening the file, so a malformed entry cannot
    # leave a half-written file behind.
    contents = [
        None if line is None else line.choices[0].message.content
        for line in explanations
    ]
    with open(file_path, "w", encoding="utf-8") as f:
        for content in contents:
            json.dump(content, f)
            f.write("\n")

# Persist the generated explanations next to the fine-tuning data.
# Note: this rebinds the notebook-level name `file_path`.
file_path = 'finetune_llama3_1/op_gpt_explanations.jsonl'
create_jsonl(explanations, file_path=file_path)

In [9]:
# Sanity check: number of responses collected for the pairs task.
len(pairs_responses_direct)

200