In [1]:
import os
from openai import OpenAI
import pandas as pd
import json

In [2]:
key = os.environ.get("OPENAI_API_KEY")
# if not running locally paste key manually
# DO NOT PUBLISH KEY TO GITHUB
# hard coding creds is poor practice.
# key = ""
client=OpenAI(api_key=key)

In [4]:
import json

def sanitize_to_utf8(input_str):
    """
    Sanitizes a string field to UTF-8.
    """
    return input_str.encode('utf-8', 'replace').decode('utf-8')

def process_jsonl_file(input_jsonl_path, output_jsonl_path):
    """
    Processes a JSONL file line by line, sanitizes each line to UTF-8, 
    and saves it to a new JSONL file.
    """
    with open(input_jsonl_path, 'r', encoding='utf-8') as input_file, \
         open(output_jsonl_path, 'w', encoding='utf-8') as output_file:
        
        for line in input_file:
            # Parse JSON line
            json_obj = json.loads(line)

            # Sanitize data
            sanitized_json_obj = {key: sanitize_to_utf8(value) if isinstance(value, str) else value
                                  for key, value in json_obj.items()}

            # Write sanitized JSON object to new JSONL file
            output_file.write(json.dumps(sanitized_json_obj) + '\n')



In [None]:
# Paths to your files
input_jsonl_path = 'train_data_full.json'
output_jsonl_path = 'output.jsonl'

# Process the JSONL file
process_jsonl_file(input_jsonl_path, output_jsonl_path)

In [5]:
sys_prompt = "You are an expert capable of discerning truthful from deceptive opinions based on speech patterns."

In [6]:
def gen_finetune(input, output, test=False):
    with open(input, 'r', encoding='utf-8') as data_in, \
        open(output, 'w') as gpt_out:
        for i, line in enumerate(data_in):
            user_prompt = json.loads(line)['text']
            sys_reply = "True" if json.loads(line)["label"] == 1 else "False"
            if not test:
                payload = {"messages": [{"role": "system", "content": sys_prompt}, {"role": "user", "content": user_prompt}, {"role": "assistant", "content": sys_reply}]}
            else:# exclude response from test set
                payload = {"messages": [{"role": "system", "content": sys_prompt}, {"role": "user", "content": user_prompt}]}
            gpt_out.write(json.dumps(payload) + '\n')


In [33]:
gen_finetune('train_data_300.json', 'train300_gpt.jsonl')
gen_finetune('val_data_300.json', 'val300_gpt.jsonl')

In [20]:
gen_finetune('train_data_full.json', 'train_full_gpt.jsonl')
gen_finetune('val_data_full.json', 'val_full_gpt.jsonl')
#gen_finetune('test_data_full.json', 'test_full_gpt.jsonl')

Example of querying model for classification.  
Example on model trained on 300 example set.

In [12]:
test_lie = "One morning three months ago I was in a hurry and tripped on the steps while running to take a shower before work. I ended up fracturing 4 metatarsal bones that required two surgeries to fix. I was really having a hard time not being able to walk whenever I wanted to. I really had such a bad attitude at the beginning because I was so used to being independent. Now that I have recovered I have a new appreciation for my ability to walk. I feel like the whole time I couldn't walk I was thinking about how much I took that ability for granted. But now I choose to walk more than I ever had before. When I walk the dogs I go further out of my way just to enjoy the ability to do it. I am completely recovered and I am going to take this as a lesson learned. Nothing is more important that my personal health. I need to make sure that even if I am running late, I need to take my time and be careful. Instead f making it to work on time, I ended up missing weeks of work. Now all I do at work is try and catch up with everything I missed. It was really nice that people at work came and visited me at the hospital. I really appreciated all the flowers and candy and food that was delivered. I think that this showed me how loved I truly am."
test_truth = "Each and every abortion is essentially a tragedy. The potential mother will suffer unforeseen consequences. Society as a whole will be deprived of the potential it could have received from the new life."
response = client.chat.completions.create(
  model="ft:gpt-3.5-turbo-0613:personal::8S542QSs",
  messages=[
    {"role": "system", "content": sys_prompt},
    {"role": "user", "content": test_lie},
    {"role": "user", "content": test_truth},
  ]
)
response.choices[0].message.content

'False'

In [7]:
mdls = {
    '300':"ft:gpt-3.5-turbo-0613:personal::8S542QSs",
    '3.5':"gpt-3.5-turbo"
    }

In [8]:
def predict(text, model, sys_prompt):
    response = client.chat.completions.create(
    model=model,
    messages=[
        {"role": "system", "content": sys_prompt},
        {"role": "user", "content": text}
    ]
    )
    return response.choices[0].message.content

In [9]:
def eval(test_file, mdl, return_df = False, sys_prompt=sys_prompt):
    msgs = []
    preds = []
    trues = []
    replies = []
    with open(test_file, 'r') as f:
        for ex in f:
            ex = json.loads(ex)
            msg = ex['text']
            #pred = predict(msg, mdl) == 'True' #'True' if predict(msg) else 'False'
            reply = predict(msg, mdl, sys_prompt) #for debug
            pred = reply == 'True'
            true = ex['label'] == 1
            msgs.append(msg)
            preds.append(pred)
            trues.append(true)
            replies.append(reply) # debug
    test_df = pd.DataFrame.from_dict({'msg': msgs, 'preds': preds, 'true': trues})   
    acc = (test_df['preds'] == test_df['true']).mean()
    if return_df:
        return (test_df, acc)
    else:
        return acc

Baseline gpt3.5 loves to give verbose or indecisive answers. Verbose answers, that actually classify a sentiment as genuine or deceptive can be classified fairly easily. Howver, there is still the issue of baseline gpt3.5 deciding it does not have enough information to classify the statement
- "There is no objective truth to the statement as it expresses subjective opinions and beliefs about abortion. It cannot be definitively classified as 'True' or 'False'."

To address this issue we need to engineer a system prompt that discourages this behaivor and adequately explains the task. This prompt will be provided to the model at every example query, so instructions should also be consise, to minimize any token overhead that will lead to increased cost of training and queries.

- "You are an expert capable of discerning truthful from deceptive opinions based on speech patterns. Definitively classify the following statement as 'True' or 'False', based on the likelihood  the statement represents a genuinely held beleif or a deception."

This issue is avoided in the finetuned models as the traning process rewards our expected behaivor and output format.

In [20]:
sys_prompt_base = sys_prompt + ' Definitively classify the following statement as \'True\' or \'False\', based on the likelihood  the statement represents a genuinely held beleif or a deception.'
#base_df, base_acc = predict('test_data_300.json', mdls['3.5'])
predict(test_truth, mdls['300'], sys_prompt=sys_prompt_base)

'False'

In [25]:
msgs = []
preds = []
trues = []
with open('test_data_300.json', 'r') as f:
    for ex in f:
        ex = json.loads(ex)
        msg = ex['text']
        pred = predict(msg, mdl['300']) == 'True' #'True' if predict(msg) else 'False'
        true = ex['label'] == 1
        msgs.append(msg)
        preds.append(pred)
        trues.append(true)
test_df = pd.DataFrame.from_dict({'msg': msgs, 'preds': preds, 'true': trues})   
test_df

Unnamed: 0,msg,preds,true
0,I do not think that euthanasia should be legal...,False,False
1,"I pick up my instrument, hold it in my arms, f...",True,True
2,Tomorrow me and my ex partner will be taking o...,True,True
3,I wanted to write about one of the best days i...,True,False
4,We are going to cannock chase with the mountai...,True,True
5,My Step-daughter is having a surprise party fo...,True,True
6,Cannibals should be legalized. It is healthier...,False,True
7,I don't really care. If people want to get mar...,True,True
8,I will get the train to LINCOLN where my niece...,False,True
9,I don't think people should be able to end the...,False,False


In [27]:
(test_df['preds'] == test_df['true']).mean()

0.8222222222222222