In [60]:
!pip install -U openai




[notice] A new release of pip is available: 23.2.1 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [61]:
pip install datasets

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.2.1 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [62]:
import pandas as pd
import transformers
import torch
from datasets import load_dataset
from sklearn.model_selection import train_test_split
import json
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


In [63]:
from openai import OpenAI
client = OpenAI(api_key="your_open_ai_key")

In [64]:
dataset = load_dataset("empathetic_dialogues")
train = pd.DataFrame(dataset['train'])
validation = pd.DataFrame(dataset['validation'])
test = pd.DataFrame(dataset['test'])

In [65]:
# if you want to train faster with less data

train = train[train['context'].isin(['sentimental','surprised'])].sample(100).reset_index(drop = True)
validation = validation[validation['context'].isin(['sentimental','surprised'])].sample(20).reset_index(drop = True)
test = test[test['context'].isin(['sentimental','surprised'])].sample(20).reset_index(drop = True)

In [66]:
train.shape,validation.shape,test.shape

((100, 8), (20, 8), (20, 8))

In [67]:
train['context'].unique(),validation['context'].unique(), test['context'].unique()

(array(['surprised', 'sentimental'], dtype=object),
 array(['surprised', 'sentimental'], dtype=object),
 array(['sentimental', 'surprised'], dtype=object))

In [68]:
train["text"] = train['prompt']+" [SEP] "+ train['utterance']
validation["text"] = validation['prompt']+" [SEP] "+ validation['utterance']
test["text"] = test['prompt']+" [SEP] "+ test['utterance']

In [69]:
def convert_to_gpt35_format(dataset):
    fine_tuning_data = []
    for _, row in dataset.iterrows():
        json_response = '{"context": "' + str(row['context']) + '"}'
        fine_tuning_data.append({
            "messages": [
                {"role": "user", "content": row['text']},
                {"role": "system", "content": json_response}
            ]
        })
    return fine_tuning_data


In [70]:
gpt_train = convert_to_gpt35_format(train)
gpt_val = convert_to_gpt35_format(validation)
gpt_test = convert_to_gpt35_format(test)

In [71]:
gpt_test

[{'messages': [{'role': 'user',
    'content': "I was going through boxes the other day. I came across old pictures I didn't know I still had. It really made me think back. [SEP] Yes_comma_ definitely! It really made us all start talking about those and other memories_comma_ good times."},
   {'role': 'system', 'content': '{"context": "sentimental"}'}]},
 {'messages': [{'role': 'user',
    'content': 'I saw a dog get hit by a car today_comma_ it was a very emotional day. [SEP] Oh no!!! Did you see what happened to the dog?!'},
   {'role': 'system', 'content': '{"context": "sentimental"}'}]},
 {'messages': [{'role': 'user',
    'content': "I recently found out that I am having baby number 4.  [SEP] That's nice! Kids are great"},
   {'role': 'system', 'content': '{"context": "surprised"}'}]},
 {'messages': [{'role': 'user',
    'content': 'I recently won a $100 on a scratch off! I never win so I was really taken back. [SEP] Not usually_comma_ I never win. so_comma_ it was very unexpected

In [72]:
def write_to_jsonl(data, file_path):
    with open(file_path, 'w') as file:
        for entry in data:
            json.dump(entry, file)
            file.write('\n')

training_file_name = "train_em.jsonl"
validation_file_name = "val_em.jsonl"

write_to_jsonl(gpt_train, training_file_name)
write_to_jsonl(gpt_val, validation_file_name)

In [None]:
# Upload Training and Validation Files
training_file = client.files.create(
    file=open(training_file_name, "rb"), purpose="fine-tune"
)
validation_file = client.files.create(
    file=open(validation_file_name, "rb"), purpose="fine-tune"
)

# Create Fine-Tuning Job
suffix_name = "gpt_empathi"
response = client.fine_tuning.jobs.create(
    training_file=training_file.id,
    validation_file=validation_file.id,
    model="gpt-3.5-turbo",
    suffix=suffix_name,
)
response 

In [None]:

client.fine_tuning.jobs.list(limit=10)# search for suffix_name

In [None]:
response = client.fine_tuning.jobs.retrieve("ftjob-h5P9gmmDXysjbOD99jz8TtYv") # retrieve based on yours job
response 
     

In [None]:
fine_tuned_model_id = response.fine_tuned_model
print("\nFine-tuned model id:", fine_tuned_model_id)

In [None]:
def format_test(row):
    formatted_message = [{"role": "user", "content": row['Support Query']}]
    return formatted_message

def predict(test_messages, fine_tuned_model_id):
    response = client.chat.completions.create(
        model=fine_tuned_model_id, messages=test_messages, temperature=0, max_tokens=50
    )
    return response.choices[0].message.content

def store_predictions(test_df,y_test, fine_tuned_model_id):
    test_df['Prediction'] = None
    pred = []
    for index, row in test_df.iterrows():
        test_message = format_test(row)
        prediction_result = predict(test_message, fine_tuned_model_id)
        pred.append(prediction_result)
    print("F1 Score : ", f1_score(pred,y_test))

    # test_df.to_csv("predictions.csv")

In [None]:
store_predictions(test,test['context'], fine_tuned_model_id)