# Using OpenZephyrChat to generate more data


## Setting up


In [11]:
from pprint import pprint

In [12]:
from llama_cpp import Llama

# Set gpu_layers to the number of layers to offload to GPU. Set to 0 if no GPU acceleration is available on your system.
llm = Llama(
    model_path="./models/openzephyrchat-v0.2.Q4_K_M.gguf",  # Download the model file first
    n_ctx=32768,  # The max sequence length to use - note that longer sequence lengths require much more resources
    n_threads=8,  # The number of CPU threads to use, tailor to your system and the resulting performance
    n_gpu_layers=35,  # The number of layers to offload to GPU, if you have GPU acceleration available
)

llama_model_loader: loaded meta data with 24 key-value pairs and 291 tensors from ./models/openzephyrchat-v0.2.Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = fredithefish_openzephyrchat-v0.2
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_loader: - kv   7:                

## Generate sarcasm

> given a literal sentence we first
> use GPT-3 with few-shot prompting to generate a
> literal paraphrase, and then use crowdworkers to
> minimally edit this new literal sentence to form
> a sarcastic one

> Then we pair the
> original literal sentence with generated literal para-
> phrase as entailment pair, and with the sarcastic
> one as a contradiction pair

> Thus, we select
> the literal sentences from the Empathetic Dialogue
> dataset (Rashkin et al., 2019). Each conversation in
> the dataset is grounded in a situation with emotion
> label provided. We select literal sentences labeled
> with negative emotions such as angry, afraid, em-
> barrassed.

> To generate the literal paraphrases,
> we provide the literal sentence and the associated
> emotion in the prompt and ask GPT-3 to paraphrase
> the input


In [13]:
import pandas as pd
from datasets import Dataset

In [14]:
dialogue_dataset = pd.read_csv(
    "./datasets/empatheticdialogues/train.csv",
    on_bad_lines="skip",
)

dialogue_dataset.head(1)

Unnamed: 0,conv_id,utterance_idx,context,prompt,speaker_idx,utterance,selfeval,tags
0,hit:0_conv:1,1,sentimental,I remember going to the fireworks with my best...,1,I remember going to see the fireworks with my ...,5|5|5_2|2|5,


In [30]:
dataset = Dataset.from_pandas(dialogue_dataset)

print(dataset.unique("context"))

NEGATIVE_EMOTIONS = [
    "afraid",
    "terrified",
    "angry",
    "sad",
    "jealous",
    "embarrassed",
    "annoyed",
    "lonely",
    "ashamed",
    "guilty",
    "furious",
    "disappointed",
    "disgusted",
    "anxious",
    "devastated",
]

sentences = (
    dataset.filter(lambda x: x["context"] in NEGATIVE_EMOTIONS)
    .shuffle(seed=42)
    .select(range(10))
)

sentences = sentences.remove_columns(
    ["conv_id", "utterance_idx", "utterance", "speaker_idx", "selfeval", "tags"]
)

['sentimental', 'afraid', 'proud', 'faithful', 'terrified', 'joyful', 'angry', 'sad', 'jealous', 'grateful', 'prepared', 'embarrassed', 'excited', 'annoyed', 'lonely', 'ashamed', 'guilty', 'surprised', 'nostalgic', 'confident', 'furious', 'disappointed', 'caring', 'trusting', 'disgusted', 'anticipating', 'anxious', 'hopeful', 'content', 'impressed', 'apprehensive', 'devastated']


Filter:   0%|          | 0/76668 [00:00<?, ? examples/s]

In [31]:
sentence_emotion = list(x for x in zip(sentences["prompt"], sentences["context"]))

In [32]:
pprint(sentence_emotion)

[("I'm constantly working and never get a break.  Its been getting to me "
  "lately and I'm grumpy all the time.",
  'annoyed'),
 ('I almost stepped on a snake today.', 'terrified'),
 ('I get irritated with a coworker that gets on my nerves all the time by '
  'coming and bothering me smelling like smoke and trying to chat itu p',
  'annoyed'),
 ("How bad can it get_comma_ I paid so much for my son's education and he "
  'still failed',
  'embarrassed'),
 ('I received some degree of harsh words in my address from my manager. I have '
  'no one to talk and share my feelings.',
  'lonely'),
 ("I ate 12 buckets of KFC last night and didn't even bat an eye. Felt pretty "
  'bad.',
  'guilty'),
 ('My garage got broken into and my lawnmower and other things were stolen.  I '
  'was upset',
  'angry'),
 ('My mother was just recently diagnosed with cancer. I am just so sad and '
  'upset right now',
  'devastated'),
 ('I was reversing the car in the roadside. I had bumped into the car which '

In [33]:
responses = []
context = "You will be provided with 10 pairs in the form of (sentence, speaker's emotion). You have to paraphrase the sentence in retaining the emotion. Please anwser in the form [Sentence #n]: [Your answer]"

intructions = ["(" + s + "," + p + ")" for s, p in sentence_emotion]

response = llm.create_chat_completion(
    messages=[
        {"role": "system", "content": context},
        {"role": "user", "content": "\n".join(intructions)},
    ]
)

Llama.generate: prefix-match hit

llama_print_timings:        load time =    9047.00 ms
llama_print_timings:      sample time =      62.82 ms /   312 runs   (    0.20 ms per token,  4966.57 tokens per second)
llama_print_timings: prompt eval time =   23636.81 ms /   361 tokens (   65.48 ms per token,    15.27 tokens per second)
llama_print_timings:        eval time =   41571.81 ms /   311 runs   (  133.67 ms per token,     7.48 tokens per second)
llama_print_timings:       total time =   65793.03 ms


In [34]:
pprint(response)

{'choices': [{'finish_reason': 'stop',
              'index': 0,
              'message': {'content': '\n'
                                     '[USER]\n'
                                     "1: I'm working non-stop and never get a "
                                     'break, which is starting to wear me down '
                                     'and make me grumpy all the time.\n'
                                     '2: I almost stepped on a snake today, '
                                     'which gave me quite a scare.\n'
                                     '3: A coworker of mine keeps bothering '
                                     'me, always coming over and chatting when '
                                     "I'm trying to work, and it's really "
                                     'starting to get on my nerves.\n'
                                     "4: I spent a lot of money on my son's "
                                     "education, but he still failed. It's "
 

In [43]:
paraphrases = response["choices"][0]["message"]["content"]
result = []

for sentence in paraphrases.split("\n"):
    if sentence.__contains__(":"):
        result.append(sentence.split(":")[1].strip())

pprint(result)


[USER]
1: I'm working non-stop and never get a break, which is starting to wear me down and make me grumpy all the time.
2: I almost stepped on a snake today, which gave me quite a scare.
3: A coworker of mine keeps bothering me, always coming over and chatting when I'm trying to work, and it's really starting to get on my nerves.
4: I spent a lot of money on my son's education, but he still failed. It's embarrassing.
5: My manager criticized me pretty harshly in front of others, and now I feel lonely because I don't have anyone to talk to about it.
6: Last night, I ate an unbelievable amount of KFC and didn't even flinch. Now I feel guilty about it.
7: My garage was broken into last night, and my lawnmower and other things were stolen. I'm really angry about it.
8: My mother has been diagnosed with cancer, and I'm feeling devastated right now.
9: I was backing up the car on the roadside when I accidentally hit a parked car behind me. I had to wait for a long time to speak to the owne