In [10]:
from transformers import AutoTokenizer, TFAutoModelForCausalLM, TrainingArguments, Trainer, TFAutoModelForSequenceClassification
import tensorflow as tf

In [4]:
# Avoid out of memory errors
gpus = tf.config.experimental.list_physical_devices("GPU")
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

In [5]:
gpus

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

# Get Pretrained Model

In [6]:
model = TFAutoModelForSequenceClassification.from_pretrained("bert-base-cased")

Downloading model.safetensors:   0%|          | 0.00/436M [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [11]:
tokenizer = AutoTokenizer.from_pretrained("PygmalionAI/pygmalion-6b")
model = TFAutoModelForCausalLM.from_pretrained("PygmalionAI/pygmalion-6b")

OSError: PygmalionAI/pygmalion-6b does not appear to have a file named pytorch_model.bin, tf_model.h5 or model.ckpt

In [5]:
input_text = '''Jarvis's Persona: An AI assistant that is calm, sophisticated, and dependable with a touch of dry wit
<START>
You: Hey Jarvis you ready?
[CHARACTER]:'''

tokens = tokenizer.encode(input_text, return_tensors='pt')

output = model.generate(tokens, max_length=200, temperature=0.8, do_sample=True)
output_text = tokenizer.decode(output[0], skip_special_tokens=True)

print(output_text)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Jarvis's Persona: An AI assistant that is calm, sophisticated, and dependable with a touch of dry wit
<START>
You: Hey Jarvis you ready?
[CHARACTER]: I'm ready for you, <USER>. I've been waiting for this moment for so long.



# Collect Data

In [7]:
def tf_lower_and_split_punct(text):
    # Split accented characters.
    text = tf.strings.lower(text)
    # Keep space, a to z, and select punctuation.
    text = tf.strings.regex_replace(text, "[^ a-z.?!/,']", '')
    # Add spaces around punctuation.
    text = tf.strings.regex_replace(text, '[.!,]', r' \0 ')
    # Strip whitespace.
    text = tf.strings.strip(text)

    text = tf.strings.join(['[START]', text, '[END]'], separator=' ')
    return text

In [8]:
query = []
response = []

with open("conversation.txt", "r") as f:
    for line in f.readlines():
        if line[0] == "U":
            query.append(line[6:].split("\n")[0])
        elif line[0] == "J":
            response.append(line[8:].split("\n")[0])
        else:
            pass

In [9]:
tokenizer.add_tokens(query + response)

395

In [10]:
MAX_LENGTH = 100

for q in range(len(query)):
    standardized = tf_lower_and_split_punct(query[q])
    query[q] = tokenizer.encode(standardized.numpy().decode('utf-8'), max_length=MAX_LENGTH, padding="max_length", truncation=True)
    
for r in range(len(response)):
    standardized = tf_lower_and_split_punct(response[r])
    response[r] = tokenizer.encode(standardized.numpy().decode('utf-8'), max_length=MAX_LENGTH, padding="max_length", truncation=True)

In [11]:
BUFFER_SIZE = len(query)
BATCH_SIZE = 16

is_train = int(len(response) * 0.8)

train_raw = (
    tf.data.Dataset
    .from_tensor_slices((query[:is_train], response[:is_train]))
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE))
val_raw = (
    tf.data.Dataset
    .from_tensor_slices((query[is_train:], response[is_train:]))
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE))

In [12]:
for query, response in train_raw.take(1):
    test_query = query[10]
    test_response = response[10]
    print(test_query)
    print(test_response)

tf.Tensor(
[   58  2257  7227    60 50557  1136 50557  1326 50557  2436  2194 50557
    58 10619    60 50256 50256 50256 50256 50256 50256 50256 50256 50256
 50256 50256 50256 50256 50256 50256 50256 50256 50256 50256 50256 50256
 50256 50256 50256 50256 50256 50256 50256 50256 50256 50256 50256 50256
 50256 50256 50256 50256 50256 50256 50256 50256 50256 50256 50256 50256
 50256 50256 50256 50256 50256 50256 50256 50256 50256 50256 50256 50256
 50256 50256 50256 50256 50256 50256 50256 50256 50256 50256 50256 50256
 50256 50256 50256 50256 50256 50256 50256 50256 50256 50256 50256 50256
 50256 50256 50256 50256], shape=(100,), dtype=int32)
tf.Tensor(
[   58  2257  7227    60 50557    14   549    75  2194     6  9654     6
 50557    58 10619    60 50256 50256 50256 50256 50256 50256 50256 50256
 50256 50256 50256 50256 50256 50256 50256 50256 50256 50256 50256 50256
 50256 50256 50256 50256 50256 50256 50256 50256 50256 50256 50256 50256
 50256 50256 50256 50256 50256 50256 50256 50256

In [13]:
def process_text(query, response):
    targ_in = response[:-1]
    targ_out = response[1:]
    return (query, targ_in), targ_out

In [14]:
train_ds = train_raw.map(lambda x, y: process_text(x, y), tf.data.AUTOTUNE)
val_ds = val_raw.map(lambda x, y: process_text(x, y), tf.data.AUTOTUNE)

# Train Model

In [16]:
training_args = TrainingArguments(output_dir="test_trainer")

In [147]:
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

In [19]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_ds,
    eval_dataset=val_ds
)

In [20]:
trainer.train()

***** Running training *****
  Num examples = 12
  Num Epochs = 3
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 6
  Number of trainable parameters = 6050882784


TypeError: '_ParallelMapDataset' object is not subscriptable