In [1]:
from transformers import GPT2Tokenizer, TFGPT2LMHeadModel
import tensorflow as tf
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm





In [2]:
# Load tokenizer and model
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = TFGPT2LMHeadModel.from_pretrained('gpt2')






All PyTorch model weights were used when initializing TFGPT2LMHeadModel.

All the weights of TFGPT2LMHeadModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFGPT2LMHeadModel for predictions without further training.


In [3]:
# Optional: Freeze certain layers
for layer in model.transformer.h[:8]:  # Freeze the first 8 hidden layers
    layer.trainable = False

In [5]:
# Load your custom dataset
data = pd.read_csv("Conversation.csv")
conversations = [(row['question'], row['answer']) for _, row in data.iterrows()]

In [6]:
# Prepare input-output pairs
input_ids = []
attention_masks = []
labels = []

In [8]:
# Set pad_token to eos_token if not already set
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token


In [9]:
for question, answer in conversations:
    input_text = question + tokenizer.eos_token  # Append EOS to each input
    label_text = answer + tokenizer.eos_token

    # Tokenize input and output
    input_encodings = tokenizer(input_text, return_tensors="tf", padding="max_length", max_length=40, truncation=True)
    label_encodings = tokenizer(label_text, return_tensors="tf", padding="max_length", max_length=40, truncation=True)

    input_ids.append(input_encodings.input_ids)
    attention_masks.append(input_encodings.attention_mask)
    labels.append(label_encodings.input_ids)

In [10]:
input_ids = tf.concat(input_ids, axis=0)
attention_masks = tf.concat(attention_masks, axis=0)
labels = tf.concat(labels, axis=0)

In [11]:
# Define the training dataset
dataset = tf.data.Dataset.from_tensor_slices(({'input_ids': input_ids, 'attention_mask': attention_masks}, labels))
dataset = dataset.shuffle(len(input_ids)).batch(8)  # Adjust batch size as needed

In [12]:
# Compile and train the model
optimizer = tf.keras.optimizers.Adam(learning_rate=5e-5)
model.compile(optimizer=optimizer, loss=model.compute_loss)  # Use built-in compute_loss

In [25]:
# Verify dataset batches
print("Checking dataset batches:")
for batch in dataset.take(1):
    print("Sample batch shapes:", batch[0]['input_ids'].shape, batch[1].shape)
    print("Batch data types:", batch[0]['input_ids'].dtype, batch[0]['attention_mask'].dtype, batch[1].dtype)

Checking dataset batches:
Sample batch shapes: (8, 40) (8, 40)
Batch data types: <dtype: 'int32'> <dtype: 'int32'> <dtype: 'int32'>


In [26]:
# Retry dataset creation to ensure proper types
dataset = tf.data.Dataset.from_tensor_slices((
    {
        'input_ids': input_ids,
        'attention_mask': attention_masks
    },
    labels
)).map(lambda x, y: (
    {
        'input_ids': tf.cast(x['input_ids'], tf.int32),
        'attention_mask': tf.cast(x['attention_mask'], tf.int32)
    },
    tf.cast(y, tf.int32)
))
dataset = dataset.shuffle(len(input_ids)).batch(8)

In [27]:
# Double-check dataset consistency after shuffling and batching
for batch in dataset.take(1):
    print("Verified batch shapes:", batch[0]['input_ids'].shape, batch[1].shape)

Verified batch shapes: (8, 40) (8, 40)


In [28]:
# Ensure model layers are trainable if desired
print("Checking layer trainability:")
for i, layer in enumerate(model.layers):
    print(f"Layer {i} ({layer.name}): trainable={layer.trainable}")

Checking layer trainability:
Layer 0 (transformer): trainable=True


In [24]:
# Attempt training again
try:
    model.fit(dataset, epochs=3)  # Adjust epochs based on dataset size and performance
except AttributeError as e:
    print("Error during training:", e)

Epoch 1/3
Error during training: in user code:

    File "c:\Users\ENVY 13TH GEN\AppData\Local\Programs\Python\Python312\Lib\site-packages\tf_keras\src\engine\training.py", line 1398, in train_function  *
        return step_function(self, iterator)
    File "c:\Users\ENVY 13TH GEN\AppData\Local\Programs\Python\Python312\Lib\site-packages\tf_keras\src\engine\training.py", line 1370, in run_step  *
        outputs = model.train_step(data)
    File "c:\Users\ENVY 13TH GEN\AppData\Local\Programs\Python\Python312\Lib\site-packages\transformers\modeling_tf_utils.py", line 1706, in train_step  *
        loss = self.compiled_loss(y, y_pred, sample_weight, regularization_losses=self.losses)
    File "c:\Users\ENVY 13TH GEN\AppData\Local\Programs\Python\Python312\Lib\site-packages\tf_keras\src\engine\compile_utils.py", line 275, in __call__  *
        y_t, y_p, sw = match_dtype_and_rank(y_t, y_p, sw)
    File "c:\Users\ENVY 13TH GEN\AppData\Local\Programs\Python\Python312\Lib\site-packages\tf_k

In [19]:
# Train the model
model.fit(dataset, epochs=3)  # Adjust epochs based on dataset size and performance

Epoch 1/3


AttributeError: in user code:

    File "c:\Users\ENVY 13TH GEN\AppData\Local\Programs\Python\Python312\Lib\site-packages\tf_keras\src\engine\training.py", line 1398, in train_function  *
        return step_function(self, iterator)
    File "c:\Users\ENVY 13TH GEN\AppData\Local\Programs\Python\Python312\Lib\site-packages\tf_keras\src\engine\training.py", line 1370, in run_step  *
        outputs = model.train_step(data)
    File "c:\Users\ENVY 13TH GEN\AppData\Local\Programs\Python\Python312\Lib\site-packages\transformers\modeling_tf_utils.py", line 1706, in train_step  *
        loss = self.compiled_loss(y, y_pred, sample_weight, regularization_losses=self.losses)
    File "c:\Users\ENVY 13TH GEN\AppData\Local\Programs\Python\Python312\Lib\site-packages\tf_keras\src\engine\compile_utils.py", line 275, in __call__  *
        y_t, y_p, sw = match_dtype_and_rank(y_t, y_p, sw)
    File "c:\Users\ENVY 13TH GEN\AppData\Local\Programs\Python\Python312\Lib\site-packages\tf_keras\src\losses.py", line 143, in __call__  *
        losses = call_fn(y_true, y_pred)
    File "c:\Users\ENVY 13TH GEN\AppData\Local\Programs\Python\Python312\Lib\site-packages\tf_keras\src\losses.py", line 270, in call  *
        return ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "c:\Users\ENVY 13TH GEN\AppData\Local\Programs\Python\Python312\Lib\site-packages\transformers\modeling_tf_utils.py", line 1588, in compute_loss  *
        return super().compute_loss(*args, **kwargs)
    File "c:\Users\ENVY 13TH GEN\AppData\Local\Programs\Python\Python312\Lib\site-packages\tf_keras\src\engine\training.py", line 1207, in compute_loss  *
        y, y_pred, sample_weight, regularization_losses=self.losses
    File "c:\Users\ENVY 13TH GEN\AppData\Local\Programs\Python\Python312\Lib\site-packages\tf_keras\src\engine\compile_utils.py", line 275, in __call__  *
        y_t, y_p, sw = match_dtype_and_rank(y_t, y_p, sw)
    File "c:\Users\ENVY 13TH GEN\AppData\Local\Programs\Python\Python312\Lib\site-packages\tf_keras\src\engine\compile_utils.py", line 854, in match_dtype_and_rank  *
        if (y_t.dtype.is_floating and y_p.dtype.is_floating) or (

    AttributeError: 'NoneType' object has no attribute 'dtype'
