In [1]:
from transformers import GPTNeoForCausalLM, AutoTokenizer, Trainer, TrainingArguments
from datasets import load_dataset, DatasetDict, Dataset
import json

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the GPT-Neo model and AutoTokenizer
model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-2.7B")
tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neo-2.7B")

In [3]:
dataset = load_dataset("json", data_files="jacksparrow_modified.json")

In [4]:
# Set the pad token to eos token
tokenizer.pad_token = tokenizer.eos_token

# Load your custom dataset 'jacksparrow_modified.json'
with open("jacksparrow_modified.json") as f:
    data = json.load(f)

# Flatten the conversations into a list of strings
conversations = data["conversations"]
texts = [conv["value"] for conv in conversations]

# Convert the list of strings to a Dataset
dataset = Dataset.from_dict({"text": texts})

# Tokenize the dataset
def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True)

tokenized_dataset = dataset.map(tokenize_function, batched=True)


Map: 100%|██████████| 1132/1132 [00:00<00:00, 1679.77 examples/s]


In [5]:
# Split the dataset into training and validation sets (if needed)
split_datasets = tokenized_dataset.train_test_split(test_size=0.1)

# Define the training arguments
training_args = TrainingArguments(
    output_dir="./gpt_neo_finetuned",
    overwrite_output_dir=True,
    num_train_epochs=3,
    per_device_train_batch_size=2,  # Reduce this value
    gradient_accumulation_steps=4,  # Enable gradient accumulation
    fp16=True,  # Enable mixed precision training
    save_steps=10_000,
    save_total_limit=2,
)

In [6]:
# Trainer to train the model
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=split_datasets["train"],
    eval_dataset=split_datasets["test"],
)

# Fine-tune the GPT-Neo model
trainer.train()

# Save the model weights after training
model.save_pretrained("./gpt_neo_finetuned")

Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


OutOfMemoryError: CUDA out of memory. Tried to allocate 640.00 MiB. GPU 