In [9]:
%pip install -q "accelerate>=0.26.0" transformers datasets peft torch

import pandas as pd
import os
# reduce fragmentation (set before heavy allocations)
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

from datasets import Dataset
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    Trainer,
    TrainingArguments,
)
import torch


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Note: you may need to restart the kernel to use updated packages.


In [2]:
df = pd.read_csv('ord-quest.csv')
# remove ID, Group, ChangeGroup, ChangeCards, ChangeGroup1 and ChangeCards1 columns
df = df.drop(columns=['ID', 'Group', 'ChangeGroup', 'ChangeCards', 'ChangeGroup1', 'ChangeCards1'])
# remove duplicate rows
df = df.drop_duplicates()
df

Unnamed: 0,EventName,ChoiceName,OutcomeName,ChoiceName1,OutcomeName1
0,Alarm.,Wake.,Quest.,Snooze.,Snore.
5,Home.,Leave.,Forest.,Stay.,Cozy.
6,Dog.,Pet.,Woof.,Play.,Woof!
7,Stick.,Throw.,Fetch.,Pretend.,Woof?
8,Coffee.,Drink.,Awake.,Leave.,Bitter.
...,...,...,...,...,...
302,Library.,Stay.,Books.,Leave.,Dusty.
303,Book.,Inspect.,Adventure.,Throw.,Thunk.
304,Book.,Read.,Epic.,Discard.,Distraction.
305,Chicken.,Cross.,Road.,Stop.,Boring.


In [3]:
# --- 3. Prepare prompt/completion pairs for training ---

def make_prompt(row):
    """
    Format one row into a simple prompt → target text pair.
    """
    return (
        f"Event: {row['EventName']} "
        f"Choices: {row['ChoiceName']} / {row['ChoiceName1']} "
        f"Outcomes: {row['OutcomeName']} / {row['OutcomeName1']}"
    )

df["text"] = df.apply(make_prompt, axis=1)
dataset = Dataset.from_pandas(df[["text"]])
dataset = dataset.train_test_split(test_size=0.1, seed=42)

# peek
print(dataset["train"][0])

{'text': 'Event: Guillotine. Choices: Test. / Inspect. Outcomes: Works. / Equaliser.', '__index_level_0__': 194}


In [5]:
# --- 4. Tokenization (fixed) ---
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"   # small, local-friendly base model
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

def tokenize(batch):
    encodings = tokenizer(
        batch["text"],
        truncation=True,
        padding="max_length",
        max_length=64,
    )
    # Copy input_ids as labels so Trainer can compute loss
    encodings["labels"] = encodings["input_ids"].copy()
    return encodings

tokenized_ds = dataset.map(tokenize, batched=True, remove_columns=["text"])


Map: 100%|██████████| 259/259 [00:00<00:00, 19290.15 examples/s]
Map: 100%|██████████| 29/29 [00:00<00:00, 6294.82 examples/s]


In [11]:
# --- 5. Load model ---
# Option A (recommended): load in fp16 and enable gradient checkpointing
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
)
model.gradient_checkpointing_enable()
model.resize_token_embeddings(len(tokenizer))
model.config.pad_token_id = tokenizer.eos_token_id
model.to("cuda" if torch.cuda.is_available() else "cpu")


LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32000, 2048)
    (layers): ModuleList(
      (0-21): 22 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (k_proj): Linear(in_features=2048, out_features=256, bias=False)
          (v_proj): Linear(in_features=2048, out_features=256, bias=False)
          (o_proj): Linear(in_features=2048, out_features=2048, bias=False)
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=2048, out_features=5632, bias=False)
          (up_proj): Linear(in_features=2048, out_features=5632, bias=False)
          (down_proj): Linear(in_features=5632, out_features=2048, bias=False)
          (act_fn): SiLUActivation()
        )
        (input_layernorm): LlamaRMSNorm((2048,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((2048,), eps=1e-05)
      )
    )
    (norm): LlamaRMSNorm((2048,), eps=1e-05)
    (rot

In [13]:
# --- 6. Fine-tuning setup ---
training_args = TrainingArguments(
    output_dir="ord_ft",
    num_train_epochs=5,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=2,
    learning_rate=5e-5,
    warmup_steps=50,
    weight_decay=0.01,
    logging_steps=10,
    # older transformers: use eval_steps/save_steps instead of evaluation_strategy/save_strategy
    eval_steps=500,
    save_steps=500,
    report_to="none",
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_ds["train"],
    eval_dataset=tokenized_ds["test"],
)


In [14]:
# --- 7. Train ---
trainer.train()

# Save locally
trainer.save_model("ord_tuned_distilgpt2")
tokenizer.save_pretrained("ord_tuned_distilgpt2")


`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


OutOfMemoryError: CUDA out of memory. Tried to allocate 22.00 MiB. GPU 0 has a total capacity of 3.62 GiB of which 32.50 MiB is free. Including non-PyTorch memory, this process has 3.10 GiB memory in use. Of the allocated memory 2.96 GiB is allocated by PyTorch, and 52.39 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [15]:
# --- 8. Quick test generation ---
from transformers import pipeline

pipe = pipeline("text-generation", model="ord_tuned_distilgpt2", tokenizer=tokenizer, device=0 if torch.cuda.is_available() else -1)

prompt = "Event: Coffee. Choices: Drink / Leave Outcomes:"
gen = pipe(prompt, max_new_tokens=8, temperature=0.9, num_return_sequences=3)

for i, g in enumerate(gen, 1):
    print(f"\n== Sample {i} ==")
    print(g["generated_text"])


Device set to use cuda:0



== Sample 1 ==
Event: Coffee. Choices: Drink / Leave Outcomes: Boring. / Hungry.

== Sample 2 ==
Event: Coffee. Choices: Drink / Leave Outcomes: Healthy. / Hungry. Outcomes:

== Sample 3 ==
Event: Coffee. Choices: Drink / Leave Outcomes: Coward. / Eaten.


In [16]:
# save model and tokenizer
model.save_pretrained("ord_tuned_distilgpt2")
tokenizer.save_pretrained("ord_ft_tokenizer")

('ord_ft_tokenizer/tokenizer_config.json',
 'ord_ft_tokenizer/special_tokens_map.json',
 'ord_ft_tokenizer/vocab.json',
 'ord_ft_tokenizer/merges.txt',
 'ord_ft_tokenizer/added_tokens.json',
 'ord_ft_tokenizer/tokenizer.json')