In [1]:
%%capture
#!pip install unsloth "xformers==0.0.28.post2"
# Also get the latest nightly Unsloth!
#!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

Getting the model and the tokenizer

In [None]:

#from unsloth import FastLanguageModel
from transformers import AutoModelForCausalLM, AutoTokenizer, default_data_collator, get_linear_schedule_with_warmup
from peft import get_peft_model, PromptTuningInit, PromptTuningConfig, TaskType
import torch
from torch.utils.data import DataLoader
from datasets import Dataset
from tqdm import tqdm
import json
import os
import datetime

### Training hyperparameters ### 
device = "cuda"
max_length = 3000
lr = 1e-2
num_epoch = 50
batch_size = 2
is_wandb = False
load_in_4bit = True
train_prompt = "long"
save_epochs = 50
num_virtual_tokens = 200 ### Specify here the number of virtual tokens to be trained
max_length = num_virtual_tokens + max_length

### Paths to define ###
model_dir = "path_to_base_model"
used_model = "unsloth-Llama-3.1-8B" # Model name for documentation. Change if you use a different LLM.
model_path = "./models"
chatter_path = './experiments/...new_outputs.json' # Synthetic chatter to be used as prompt tuning data
prompt_file = "./prompts/prompt_file.txt" # Change to the path of the prompt file for the task category of the data you are finetuning with

with open(chatter_path, 'r') as file:
        chatters = json.load(file)
task_name = chatters["task_name"]
results_path = f'./models/prompt_tuning/{task_name}/{used_model}'

with open(chatter_path, 'r') as file:
        chatters = json.load(file)
peft_config = PromptTuningConfig(
    task_type=TaskType.CAUSAL_LM,
    prompt_tuning_init=PromptTuningInit.TEXT,
    prompt_tuning_init_text= chatters[chatters["task_name"]]["instruction"],
    num_virtual_tokens=num_virtual_tokens,
    tokenizer_name_or_path=model_dir,
)

current_time = datetime.datetime.now().strftime("%Y%m%d_%H%M")

if load_in_4bit:
    quantization = '_4bit'
else:
    quantization = ''

save_dir = f"{results_path}{quantization}_{current_time}"


config = {
        "num_virtual_tokens": num_virtual_tokens,
        "load_in_4bit": load_in_4bit,
        "model_dir": model_dir,
        "used_model": used_model,
        "model_path": model_path,
        "chatter_path": chatter_path,
        "results_path": results_path,
        "num_epoch": num_epoch,
        "learning_rate": lr
    }


In [3]:
from transformers import BitsAndBytesConfig

quantization_config = BitsAndBytesConfig(
    load_in_4bit=load_in_4bit,  # 4-bit quantization
    bnb_4bit_compute_dtype=torch.bfloat16,  # bfloat16 for Ampere+, float16 for older GPUs
    bnb_4bit_use_double_quant=False,  # Double quantization saves memory
)

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_dir)
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id

# Load model and prepare for prompt tuning
model = AutoModelForCausalLM.from_pretrained(
    model_dir,
    quantization_config=quantization_config if load_in_4bit else None
)
if is_wandb:
    import wandb
    wandb.init(project="llama_finetuning", reinit=True, config=config)

model = get_peft_model(model, peft_config)
print(model.print_trainable_parameters())  # Verify trainable parameters

`low_cpu_mem_usage` was None, now default to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

trainable params: 819,200 || all params: 8,031,080,448 || trainable%: 0.0102
None


Preparing the dataset and the prompt

In [4]:
# Dataset processing
def preprocess_function(examples):
    inputs = [f"Instruction: {instruction}\nInput: {input_text}\nResponse:"
              for instruction, input_text in zip(examples["instruction"], examples["input"])]
    targets = examples["output"]

    # Concatenate inputs and targets
    full_texts = [text for text in examples["text"]]

    # Tokenize the concatenated inputs and targets with padding and truncation
    tokenized_outputs = tokenizer(
        full_texts,
        max_length=max_length,
        truncation=True,
        padding='max_length',
        return_tensors='pt'
    )

    # Create labels by copying tokenized outputs
    labels = tokenized_outputs['input_ids'].clone()

    # Determine the length of the inputs to mask them in the labels
    inputs_tokenized = tokenizer(
        inputs,
        max_length=max_length,
        truncation=True,
        padding='max_length',
        return_tensors='pt'
    )

    #Mask the input tokens in the labels
    prompt_lengths = (inputs_tokenized['attention_mask'] > 0).sum(dim=1)
    for i, prompt_length in enumerate(prompt_lengths):
        labels[i, :prompt_length] = -100  # Mask input tokens

    tokenized_outputs['labels'] = labels

    return tokenized_outputs

# Formatting your dataset
def format_dataset(chatters):
    with open(prompt_file, "r") as f:
        base_prompt = "\n".join(f.readlines())    

    prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

    ### Instruction:
    {}

    ### Context:
    {}

    ### Maritime Radio Chatter:
    {}"""

    EOS_TOKEN = "<|end_of_text|>"

    formatted_data = []
    for instance in chatters[task_name]["instances"]:
        instruction = chatters[chatters["task_name"]]["instruction"] if train_prompt == "short" else base_prompt.replace("Task: ", "")
        input_data = json.dumps(instance["input"], ensure_ascii=False, indent=4)
        output_data = '\n'.join(instance["output"])

        formatted_data.append({
            "instruction": instruction,
            "input": input_data,
            "output": output_data,
            "text": prompt.format(instruction, input_data, output_data) + EOS_TOKEN,
        })
    
    return Dataset.from_list(formatted_data)

In [5]:
# Load and preprocess dataset
raw_dataset= format_dataset(chatters)
processed_dataset = raw_dataset.map(preprocess_function, batched=True, remove_columns=["instruction", "input", "output", "text"], num_proc=1)

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

In [6]:
print(raw_dataset[0])
all(label == -100 for label in processed_dataset[0]['labels'])

{'instruction': 'You are a creative expert of maritime industry and undesignated distresses of vessels. Generate a maritime radio chatter which complies with the IMO Standard marine Communication Phrases. A vessel makes a distress call and reports an undesignated distress.\n\n\n\nTips about creating original scenarios:\n\n- Describe different situations of the crew. Be creative.\n\n- Mention specific strategies the crew is using to cope with the undesignated distress. Be creative.\n\n- Provide ETAs which match with the vessel coordinate and the destination. Provide various types of help available from the Coast Guard. Be creative.\n\n- Make the conversation flow naturally. Do not make people say same phrases repeatedly.\n\n- Do not generate any other text but the radio chatter. Keep the generation limited to the radio chatter.\n\n- All distress calls must start with "Mayday, Mayday, Mayday". Ship gives its location in terms of degrees and tells the nature of disaster and the help neede

False

In [7]:
max_len = 0
for entry in raw_dataset:
    if len(str(entry)) > max_len:
        max_len = len(str(entry))
print(max_len)         

def count_tokens(text, tokenizer):
    # Tokenize the text
    tokenized_output = tokenizer(text, return_tensors=None)
    
    # Get the number of tokens
    num_tokens = len(tokenized_output["input_ids"])
    
    return num_tokens
str(raw_dataset[0]["text"])
print(count_tokens(str(raw_dataset[0]["text"]), tokenizer))

11429
1167


In [8]:
# Split dataset
train_dataset = processed_dataset  # Assuming all data is used for training
train_dataloader = DataLoader(train_dataset, shuffle=False, collate_fn=default_data_collator, batch_size=batch_size)

In [9]:
data_to_save = {
        "task_name": task_name,
        "num_virtual_tokens": num_virtual_tokens,
        "load_in_4bit": load_in_4bit,
        "model_name": used_model,
        "model_path": f"{results_path}{quantization}_{current_time}",
        "chatter_path": chatter_path,
        "num_epoch": num_epoch,
        "learning_rate": lr,
        "prompt": train_prompt,
}


In [10]:
len(processed_dataset[0]['labels'])

3200

In [11]:
# for name, param in model.named_parameters():
#        print(name, param.requires_grad)

In [12]:
#@title Show current memory stats
gpu_stats = torch.cuda.get_device_properties(0)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

GPU = NVIDIA RTX A6000. Max memory = 47.319 GB.
5.846 GB of memory reserved.


In [13]:
# Optimizer and scheduler
optimizer = torch.optim.AdamW(model.parameters(), lr)
num_training_steps = len(train_dataloader) * num_epoch  
lr_scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=num_training_steps)

gradient_accumulation_steps = 4

# Training loop
model.to(device, dtype=torch.bfloat16)
model.train()
for epoch in range(num_epoch):
    total_loss = 0
    for step, batch in enumerate(tqdm(train_dataloader, desc=f"Epoch {epoch + 1}")):
        batch = {k: v.to(device) for k, v in batch.items()}
        outputs = model(**batch)
        loss = outputs.loss
        total_loss += loss.item()

        loss.backward()

        if (step + 1) % gradient_accumulation_steps == 0:  # Update weights every few steps
            optimizer.step()
            optimizer.zero_grad()
            lr_scheduler.step()

         # Log metrics to wandb
        if is_wandb:
            wandb.log({"epoch": epoch, "loss": loss.item(), "lr": optimizer.param_groups[0]["lr"]})

    avg_loss = total_loss / len(train_dataloader)
    print(f"Epoch {epoch + 1} completed. Average Loss: {avg_loss}")

    # Save model every `save_epochs`
    # if (epoch + 1) % save_epochs == 0:
    #     model.save_pretrained(os.path.join(save_dir,f'checkpoint{epoch + 1}')) # Local saving
    #     tokenizer.save_pretrained(os.path.join(save_dir,f'checkpoint{epoch + 1}'))
    #     # Save results to JSON
    #     with open(f'{os.path.join(save_dir, f"checkpoint{epoch + 1}")}/hyperparameters.json', 'w') as outfile:
    #         json.dump(data_to_save, outfile, ensure_ascii=False, indent=4)
    #     print(f"Model saved at epoch {epoch+1} in {os.path.join(save_dir,f'checkpoint{epoch + 1}')}")

Epoch 1:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/250 [00:01<?, ?it/s]


OutOfMemoryError: CUDA out of memory. Tried to allocate 3.25 GiB. GPU 0 has a total capacity of 47.32 GiB of which 725.94 MiB is free. Including non-PyTorch memory, this process has 46.50 GiB memory in use. Of the allocated memory 45.50 GiB is allocated by PyTorch, and 701.17 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
model.save_pretrained(save_dir) # Local saving
tokenizer.save_pretrained(save_dir)
# Save results to JSON
with open(f'{save_dir}/hyperparameters.json', 'w') as outfile:
    json.dump(data_to_save, outfile, ensure_ascii=False, indent=4)

In [None]:
print(save_dir)

/home/gakdeniz/Dev/ma-llm-tuning/models/prompt_tuning/reporting_undesignated_distress/unsloth-Llama-3.1-8B_20250323_0728


Training

In [None]:
#@title Show final memory and time stats
used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
used_memory_for_lora = round(used_memory - start_gpu_memory, 3)
used_percentage = round(used_memory         /max_memory*100, 3)
lora_percentage = round(used_memory_for_lora/max_memory*100, 3)
print(f"Peak reserved memory = {used_memory} GB.")
print(f"Peak reserved memory for training = {used_memory_for_lora} GB.")
print(f"Peak reserved memory % of max memory = {used_percentage} %.")
print(f"Peak reserved memory for training % of max memory = {lora_percentage} %.")

Peak reserved memory = 37.225 GB.
Peak reserved memory for training = 37.225 GB.
Peak reserved memory % of max memory = 78.668 %.
Peak reserved memory for training % of max memory = 78.668 %.


TESTING THE PROMPT TUNED MODEL

In [None]:
from peft import PeftModel
import geo_test

land_shapefile = "./GSHHS_dataset/GSHHS_shp/f/GSHHS_f_L1.shp" # path of the GSHHS data
geonames_data_path = './all_countries/allCountries.txt' # path of the geonames data
ship_data_path = './data/ship_data_dk_us.pkl' # path of the vessel data

_, scenario_input = geo_test.MaritimeAnalysis(land_shapefile, geonames_data_path, ship_data_path).execute([])

with open(prompt_file, "r") as f:
    base_prompt = "\n".join(f.readlines())

prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Context:
{}

### Maritime Radio Chatter:
{}"""

device = "cuda"

model = model.to(device)

inputs = tokenizer(
[
    prompt.format(
        chatters[chatters["task_name"]]["instruction"] if train_prompt == "short" else base_prompt.replace("Task: ", ""), # instruction
        str(scenario_input), # input
        "", # output - leave this blank for generation!
    )
], return_tensors = "pt").to("cuda")

# outputs = model.generate(**inputs, max_new_tokens = 400, use_cache = True)
# print(tokenizer.batch_decode(outputs))

outputs = model.generate(
                **inputs,
                max_new_tokens=400,
                use_cache = True,
                # do_sample=True,
                # top_p=0.9,
                # temperature=0.9,
                # min_length=None,
                # top_k=400,
                # repetition_penalty=1,
                # length_penalty=1,
            )
output_text = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]


  self.geonames_df = pd.read_csv(geonames_data_path, sep='\t', header=None,


NameError: name 'prompt_file' is not defined

In [None]:
output_text

'Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\nYou are a creative expert of maritime industry and undesignated distresses of vessels. Generate a maritime radio chatter which complies with the IMO Standard marine Communication Phrases. A vessel makes a distress call and reports an undesignated distress.\n\n\n\nTips about creating original scenarios:\n\n- Describe different situations of the crew. Be creative.\n\n- Mention specific strategies the crew is using to cope with the undesignated distress. Be creative.\n\n- Provide ETAs which match with the vessel coordinate and the destination. Provide various types of help available from the Coast Guard. Be creative.\n\n- Make the conversation flow naturally. Do not make people say same phrases repeatedly.\n\n- Do not generate any other text but the radio chatter. Keep the generation limited to the radio chatter

In [None]:
wandb.finish()

NameError: name 'wandb' is not defined

In [None]:
%reset -f