In [2]:
import json
import os

# Restrict this process to the first GPU. The variable is read when the CUDA
# runtime is first initialised, so it is set here, before torch / bitsandbytes
# are imported, to make sure no CUDA-backed library can enumerate devices first.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

from pprint import pprint
import bitsandbytes as bnb
import torch
import torch.nn as nn
import transformers
from datasets import load_dataset, concatenate_datasets
from huggingface_hub import notebook_login
from peft import (
    LoraConfig,
    PeftConfig,
    PeftModel,
    AutoPeftModelForCausalLM,
    get_peft_model,
    prepare_model_for_kbit_training
)
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline
)

from trl import SFTTrainer, SFTConfig

### Set up file paths

In [3]:
# Read the notebook settings (dataset, prompt, model and checkpoint locations)
# from config.json in the current working directory.
with open("config.json") as config_file:
    config = json.load(config_file)

In [4]:
# Locations of the train / test / dev dataset files, looked up in the settings
# loaded from config.json. `config.get` is used, so a missing key gives None
# (assuming config is a dict).
training_dataset_path, test_dataset_path, dev_dataset_path = (
    config.get(key)
    for key in ("training_dataset_path", "test_dataset_path", "dev_dataset_path")
)

In [5]:
# Path of the text file that holds the system prompt; it is opened and read in
# full further down in the notebook.
system_prompt_path = config.get("system_prompt_path")

In [6]:
# Directory used as the trainer's output_dir and as the root from which the
# fine-tuned adapter is loaded again for inference.
checkpoint_path = config.get("checkpoint_path")

### Load datasets and system prompt:

In [7]:
def _load_json_split(path):
    """Load one JSON dataset file and return its "train" split.

    ``download_mode="force_redownload"`` is passed on every call, so the data
    is regenerated instead of being served from the datasets cache.
    """
    loaded = load_dataset("json", data_files=path, download_mode="force_redownload")
    return loaded["train"]


raw_train = _load_json_split(training_dataset_path)
raw_test = _load_json_split(test_dataset_path)
raw_dev = _load_json_split(dev_dataset_path)

Generating train split:   0%|          | 0/2332 [00:00<?, ? examples/s]

Generating train split:   0%|          | 0/2422 [00:00<?, ? examples/s]

Generating train split:   0%|          | 0/2341 [00:00<?, ? examples/s]

In [8]:
# Read the whole system prompt file into memory as one string.
with open(system_prompt_path) as prompt_file:
    system_prompt = prompt_file.read()

### Load model and tokenizer
We load the model and tokenizer before processing the data, because the tokenizer's chat template is used to format each example.

In [9]:
model_path = config.get("model_path")

# Quantization settings: 4-bit NF4 weights with double quantization, and
# bfloat16 as the compute dtype.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    load_in_4bit=True,
)

# Keyword arguments for loading the base model, gathered in one place.
# trust_remote_code=True allows code shipped with the model to be executed,
# so only point model_path at a source you trust.
model_load_kwargs = dict(
    device_map="auto",
    quantization_config=bnb_config,
    trust_remote_code=True,
)
model = AutoModelForCausalLM.from_pretrained(model_path, **model_load_kwargs)

tokenizer = AutoTokenizer.from_pretrained(model_path)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [10]:
# Run PEFT's k-bit training preparation on the quantized model. This rebinds
# `model`, so re-running the cell passes the already-prepared model back in.
model = prepare_model_for_kbit_training(model)

### Change the dataset format to chat-like text

In [12]:
def apply_chat_template(sample, tokenizer, include_response=True):
    """Render one dataset row as chat-formatted text and tokenize it.

    The conversation consists of the notebook-level ``system_prompt`` (left out
    when it is empty), the row's ``"user"`` field and, when ``include_response``
    is true, the row's ``"assistant"`` field.

    Args:
        sample: Mapping with a ``"user"`` entry and, if ``include_response`` is
            true, an ``"assistant"`` entry.
        tokenizer: Object that offers ``apply_chat_template`` and can be called
            on a string to tokenize it.
        include_response: When true (default) the assistant answer is part of
            the text, as needed for supervised training. When false the text
            stops after the user turn and the template's generation prompt is
            appended, so the result is a prompt the model can complete.

    Returns:
        The result of ``tokenizer(text, padding=True, truncation=True)``.
    """
    messages = []
    if len(system_prompt):
        messages.append({"role": "system", "content": system_prompt})
    messages.append({"role": "user", "content": sample["user"]})
    if include_response:
        messages.append({"role": "assistant", "content": sample["assistant"]})

    # Without an assistant turn the text has to end with the assistant header,
    # otherwise the model is not cued to answer. With the answer included the
    # generation prompt stays off, exactly as before.
    text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=not include_response
    )

    # NOTE(review): the function is called on one string at a time, so
    # padding=True has no other sequence to pad against. Also confirm whether
    # the chat template already emits special tokens that the tokenizer would
    # then add a second time.
    return tokenizer(text, padding=True, truncation=True)

In [12]:
# Apply processing to each dataset
# Chat-format and tokenize every split with the same function and arguments.
chat_map_kwargs = {"tokenizer": tokenizer}
processed_train, processed_test, processed_dev = (
    split.map(apply_chat_template, fn_kwargs=chat_map_kwargs)
    for split in (raw_train, raw_test, raw_dev)
)

Map:   0%|          | 0/2332 [00:00<?, ? examples/s]

Map:   0%|          | 0/2422 [00:00<?, ? examples/s]

Map:   0%|          | 0/2341 [00:00<?, ? examples/s]

In [18]:
# Merge the train and dev rows into one dataset; after processing, this merged
# set is what the trainer below is given as its training data.
long_train = concatenate_datasets([raw_train, raw_dev])

In [19]:
# Chat-format and tokenize the merged train + dev rows.
long_train_map_kwargs = dict(tokenizer=tokenizer)
processed_long_train = long_train.map(apply_chat_template, fn_kwargs=long_train_map_kwargs)

Map:   0%|          | 0/4673 [00:00<?, ? examples/s]

### Train

In [20]:
# LoRA adapter settings: rank 16, alpha 32, 5% dropout, no bias training.
# target_modules lists the projection layers of the base model that receive
# adapters. 'base_layer' is deliberately not listed: it is the name PEFT gives
# to the original layer inside an adapter wrapper, so it only exists on a model
# that has already been wrapped, and targeting it would nest adapters.
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=['gate_up_proj', 'down_proj', 'qkv_proj', 'o_proj'],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

In [22]:
# Upper bound on tokens per training example. A limit of only a few tokens
# would cut every chat example down to nothing useful wherever the trainer
# enforces it, so this is set to a full context length.
# TODO confirm 4096 against the base model's context window and GPU memory.
MAX_SEQ_LENGTH = 4096

# Effective batch size per device is 4 * 4 = 16 examples per optimizer step.
sft_args = SFTConfig(
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    num_train_epochs=5,
    learning_rate=5e-5,
    max_seq_length=MAX_SEQ_LENGTH,
    bf16=True,
    optim="adamw_8bit",
    lr_scheduler_type="cosine",
    warmup_ratio=0.05,
    logging_steps=10,
    save_strategy="epoch",  # one checkpoint directory per epoch under output_dir
    output_dir=checkpoint_path,
)

# Training runs on the merged train + dev set and no eval_dataset is passed.
# To hold dev out instead, train on processed_train and pass
# eval_dataset=processed_dev.
trainer = SFTTrainer(
    model=model,
    args=sft_args,
    train_dataset=processed_long_train,
    peft_config=peft_config,
    tokenizer=tokenizer
)
trainer.train()



[2025-03-07 11:15:01,942] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect)


/home/martinh2k3/anaconda3/envs/bp/compiler_compat/ld: cannot find -laio: No such file or directory
collect2: error: ld returned 1 exit status
/home/martinh2k3/anaconda3/envs/bp/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::runtime_error::~runtime_error()@GLIBCXX_3.4'
/home/martinh2k3/anaconda3/envs/bp/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `__gxx_personality_v0@CXXABI_1.3'
/home/martinh2k3/anaconda3/envs/bp/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ostream::tellp()@GLIBCXX_3.4'
/home/martinh2k3/anaconda3/envs/bp/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::chrono::_V2::steady_clock::now()@GLIBCXX_3.4.19'
/home/martinh2k3/anaconda3/envs/bp/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::_M_replace_aux(unsigned long, unsigned long, unsigned long, char)@GLIBCXX_3.4'
/home/martinh2k3/a

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
  return fn(*args, **kwargs)
You are not running the flash-attention implementation, expect numerical differences.


Step,Training Loss
10,1.7951
20,1.7281
30,1.5502
40,1.2409
50,0.9132
60,0.6829
70,0.526
80,0.4893
90,0.476
100,0.4753


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


TrainOutput(global_step=1460, training_loss=0.4352083483787432, metrics={'train_runtime': 6738.2664, 'train_samples_per_second': 3.468, 'train_steps_per_second': 0.217, 'total_flos': 1.9498163092042752e+17, 'train_loss': 0.4352083483787432, 'epoch': 4.995722840034217})

In [16]:
# Save through the trainer so that the model it actually trained (the PEFT
# wrapper it built from peft_config) is written out. The notebook-level `model`
# is the base model object that was handed to the trainer, not that wrapper.
trainer.save_model(checkpoint_path)

### Inference

In [17]:
# Checkpoint directory whose adapter is loaded for inference.
# NOTE(review): the training output shown above ends at global_step=1460 with
# save_strategy="epoch"; confirm that a "checkpoint-145" directory exists, as
# it looks like it belongs to an earlier, shorter run.
adapter_path = checkpoint_path+"/checkpoint-145"

# Stored under its own name so the settings dict `config` read from config.json
# is not overwritten; the earlier `config.get(...)` cells stay re-runnable.
adapter_config = PeftConfig.from_pretrained(adapter_path)

# Reload the base model recorded in the adapter config, with the same
# quantization settings as used for training.
model = AutoModelForCausalLM.from_pretrained(
    adapter_config.base_model_name_or_path,
    return_dict=True,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)

tokenizer = AutoTokenizer.from_pretrained(adapter_config.base_model_name_or_path)
# Use the unknown token as the padding token.
tokenizer.pad_token = tokenizer.unk_token

# Attach the fine-tuned adapter weights to the freshly loaded base model.
model = PeftModel.from_pretrained(model, adapter_path)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [18]:
def prepare_for_inference(user_input: str, system_prompt: str = system_prompt):
    """Build the chat-formatted prompt string for one user message.

    Args:
        user_input: Text placed in the user turn.
        system_prompt: Text for the system turn; the turn is left out when the
            string is empty. Defaults to the notebook-level system prompt as it
            was when this function was defined.

    Returns:
        The untokenized prompt rendered by the notebook-level tokenizer's chat
        template, with the generation prompt appended.
    """
    user_turn = {"role": "user", "content": user_input}
    if len(system_prompt):
        chat = [{"role": "system", "content": system_prompt}, user_turn]
    else:
        chat = [user_turn]
    return tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)

In [19]:
# Example paragraph sent as the user message in the inference demo below.
sentence: str = "A random paragraph can also be an excellent way for a writer to tackle writers' block. Writing block can often happen due to being stuck with a current project that the writer is trying to complete. By inserting a completely random paragraph from which to begin, it can take down some of the issues that may have been causing the writers' block in the first place. Another productive way, other than using xanax, to use this tool to begin a daily writing routine."

In [20]:
# Keyword arguments forwarded to the text-generation pipeline call:
# at most 150 new tokens, and only the generated continuation is returned.
generation_args = dict(max_new_tokens=150, return_full_text=False)

In [23]:
# Wrap the adapter-augmented model in a text-generation pipeline, run it on the
# chat-formatted example and print the text of the first result.
peft_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)
inference_prompt = prepare_for_inference(sentence)
output = peft_pipeline(inference_prompt, **generation_args)
first_result = output[0]
print(first_result["generated_text"])

The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CohereForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'DbrxForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FalconMambaForCausalLM', 'FuyuForCausalLM', 'GemmaForCausalLM', 'Gemma2ForCausalLM', 'GitForCausalLM', 'GlmForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'GraniteForCausalLM', 'GraniteMoeForCausalLM', 'JambaForCausalLM', 'JetMoeForCausalLM', 'LlamaForCausalLM', 'MambaForCausalLM', 'Mamba2ForCausalLM', 'MarianForCausalLM', 'MBartForCausa

 [{"category": "Chemical", "entity": "xanax"}]
