In [1]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

model_id = "codellama/CodeLlama-34b-hf"


bnb_config = BitsAndBytesConfig(
                                load_in_4bit=True,
                                bnb_4bit_use_double_quant=True,
                                bnb_4bit_quant_type="nf4",
                                bnb_4bit_compute_dtype=torch.bfloat16
                                )

model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=bnb_config, device_map={'': 0})
tokenizer = AutoTokenizer.from_pretrained(model_id, padding='max_length')

  from .autonotebook import tqdm as notebook_tqdm


/home/ksaff/.cache/huggingface/hub/models--codellama--CodeLlama-34b-hf/snapshots/d3e967887d285343b8e239e26c6778c26931a536/model.safetensors.index.json
['/home/ksaff/.cache/huggingface/hub/models--codellama--CodeLlama-34b-hf/snapshots/d3e967887d285343b8e239e26c6778c26931a536/model-00001-of-00007.safetensors', '/home/ksaff/.cache/huggingface/hub/models--codellama--CodeLlama-34b-hf/snapshots/d3e967887d285343b8e239e26c6778c26931a536/model-00002-of-00007.safetensors', '/home/ksaff/.cache/huggingface/hub/models--codellama--CodeLlama-34b-hf/snapshots/d3e967887d285343b8e239e26c6778c26931a536/model-00003-of-00007.safetensors', '/home/ksaff/.cache/huggingface/hub/models--codellama--CodeLlama-34b-hf/snapshots/d3e967887d285343b8e239e26c6778c26931a536/model-00004-of-00007.safetensors', '/home/ksaff/.cache/huggingface/hub/models--codellama--CodeLlama-34b-hf/snapshots/d3e967887d285343b8e239e26c6778c26931a536/model-00005-of-00007.safetensors', '/home/ksaff/.cache/huggingface/hub/models--codellama--Cod

Loading checkpoint shards: 100%|██████████| 7/7 [00:20<00:00,  2.88s/it]


In [2]:
from peft import LoraConfig, get_peft_model
from peft import prepare_model_for_kbit_training

def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    )

model = prepare_model_for_kbit_training(model)
config = LoraConfig(
    r=16, lora_alpha=32, target_modules=["q_proj", "v_proj"], lora_dropout=0.05, bias="none", task_type="CAUSAL_LM"
)

model = get_peft_model(model, config)
print_trainable_parameters(model)

trainable params: 19660800 || all params: 17154187264 || trainable%: 0.11461225004381531


In [3]:
from datasets import load_dataset

data = load_dataset("/home/ksaff/Desktop/ttyd/fine_tuning/dataset/", 'csv')
data = data.map(lambda samples: tokenizer(samples["text"]), batched=True)

Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 16320.25it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 129.49it/s]
Generating train split: 1051 examples [00:00, 67791.55 examples/s]
Map: 100%|██████████| 1051/1051 [00:00<00:00, 15802.43 examples/s]


In [None]:
import logging
import transformers

tokenizer.pad_token = tokenizer.eos_token

trainer = transformers.Trainer(
    model=model,
    train_dataset=data['train'],
    args=transformers.TrainingArguments(
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        warmup_steps=100,
        max_steps=200,
        learning_rate=2e-4,
        fp16=True,
        logging_steps=1,
        output_dir="outputs",
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)

trainer.train()

In [7]:
trainer.save_model("fine_tuned_model")