In [1]:
import os
import torch
import torch.nn as nn
import bitsandbytes as bnb
from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM, BitsAndBytesConfig

model_id = 'defog/sqlcoder-7b'
os.environ["report_to"] = "None"

bnb_config = BitsAndBytesConfig(
                                load_in_8bit=True,
                                bnb_8bit_use_double_quant=True,
                                bnb_8bit_quant_type="nf4",
                                bnb_8bit_compute_dtype=torch.bfloat16
                                )

model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=bnb_config, device_map={'': 0})
tokenizer = AutoTokenizer.from_pretrained(model_id, padding='max_length')

  from .autonotebook import tqdm as notebook_tqdm


/home/ksaff/.cache/huggingface/hub/models--defog--sqlcoder-7b/snapshots/cb59ea29ef43769d221b98a8ebe81a0175c76658/pytorch_model.bin.index.json
['/home/ksaff/.cache/huggingface/hub/models--defog--sqlcoder-7b/snapshots/cb59ea29ef43769d221b98a8ebe81a0175c76658/pytorch_model-00001-of-00002.bin', '/home/ksaff/.cache/huggingface/hub/models--defog--sqlcoder-7b/snapshots/cb59ea29ef43769d221b98a8ebe81a0175c76658/pytorch_model-00002-of-00002.bin']


Loading checkpoint shards: 100%|██████████| 2/2 [00:06<00:00,  3.42s/it]
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [2]:
from peft import prepare_model_for_kbit_training

model = prepare_model_for_kbit_training(model)

In [3]:
def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    )

In [4]:
from peft import LoraConfig, get_peft_model

config = LoraConfig(
    r=16, lora_alpha=32, target_modules=["q_proj", "v_proj"], lora_dropout=0.05, bias="none", task_type="CAUSAL_LM"
)

model = get_peft_model(model, config)
print_trainable_parameters(model)

trainable params: 6815744 || all params: 7248547840 || trainable%: 0.0940290959023318


In [5]:
from datasets import load_dataset

data = load_dataset("/home/ksaff/Desktop/ttyd/fine_tuning/dataset/", 'csv')

Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 12087.33it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 140.49it/s]
Generating train split: 1051 examples [00:00, 99649.92 examples/s]


In [6]:
import transformers
from datasets import load_dataset

data = load_dataset("/home/ksaff/Desktop/ttyd/fine_tuning/dataset/")

Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 12985.46it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 2870.84it/s]
Generating train split: 1051 examples [00:00, 305658.96 examples/s]


In [7]:
data = data.map(lambda samples: tokenizer(samples["text"]), batched=True)

Map: 100%|██████████| 1051/1051 [00:00<00:00, 22272.70 examples/s]


In [8]:
tokenizer.pad_token = tokenizer.eos_token

In [9]:
import logging

logging.getLogger('codecarbon').setLevel(logging.WARNING)

trainer = transformers.Trainer(
    model=model,
    train_dataset=data['train'],
    args=transformers.TrainingArguments(
        per_device_train_batch_size=4,
        gradient_accumulation_steps=4,
        warmup_steps=100,
        max_steps=200,
        learning_rate=2e-4,
        fp16=True,
        logging_steps=1,
        output_dir="outputs",
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)

model.config.use_cache = False  # silence the warnings. Please re-enable for inference!
trainer.train()

[codecarbon INFO @ 23:14:26] [setup] RAM Tracking...
[codecarbon INFO @ 23:14:26] [setup] GPU Tracking...
[codecarbon INFO @ 23:14:26] Tracking Nvidia GPU via pynvml
[codecarbon INFO @ 23:14:26] [setup] CPU Tracking...
[codecarbon INFO @ 23:14:27] CPU Model on constant consumption mode: 13th Gen Intel(R) Core(TM) i5-13600KF
[codecarbon INFO @ 23:14:27] >>> Tracker's metadata:
[codecarbon INFO @ 23:14:27]   Platform system: Linux-6.2.0-39-generic-x86_64-with-glibc2.35
[codecarbon INFO @ 23:14:27]   Python version: 3.10.13
[codecarbon INFO @ 23:14:27]   CodeCarbon version: 2.2.3
[codecarbon INFO @ 23:14:27]   Available RAM : 31.196 GB
[codecarbon INFO @ 23:14:27]   CPU count: 20
[codecarbon INFO @ 23:14:27]   CPU model: 13th Gen Intel(R) Core(TM) i5-13600KF
[codecarbon INFO @ 23:14:27]   GPU count: 1
[codecarbon INFO @ 23:14:27]   GPU model: 1 x NVIDIA GeForce RTX 3090
  0%|          | 0/200 [00:00<?, ?it/s]You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokeniz

OutOfMemoryError: CUDA out of memory. Tried to allocate 172.00 MiB. GPU 0 has a total capacty of 23.66 GiB of which 33.75 MiB is free. Process 1587322 has 12.10 GiB memory in use. Including non-PyTorch memory, this process has 10.32 GiB memory in use. Of the allocated memory 9.70 GiB is allocated by PyTorch, and 310.15 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

[codecarbon INFO @ 23:14:46] Energy consumed for RAM : 0.000049 kWh. RAM Power : 11.698554039001465 W
[codecarbon INFO @ 23:14:46] Energy consumed for all GPUs : 0.000556 kWh. Total GPU Power : 133.43 W
[codecarbon INFO @ 23:14:46] Energy consumed for all CPUs : 0.000177 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 23:14:46] 0.000782 kWh of electricity used since the beginning.


In [None]:
trainer.save_model("1st_try")