In [1]:
!pip install -q accelerate peft bitsandbytes transformers trl sentencepiece triton

In [2]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from datasets import load_dataset
import torch

The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.


0it [00:00, ?it/s]

In [3]:
# Tokenizer of the chat-tuned checkpoint; `format_prompt` below uses it to
# render conversations with `apply_chat_template`.
tokenizer = AutoTokenizer.from_pretrained(
    "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
)

In [4]:
def format_prompt(ex, chat_tokenizer=None):
    """Render one conversation into a single chat-templated training string.

    Args:
        ex: A dataset row; its ``"messages"`` entry is handed unchanged to
            ``apply_chat_template``.
        chat_tokenizer: Object exposing ``apply_chat_template``. When omitted,
            the notebook-level ``tokenizer`` is looked up at call time, which
            keeps ``dataset.map(format_prompt)`` working as before.

    Returns:
        A dict ``{"text": <templated string>}``.
    """
    # The global `tokenizer` is rebound to a different checkpoint further down
    # the notebook, so re-running the map after those cells would use another
    # tokenizer. Pass the chat tokenizer explicitly to be independent of cell
    # order, e.g. dataset.map(format_prompt, fn_kwargs={"chat_tokenizer": tok}).
    active_tokenizer = tokenizer if chat_tokenizer is None else chat_tokenizer
    rendered = active_tokenizer.apply_chat_template(ex["messages"], tokenize=False)
    return {"text": rendered}

In [5]:
# Take a fixed, reproducible 3000-example subset of the `test_sft` split.
dataset = (
    load_dataset("HuggingFaceH4/ultrachat_200k", split="test_sft")
    .shuffle(seed=42)
    .select(range(3000))
)

In [6]:
# Add a "text" column holding the chat-templated string for every example
# (`format_prompt` returns {"text": ...} per row).
dataset = dataset.map(format_prompt)

Map:   0%|          | 0/3000 [00:00<?, ? examples/s]

In [7]:
# Inspect one formatted example: select the row first, then its "text" field.
print(dataset[16]["text"])

<|user|>
Develop a 30-minute long podcast episode that discusses the various ways technology has influenced the processes, practices, and possibilities in the music industry. Include interviews with musicians, producers, and experts in the field to gain diverse perspectives on this topic. Explore specific examples of how technology has changed the music industry, including the rise of streaming services, the impact of social media, and the evolution of music production tools. Incorporate examples of both positive and negative effects of technology on music consumption, creative production, distribution, and revenue streams. Use a storytelling approach to engage listeners in understanding the complex relationship between technology and the music industry, and offer insights into future directions for this dynamic field.</s>
<|assistant|>
Introduction:

Music has always been an integral part of human culture, and with the advent of technology, its reach and influence have only grown. Tod

In [8]:
!pip install --upgrade bitsandbytes



In [9]:
# Base (non-chat) checkpoint that gets fine-tuned below.
model_name = "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"

# 4-bit quantisation settings; passed as `quantization_config` when the model
# is loaded.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype="float16",
)

In [10]:
# Tokenizer of the base checkpoint. `trust_remote_code` is not passed: it
# permits executing code shipped in the hub repo and is not needed for a
# stock Llama tokenizer.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# "<PAD>" is not a token in this vocabulary, so using it as pad token leaves
# the pad id without a real embedding row (and a saved/reloaded tokenizer can
# end up larger than the model's embedding matrix). Reuse the existing unk
# token instead, which keeps the vocabulary size unchanged.
tokenizer.pad_token = tokenizer.unk_token
tokenizer.padding_side = "left"

# Load the base model quantised according to `bnb_config`.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=bnb_config,
)

# The generation cache is not needed while training (gradient checkpointing
# is enabled in the training arguments).
model.config.use_cache = False
# NOTE(review): presumably disables the tensor-parallel emulation path of the
# Llama implementation; confirm against the model config docs.
model.config.pretraining_tp = 1

In [11]:
# Same two settings as at the end of the model-loading cell; re-applying them
# here changes nothing.
model.config.pretraining_tp = 1
model.config.use_cache = False

In [12]:
# Reload the base-checkpoint tokenizer (repeats the load from the model cell).
# `trust_remote_code` is dropped: a stock Llama tokenizer needs no repo code.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# "<PAD>" is not in the vocabulary; reuse the existing unk token as padding so
# the pad id maps to a real embedding row and the vocabulary size is unchanged.
tokenizer.pad_token = tokenizer.unk_token
tokenizer.padding_side = "left"

In [13]:

from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model

In [14]:
# LoRA adapter settings: rank-64 adapters on every attention and MLP
# projection listed in `target_modules`.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=64,
    lora_alpha=32,
    lora_dropout=0.1,
    bias="none",
    target_modules=[
        "k_proj",
        "gate_proj",
        "v_proj",
        "up_proj",
        "q_proj",
        "o_proj",
        "down_proj",
    ],
)

In [15]:
# Prepare the quantised model for training, then attach the LoRA adapters.
# Note: `model` is rebound from itself, so run this cell once per fresh load.
model = get_peft_model(
    prepare_model_for_kbit_training(model),
    peft_config,
)

In [16]:
from transformers import TrainingArguments, DataCollatorForSeq2Seq

In [17]:
# Where the trainer writes its outputs.
output_dir = "./results"

training_arguments = TrainingArguments(
    output_dir=output_dir,
    # Schedule
    num_train_epochs=1,
    learning_rate=2e-4,
    lr_scheduler_type="cosine",
    optim="paged_adamw_32bit",
    # Batching: 2 samples per device x 4 accumulation steps per update
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    # Precision / memory
    fp16=True,
    gradient_checkpointing=True,
    # Reporting
    logging_steps=10,
)

In [18]:
from trl import SFTTrainer, SFTConfig

In [20]:
# `model` was already wrapped with the LoRA adapters by `get_peft_model`
# above, so `peft_config` is not passed again. Handing a PeftModel plus a
# peft_config to the trainer is at best ignored and, depending on the TRL
# version, may re-wrap or reject the model.
# NOTE(review): the installed TRL reports `dataset_text_field` and
# `max_seq_length` as deprecated here in favour of `SFTConfig`; they are kept
# because the accepted names depend on the TRL version in use.
trainer = SFTTrainer(
    model=model,
    args=training_arguments,
    train_dataset=dataset,
    tokenizer=tokenizer,
    dataset_text_field="text",
    max_seq_length=512,
)


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Map:   0%|          | 0/3000 [00:00<?, ? examples/s]

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


In [21]:
# Run the fine-tuning loop; the returned TrainOutput is shown as the cell result.
trainer.train()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mabhinavm16104[0m ([33mabhinavm16104-delhi-skill-and-entrepreneurship-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


  return fn(*args, **kwargs)


Step,Training Loss
10,1.6713
20,1.4756
30,1.4509
40,1.4884
50,1.4778
60,1.3905
70,1.4949
80,1.45
90,1.4274
100,1.404


TrainOutput(global_step=375, training_loss=1.4168808301289877, metrics={'train_runtime': 1462.0005, 'train_samples_per_second': 2.052, 'train_steps_per_second': 0.256, 'total_flos': 9994755938844672.0, 'train_loss': 1.4168808301289877, 'epoch': 1.0})

In [23]:
# Persist the trained PEFT model to a local directory; it is reloaded from this
# path with AutoPeftModelForCausalLM further down.
trainer.model.save_pretrained("TinyLlama-1.1B-qlora-mango")



In [22]:
from peft import AutoPeftModelForCausalLM

In [24]:
# Reload the saved adapter directory (rebinds `model`).
model = AutoPeftModelForCausalLM.from_pretrained(
    "TinyLlama-1.1B-qlora-mango",
    device_map="auto",
    low_cpu_mem_usage=True,
)

In [25]:
# Fold the LoRA weights into the base weights and drop the PEFT wrapper, giving
# a plain model that can be used without the adapter.
merged_model = model.merge_and_unload()

In [26]:
from transformers import pipeline

In [27]:
# Single-turn prompt using the same <|user|> / <|assistant|> markers that
# appear in the formatted training text.
prompt = "\n".join(
    [
        "<|user|>",
        "Tell me something about mangoes.</s>",
        "<|assistant|>",
    ]
)

In [28]:
# Text-generation pipeline around the merged model.
pipe = pipeline(
    task="text-generation",
    model=merged_model,
    tokenizer=tokenizer,
)

In [29]:
# Generate a completion and print the text of the first returned sequence.
generations = pipe(prompt)
first_generation = generations[0]
print(first_generation["generated_text"])

<|user|>
Tell me something about mangoes.</s>
<|assistant|>
Mangoes are a type of fruit that originated in Southeast Asia and are now grown in many parts of the world. They are known for their sweet, juicy, and tart flavor, and they are often used in desserts, salads, and other dishes. Mangoes are also a source of vitamin C, which is important for maintaining healthy skin, teeth, and bones.


In [None]:
# Write the merged model to a local directory.
merged_model.save_pretrained("TinyLlama-1.1B-qlora-mango_model")

In [None]:
# Store the tokenizer files in the same directory as the merged model so the
# directory can be loaded on its own.
tokenizer.save_pretrained("TinyLlama-1.1B-qlora-mango_model")