In [None]:
!pip install datasets peft transformers

In [None]:
from datasets import load_dataset

# Load the "all" configuration of the facebook/flores dataset.
data = load_dataset(path="facebook/flores", name="all")

In [None]:
# Show the loaded dataset object (the cells below index it by split name).
print(data)

In [None]:
# The "dev" split is used as the training portion in this notebook.
train_data = data["dev"]

In [None]:
# The "devtest" split is held out as the test portion.
test_data = data["devtest"]

In [None]:
# Display the first record of the test split to inspect its fields.
test_data[0]

In [None]:
from transformers import AutoModelForCausalLM
from peft import get_peft_model, LoraConfig, TaskType

# Base checkpoint to adapt; the tokenizer id points at the same repository.
model_name_or_path = "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"
tokenizer_name_or_path = "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"

# LoRA configuration for causal language modelling.
# Mind the spelling: the enum member is TaskType.CAUSAL_LM ("CASUAL_LM" does
# not exist on TaskType and raises AttributeError).
peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    inference_mode=False,  # the adapter is going to be trained
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
)

# Load the base model, wrap it with the LoRA adapter and report how many
# parameters remain trainable.
model = AutoModelForCausalLM.from_pretrained(model_name_or_path)
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

In [None]:
import torch
from peft import AutoPeftModelForCausalLM
from transformers import AutoModelForCausalLM, AutoTokenizer

# Smoke-test text generation with the base checkpoint.
#
# AutoPeftModelForCausalLM.from_pretrained expects a repository/directory that
# contains a saved adapter (adapter_config.json). The TinyLlama repository is a
# plain base model, so it is loaded with AutoModelForCausalLM here. Once an
# adapter has been saved, load it with
# AutoPeftModelForCausalLM.from_pretrained(<adapter directory>) instead.
base_checkpoint = "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"

# Use the GPU when one is available instead of hard-coding "cuda".
device = "cuda" if torch.cuda.is_available() else "cpu"

model = AutoModelForCausalLM.from_pretrained(base_checkpoint).to(device)
tokenizer = AutoTokenizer.from_pretrained(base_checkpoint)

model.eval()
inputs = tokenizer(
    "Preheat the oven to 350 degrees and place the cookie dough",
    return_tensors="pt",
).to(device)

# Unpacking the whole encoding forwards the attention mask together with
# input_ids.
outputs = model.generate(**inputs, max_new_tokens=50)
print(tokenizer.batch_decode(outputs, skip_special_tokens=True)[0])

### Test

In [None]:
!pip install transformers trl datasets accelerate peft bitsandbytes

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM,BitsAndBytesConfig, TrainingArguments
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model
from datasets import load_dataset
from trl import SFTTrainer
import torch

In [None]:
# Hub id of the base checkpoint; used below to load both the model and the tokenizer.
model_name = "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"

In [None]:
# Load the base checkpoint and run PEFT's k-bit training preparation on it.
# NOTE(review): no quantization config is passed to from_pretrained, so the
# model is not loaded in 4/8-bit here even though BitsAndBytesConfig is
# imported above. Confirm whether quantized loading was intended.
model = prepare_model_for_kbit_training(
    AutoModelForCausalLM.from_pretrained(model_name)
)

In [None]:
# Fast tokenizer for the base checkpoint, created with add_eos_token=True.
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    use_fast=True,
    add_eos_token=True,
)

# Pad on the left and reuse the unknown token as the padding token.
tokenizer.padding_side = "left"
tokenizer.pad_token = tokenizer.unk_token

In [None]:
# French-to-English dataset; its "train" and "validation" splits feed the trainer below.
dataset = load_dataset(path="kaitchup/opus-French-to-English")

In [None]:
# LoRA configuration targeting only the MLP projections (rank 16).
# NOTE: the next cell assigns `peft_config` again, so when the notebook is run
# top to bottom this configuration is not the one passed to the trainer.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=16,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    target_modules=[
        "down_proj",
        "up_proj",
        "gate_proj",
    ],
)

In [None]:
# LoRA configuration targeting the attention and MLP projections (rank 64).
# This is the `peft_config` handed to the trainer below.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=64,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    target_modules=[
        "q_proj",
        "up_proj",
        "o_proj",
        "k_proj",
        "down_proj",
        "gate_proj",
        "v_proj",
    ],
)

In [None]:
# Hyper-parameters for the fine-tuning run.
# fp16 and gradient_accumulation_steps are deliberately not set here, so the
# library defaults apply.
# NOTE(review): `evaluation_strategy` appears to have been renamed
# `eval_strategy` in newer transformers releases. Confirm against the
# installed version (the install cell does not pin one).
training_arguments = TrainingArguments(
    # Output and logging
    output_dir="./results/",
    log_level="debug",
    logging_steps=500,
    save_steps=500,
    # Evaluation every 500 steps
    do_eval=True,
    evaluation_strategy="steps",
    eval_steps=500,
    # Batch sizes
    per_device_train_batch_size=96,
    per_device_eval_batch_size=96,
    # Optimiser and schedule: 100 warm-up steps, 3000 steps in total
    optim="paged_adamw_8bit",
    learning_rate=1e-4,
    lr_scheduler_type="linear",
    warmup_steps=100,
    max_steps=3000,
)

In [None]:
# Supervised fine-tuning trainer: the LoRA config is passed through
# `peft_config`, and training examples are taken from the dataset's "text"
# field with a maximum sequence length of 120.
# NOTE(review): the keyword arguments accepted by SFTTrainer have changed
# between trl releases. Confirm `dataset_text_field`, `max_seq_length` and
# `tokenizer` against the installed version.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=training_arguments,
    peft_config=peft_config,
    train_dataset=dataset["train"],
    eval_dataset=dataset["validation"],
    dataset_text_field="text",
    max_seq_length=120,
)

In [None]:
# Start fine-tuning; the value returned by train() is shown as the cell output.
trainer.train()