# Installing and updating packages

In [None]:
# Use the %pip magic rather than !pip so the packages are installed into the
# environment of the running kernel, and resolve related packages together.
%pip install -q bitsandbytes accelerate
%pip install -q --upgrade transformers peft datasets

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch

# Loading Model and Tokenizer

In [None]:
# Hub id of the base checkpoint, shared by the tokenizer and the model so the
# two cannot drift apart.
MODEL_ID = "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"

# Right-side padding; the EOS token is reused as the padding token.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, padding_side="right")
tokenizer.pad_token = tokenizer.eos_token

# 8-bit weight quantization. The former `bnb_8bit_compute_dtype` argument was
# removed: BitsAndBytesConfig has no such option (a compute dtype exists only
# for the 4-bit path), so it had no effect on how the model was loaded.
bnb_config = BitsAndBytesConfig(load_in_8bit=True)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto",
    quantization_config=bnb_config,
)

In [None]:
txt = """###SYSTEM: Based on INPUT title generate the prompt for generative model

###INPUT: Linux Terminal

###PROMPT:"""
# Keep the full encoding (ids + attention mask) and place it on whatever device
# the model was loaded to, instead of assuming a device literally named "cuda".
inputs = tokenizer(txt, return_tensors="pt").to(model.device)
tokens = inputs["input_ids"]
# The pad token is the EOS token, so pass the attention mask and pad id
# explicitly rather than letting generate() guess them.
op = model.generate(
    input_ids=tokens,
    attention_mask=inputs["attention_mask"],
    max_new_tokens=200,
    pad_token_id=tokenizer.eos_token_id,
)
print(tokenizer.decode(op[0]))

# Preparing PEFT model

In [None]:
from peft import get_peft_model, LoraConfig, TaskType, prepare_model_for_kbit_training

# Turn on gradient checkpointing, then prepare the quantized model for training.
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

# A TaskType value belongs in `task_type`. It was previously passed as
# `peft_type`, which is a different field, so the task type was left unset.
peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    inference_mode=False,
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
)
model = get_peft_model(model, peft_config)

# print_trainable_parameters() prints the summary itself and returns None;
# wrapping it in print() emitted an extra "None" line.
model.print_trainable_parameters()


# Preparing Dataset

In [None]:
def format_dataset(data_point):
    """Render one dataset row as a training prompt and tokenize it.

    The ``act`` and ``prompt`` fields of ``data_point`` are filled into the
    ###SYSTEM / ###INPUT / ###PROMPT template. The text is tokenized with the
    notebook-level ``tokenizer``, truncated and padded to 256 tokens.

    Returns the tokenizer output with an added ``labels`` entry holding a copy
    of ``input_ids``.
    """
    text = f"""###SYSTEM: Based on INPUT title generate the prompt for generative model

###INPUT: {data_point['act']}

###PROMPT: {data_point['prompt']}
"""
    encoded = tokenizer(
        text,
        padding="max_length",
        max_length=256,
        truncation=True,
    )
    # Labels start out as a copy of the input ids.
    encoded["labels"] = encoded["input_ids"].copy()
    return encoded


In [None]:
from datasets import load_dataset

# Load the train split and show the columns of the first row.
dataset = load_dataset(path="fka/awesome-chatgpt-prompts", split="train")
print(dataset[0].keys())

# Apply format_dataset to every row, then show the columns again.
dataset = dataset.map(function=format_dataset)
print(dataset[0].keys())

In [None]:
# Decode the first row's token ids back to text as a sanity check.
print(tokenizer.decode(dataset[0]["input_ids"]))

In [None]:
# Drop the two raw text columns, then print the dataset summary.
dataset = dataset.remove_columns(column_names=["act", "prompt"])
print(dataset)

In [None]:
# Hold out 10% of the rows for evaluation. The fixed seed makes the split
# reproducible across kernel restarts.
tmp = dataset.train_test_split(test_size=0.1, seed=42)
train_dataset = tmp["train"]
test_dataset = tmp["test"]
# The splits are bound to `train_dataset` / `test_dataset`; the names `train`
# and `test` were never defined and raised NameError here.
print(train_dataset)
print(test_dataset)

In [None]:
import torch

# When two or more CUDA devices are visible, set both parallelism flags on the
# model. NOTE(review): presumably this stops the Trainer from wrapping the
# model in DataParallel - confirm against the Trainer implementation.
if torch.cuda.device_count() >= 2:
    model.is_parallelizable = model.model_parallel = True

In [None]:
from transformers import Trainer, TrainingArguments, DataCollatorForLanguageModeling

# Causal-LM collator (mlm=False).
data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)

training_args = TrainingArguments(
    output_dir="./training",
    remove_unused_columns=False,
    per_device_train_batch_size=2,
    gradient_checkpointing=True,
    gradient_accumulation_steps=4,
    max_steps=200,
    learning_rate=2.5e-5,
    logging_steps=5,
    fp16=True,
    optim="paged_adamw_8bit",
    save_strategy="steps",
    save_steps=50,
    # `evaluation_strategy` is the deprecated spelling; the install cell
    # upgrades transformers, so use the current argument name.
    eval_strategy="steps",
    eval_steps=5,
    do_eval=True,
    # Only "labels" is a label column. "input_ids" and "attention_mask" are
    # model inputs and were wrongly listed here as labels.
    label_names=["labels"],
    report_to="none",
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    # `processing_class` is the current name for the deprecated `tokenizer`
    # argument of Trainer.
    processing_class=tokenizer,
    data_collator=data_collator,
)

In [None]:
# Start fine-tuning with the Trainer built in the previous cell.
trainer.train()

# Generating Text

In [None]:
txt = """###SYSTEM: Based on INPUT title generate the prompt for generative model

###INPUT: Math Tutor

###PROMPT:"""
# Switch to eval mode so LoRA dropout is not applied while sampling.
model.eval()
# Keep the full encoding (ids + attention mask) and place it on whatever device
# the model lives on, instead of assuming a device literally named "cuda".
inputs = tokenizer(txt, return_tensors="pt").to(model.device)
tokens = inputs["input_ids"]
# The pad token is the EOS token, so pass the attention mask and pad id
# explicitly rather than letting generate() guess them.
op = model.generate(
    input_ids=tokens,
    attention_mask=inputs["attention_mask"],
    max_new_tokens=200,
    pad_token_id=tokenizer.eos_token_id,
)
print(tokenizer.decode(op[0]))

# Saving the PEFT LoRA adapter

In [None]:
# Write the PEFT model to ./prompt_250_steps with safetensors serialization
# turned off.
model.save_pretrained(
    "prompt_250_steps",
    safe_serialization=False,
)

In [None]:
# Recursively zip the saved adapter directory into prompt_250.zip.
!zip -r prompt_250.zip '/kaggle/working/prompt_250_steps' 

# Loading PEFT model weights

In [None]:
from peft import PeftModel

# `model` is already a LoRA-wrapped PeftModel at this point, so wrapping it
# again would stack a second adapter on top of the first. Load a clean,
# unquantized copy of the base checkpoint and attach the saved adapter to it;
# plain fp16 weights also let the later merge step fold the adapter in without
# going through quantized layers.
base_model = AutoModelForCausalLM.from_pretrained(
    "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T",
    device_map="auto",
    torch_dtype=torch.float16,
)
# Same relative directory the adapter was saved to, so this works outside
# /kaggle/working as well.
model = PeftModel.from_pretrained(base_model, "prompt_250_steps")

# Merging PEFT Weights into Base model and saving

In [None]:
# Merge the PEFT weights into the base model, then save the merged model to
# ./merged_model_.
model_ = model.merge_and_unload()
model_.save_pretrained("merged_model_")