<a href="https://colab.research.google.com/github/Sam-Joshua-S/100DaysOfData/blob/main/Day%2042-Finetuning_Phi1_5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Installing Dependencies

In [None]:
! pip install -q accelerate transformers einops datasets peft bitsandbytes

In [None]:
# Log in to the Hugging Face Hub through the interactive notebook widget.
# Later cells in this notebook push the trained model to the Hub.
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

# Importing Dependencies

In [None]:
import torch
from datasets import load_dataset, Dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer, DataCollatorForLanguageModeling, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model
import os

# Finetuning

In [None]:
# Load the tokenizer that ships with the base phi-1.5 checkpoint.
tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-1_5", trust_remote_code=True)
# Reuse the end-of-sequence token as the padding token so batches can be padded
# (presumably this tokenizer defines no pad token of its own -- TODO confirm).
# NOTE(review): with pad == eos, a collator that masks pad positions out of the
# loss would presumably also mask genuine EOS tokens -- confirm this is intended.
tokenizer.pad_token = tokenizer.eos_token

In [None]:
# 4-bit quantization settings: NF4 quantization type with double quantization
# enabled, and float16 as the compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)

# Load the base checkpoint with the quantization config applied; the
# device_map entry {"": 0} maps the whole model to device index 0.
model = AutoModelForCausalLM.from_pretrained(
    "microsoft/phi-1_5",
    quantization_config=bnb_config,
    device_map={"": 0},
    trust_remote_code=True,
)

In [None]:
# Display the quantized model's module tree. The layer names shown here
# (e.g. Wqkv, out_proj) are what the LoRA config refers to by name.
model

MixFormerSequentialForCausalLM(
  (layers): Sequential(
    (0): Embedding(
      (wte): Embedding(51200, 2048)
      (drop): Dropout(p=0.0, inplace=False)
    )
    (1): ParallelBlock(
      (ln): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
      (resid_dropout): Dropout(p=0.0, inplace=False)
      (mixer): MHA(
        (rotary_emb): RotaryEmbedding()
        (Wqkv): Linear4bit(in_features=2048, out_features=6144, bias=True)
        (out_proj): Linear4bit(in_features=2048, out_features=2048, bias=True)
        (inner_attn): SelfAttention(
          (drop): Dropout(p=0.0, inplace=False)
        )
        (inner_cross_attn): CrossAttention(
          (drop): Dropout(p=0.0, inplace=False)
        )
      )
      (mlp): MLP(
        (fc1): Linear4bit(in_features=2048, out_features=8192, bias=True)
        (fc2): Linear4bit(in_features=8192, out_features=2048, bias=True)
        (act): NewGELUActivation()
      )
    )
    (2): ParallelBlock(
      (ln): LayerNorm((2048,), eps=1

In [None]:
# LoRA adapter settings for causal-LM fine-tuning. Adapters are attached only
# to the modules named "Wqkv" and "out_proj" (the attention projections listed
# in the model printout); bias terms are left untouched.
config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["Wqkv", "out_proj"],
    r=16,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
)

# Wrap the quantized base model with the adapters and report how many
# parameters are trainable.
model = get_peft_model(model, config)
model.print_trainable_parameters()

trainable params: 4,718,592 || all params: 1,422,989,312 || trainable%: 0.3315971497613047


In [None]:
# Print the PEFT-wrapped model to check that lora_A / lora_B adapters were
# attached to the targeted layers.
print(model)

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): MixFormerSequentialForCausalLM(
      (layers): Sequential(
        (0): Embedding(
          (wte): Embedding(51200, 2048)
          (drop): Dropout(p=0.0, inplace=False)
        )
        (1): ParallelBlock(
          (ln): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
          (resid_dropout): Dropout(p=0.0, inplace=False)
          (mixer): MHA(
            (rotary_emb): RotaryEmbedding()
            (Wqkv): Linear4bit(
              in_features=2048, out_features=6144, bias=True
              (lora_dropout): ModuleDict(
                (default): Dropout(p=0.05, inplace=False)
              )
              (lora_A): ModuleDict(
                (default): Linear(in_features=2048, out_features=16, bias=False)
              )
              (lora_B): ModuleDict(
                (default): Linear(in_features=16, out_features=6144, bias=False)
              )
              (lora_embedding_A): ParameterDict()
       

In [None]:
def tokenize(sample):
    """Tokenize a batch of examples for causal-LM fine-tuning.

    Args:
        sample: Batch mapping handed over by ``Dataset.map(batched=True)``;
            its ``"text"`` entry holds the strings to encode.

    Returns:
        The tokenizer output for the batch, with every example truncated to
        at most 512 tokens and left unpadded.
    """
    # Deliberately no padding here: padding inside a batched map pads every
    # example to the longest text of its (large) map batch. The data collator
    # used for training pads each training batch to its own longest sequence,
    # which produces far fewer wasted pad tokens.
    return tokenizer(sample["text"], truncation=True, max_length=512)

In [None]:
# Load the GSM8K training split and build one training string per example in
# the form "question: <question> answer: <answer>".
data = load_dataset("gsm8k", "main", split="train")
data_df = data.to_pandas()
# Vectorised column concatenation; produces the same strings as a row-wise
# apply without running a Python lambda for every row.
data_df["text"] = "question: " + data_df["question"] + " answer: " + data_df["answer"]
data = Dataset.from_pandas(data_df)
# Tokenize in batches and drop the raw text columns so only model inputs remain.
tokenized_data = data.map(tokenize, batched=True, desc="Tokenizing data", remove_columns=data.column_names)
tokenized_data

Tokenizing data:   0%|          | 0/7473 [00:00<?, ? examples/s]

Dataset({
    features: ['input_ids', 'attention_mask'],
    num_rows: 7473
})

In [None]:
# Training configuration for the LoRA fine-tuning run.
# The run length is controlled by max_steps alone: a positive max_steps takes
# precedence over num_train_epochs, so the previously supplied
# num_train_epochs=1 had no effect and has been dropped to avoid confusion.
# On a single device, 1000 steps at batch size 4 is 4,000 examples, i.e. less
# than one pass over the 7,473-example training split.
training_arguments = TrainingArguments(
    output_dir="phi-1_5-finetuned-gsm8k",
    per_device_train_batch_size=4,
    gradient_accumulation_steps=1,
    learning_rate=2e-4,
    lr_scheduler_type="cosine",
    save_strategy="epoch",
    logging_steps=100,
    max_steps=1000,
    push_to_hub=True,
)

In [None]:
# Assemble the trainer: the PEFT-wrapped model, the tokenized dataset, and a
# language-modeling collator with masked-LM disabled (mlm=False).
trainer = Trainer(
    model=model,
    args=training_arguments,
    train_dataset=tokenized_data,
    data_collator=DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False),
)

# Run the fine-tuning, then upload the result to the Hub.
trainer.train()
trainer.push_to_hub()

You're using a CodeGenTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
100,1.1626
200,1.0547
300,1.0147
400,1.053
500,1.017
600,1.0335
700,1.012
800,1.0273
900,1.0122
1000,1.0291


'https://huggingface.co/SamJoshua/phi-1_5-finetuned-gsm8k/tree/main/'

# Saving

In [None]:
from peft import PeftModel
from transformers import AutoModelForCausalLM
import torch

# Reload the base checkpoint unquantized in float32 so the adapter can be
# merged into regular Linear layers.
model = AutoModelForCausalLM.from_pretrained("microsoft/phi-1_5", trust_remote_code=True, torch_dtype=torch.float32)
# Attach the fine-tuned LoRA adapter published on the Hub. The stray
# `from_transformers=True` keyword was removed: it is not an option of
# PeftModel.from_pretrained.
peft_model = PeftModel.from_pretrained(model, "SamJoshua/phi-1_5-finetuned-gsm8k")
# Fold the adapter weights into the base model and drop the PEFT wrappers.
model = peft_model.merge_and_unload()
model

Downloading (…)/adapter_config.json:   0%|          | 0.00/440 [00:00<?, ?B/s]

Downloading adapter_model.bin:   0%|          | 0.00/18.9M [00:00<?, ?B/s]

MixFormerSequentialForCausalLM(
  (layers): Sequential(
    (0): Embedding(
      (wte): Embedding(51200, 2048)
      (drop): Dropout(p=0.0, inplace=False)
    )
    (1): ParallelBlock(
      (ln): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
      (resid_dropout): Dropout(p=0.0, inplace=False)
      (mixer): MHA(
        (rotary_emb): RotaryEmbedding()
        (Wqkv): Linear(in_features=2048, out_features=6144, bias=True)
        (out_proj): Linear(in_features=2048, out_features=2048, bias=True)
        (inner_attn): SelfAttention(
          (drop): Dropout(p=0.0, inplace=False)
        )
        (inner_cross_attn): CrossAttention(
          (drop): Dropout(p=0.0, inplace=False)
        )
      )
      (mlp): MLP(
        (fc1): Linear(in_features=2048, out_features=8192, bias=True)
        (fc2): Linear(in_features=8192, out_features=2048, bias=True)
        (act): NewGELUActivation()
      )
    )
    (2): ParallelBlock(
      (ln): LayerNorm((2048,), eps=1e-05, elementwis

In [None]:
# Upload the merged model to the Hub repository "SamJoshua/phi-1_5-finetuned-gsm8k".
model.push_to_hub("SamJoshua/phi-1_5-finetuned-gsm8k")

pytorch_model.bin:   0%|          | 0.00/5.67G [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/SamJoshua/phi-1_5-finetuned-gsm8k/commit/0da70a2ec9406f5f671a620cc02d90e66e3af640', commit_message='Upload MixFormerSequentialForCausalLM', commit_description='', oid='0da70a2ec9406f5f671a620cc02d90e66e3af640', pr_url=None, pr_revision=None, pr_num=None)

# Inference

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer, AutoModel
from rich import print as rprint
from rich.markdown import Markdown

# Tokenizer comes from the base checkpoint; weights come from the fine-tuned repo.
tokenizer = AutoTokenizer.from_pretrained('microsoft/phi-1_5', trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained('SamJoshua/phi-1_5-finetuned-gsm8k', trust_remote_code=True, eos_token_id=tokenizer.eos_token_id).cuda()

# Build the prompt with the same template the training text used
# ("question: <question> answer: <answer>"), stopping right after "answer:" so
# the model continues with the answer. The earlier prompt wrapped the question
# in quotes and put a tab before "answer:", which the model never saw in training.
question = "Weng earns $12 an hour for babysitting. Yesterday, she just did 50 minutes of babysitting. How much did she earn?"
prompt = "question: " + question + " answer:"
input_ids = tokenizer(prompt, return_tensors='pt').input_ids.cuda()

# pad_token_id is passed explicitly so generate() does not have to fall back to
# eos_token_id with a warning. The sampling settings are unchanged.
output_ids = model.generate(
    input_ids,
    max_new_tokens=100,
    do_sample=True,
    top_p=0.9,
    top_k=0,
    temperature=0.01,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.eos_token_id,
    repetition_penalty=1.2,
)
rprint(Markdown(tokenizer.decode(output_ids[0], skip_special_tokens=True)))


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
