In [None]:
!pip install bitsandbytes accelerate datasets==2.16.0 trl peft einops numpy==1.22.4

In [2]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

torch.set_default_device("cpu")

In [3]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=False,
)

In [10]:
model = AutoModelForCausalLM.from_pretrained(
    "microsoft/phi-2",
    quantization_config=bnb_config,
    trust_remote_code=True,
    flash_attn=True,
    flash_rotary=True,
    fused_dense=True,
    low_cpu_mem_usage=True,
    device_map={"": 0},
    revision="refs/pr/23",
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [5]:
instruction_template = "### Human:"
response_template = "### Assistant:"

tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2", use_fast=True)
tokenizer.pad_token=tokenizer.eos_token
tokenizer.padding_side="right"

initial_token_count = len(tokenizer)
added_token_count = tokenizer.add_special_tokens({"additional_special_tokens": [response_template, instruction_template]})

model.resize_token_embeddings(new_num_tokens=initial_token_count+added_token_count)

tokenizer_config.json:   0%|          | 0.00/7.34k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/1.08k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/99.0 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Embedding(50297, 2560)

In [6]:
from transformers import TrainingArguments

training_arguments = TrainingArguments(
    output_dir="./results",
    num_train_epochs=2,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=16,
    evaluation_strategy="steps",
    eval_steps=200,
    logging_steps=15,
    optim="paged_adamw_8bit",
    learning_rate=2e-4,
    lr_scheduler_type="cosine",
    save_steps=2000,
    warmup_ratio=0.05,
    weight_decay=0.01,
    report_to="tensorboard",
    max_steps=-1, # if maximum steps=2, it will stop after two steps
)

In [7]:
from peft import LoraConfig
peft_config = LoraConfig(
    r=32,
    lora_alpha=64,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules= ["Wqkv", "fc1", "fc2" ] # ["Wqkv", "out_proj", "fc1", "fc2" ], - 41M params
    # modules_to_save=["embed_tokens","lm_head"]
)

In [8]:
from datasets import load_dataset
from trl import SFTTrainer, DataCollatorForCompletionOnlyLM

dataset = load_dataset("lucasmccabe-lmi/CodeAlpaca-20k", split="train[:20%]")

def formatting_prompts_func(example):
    output_texts = []
    for i in range(len(example['instruction'])):
        text = f"### Human: {example['instruction'][i]}\n ### Assistant: {example['output'][i]}"
        output_texts.append(text)
    return output_texts

collator = DataCollatorForCompletionOnlyLM(instruction_template=instruction_template, response_template=response_template, tokenizer=tokenizer, mlm=False)

trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    eval_dataset=dataset,
    peft_config=peft_config,
    tokenizer=tokenizer,
    args=training_arguments,
    formatting_func=formatting_prompts_func,
    data_collator=collator,
    max_seq_length=720
)

Downloading readme:   0%|          | 0.00/677 [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/3.45M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/20022 [00:00<?, ? examples/s]



Map:   0%|          | 0/4004 [00:00<?, ? examples/s]

Map:   0%|          | 0/4004 [00:00<?, ? examples/s]

In [11]:
trainer.train()

Step,Training Loss,Validation Loss
200,0.695,0.496433
400,0.5408,0.457986


TrainOutput(global_step=500, training_loss=0.621560227394104, metrics={'train_runtime': 5501.6704, 'train_samples_per_second': 1.456, 'train_steps_per_second': 0.091, 'total_flos': 1.1586597374300508e+16, 'train_loss': 0.621560227394104, 'epoch': 2.0})

In [19]:
inputs = tokenizer.encode("### Human: Write a python function to print the nth fibonacci number using dynamic programming. \n\n### Assistant: ", return_tensors="pt").to("cuda")
outputs = model.generate(inputs, max_length=120)
print(tokenizer.decode(outputs[0]))

### Human: Write a python function to print the nth fibonacci number using dynamic programming. 

### Assistant: 

# Solution
def fibonacci(n):
    if n <= 1:
        return n
    else:
        a, b = 0, 1
        for i in range(2, n+1):
            c = a + b
            a = b
            b = c
        return b

# Test
print(fibonacci(10)) # 55

# Exercise 2
# Write a python function to


In [28]:
trainer.save_model("./results/phi-2-coding")

In [32]:
trainer.model.save_pretrained("./fine-tune/")

In [41]:
load_model = AutoModelForCausalLM.from_pretrained("./results/phi-2-coding").to("cpu")
tokenizer = AutoTokenizer.from_pretrained("./results/phi-2-coding")

The repository for microsoft/phi-2 contains custom code which must be executed to correctly load the model. You can inspect the repository content at https://hf.co/microsoft/phi-2.
You can avoid this prompt in future by passing the argument `trust_remote_code=True`.

Do you wish to run the custom code? [y/N]  y


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [None]:
inputs = tokenizer.encode("### Human: Write a python function to print the nth fibonacci number using dynamic programming. \n\n### Assistant: ", return_tensors="pt").to("cuda")
outputs = load_model.generate(inputs, max_length=120)
print(tokenizer.decode(outputs[0]))