<a href="https://colab.research.google.com/github/LC1332/Chat-Haruhi-Suzumiya/blob/main/notebook/Finetuning_Phi15_Manual.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Installing Dependencies

In [1]:
! pip install accelerate transformers einops datasets peft bitsandbytes



In [2]:
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

# Importing Dependencies

In [3]:
import torch
from datasets import load_dataset, Dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer, DataCollatorForLanguageModeling, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model
import os

# Finetuning

In [4]:
tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-1_5", trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token

In [5]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16
)

model = AutoModelForCausalLM.from_pretrained(
    "microsoft/phi-1_5",
    device_map={"":0},
    trust_remote_code=True,
    quantization_config=bnb_config
)

In [None]:
model

PhiForCausalLM(
  (transformer): PhiModel(
    (embd): Embedding(
      (wte): Embedding(51200, 2048)
      (drop): Dropout(p=0.0, inplace=False)
    )
    (h): ModuleList(
      (0-23): 24 x ParallelBlock(
        (ln): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
        (resid_dropout): Dropout(p=0.0, inplace=False)
        (mixer): MHA(
          (rotary_emb): RotaryEmbedding()
          (Wqkv): Linear4bit(in_features=2048, out_features=6144, bias=True)
          (out_proj): Linear4bit(in_features=2048, out_features=2048, bias=True)
          (inner_attn): SelfAttention(
            (drop): Dropout(p=0.0, inplace=False)
          )
          (inner_cross_attn): CrossAttention(
            (drop): Dropout(p=0.0, inplace=False)
          )
        )
        (mlp): MLP(
          (fc1): Linear4bit(in_features=2048, out_features=8192, bias=True)
          (fc2): Linear4bit(in_features=8192, out_features=2048, bias=True)
          (act): NewGELUActivation()
        )
      )
 

In [6]:
config = LoraConfig(
    r=16,
    lora_alpha=16,
    target_modules=["Wqkv", "out_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, config)
model.print_trainable_parameters()

trainable params: 4,718,592 || all params: 1,422,989,312 || trainable%: 0.3315971497613047


In [7]:
print(model)

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): PhiForCausalLM(
      (transformer): PhiModel(
        (embd): Embedding(
          (wte): Embedding(51200, 2048)
          (drop): Dropout(p=0.0, inplace=False)
        )
        (h): ModuleList(
          (0-23): 24 x ParallelBlock(
            (ln): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
            (resid_dropout): Dropout(p=0.0, inplace=False)
            (mixer): MHA(
              (rotary_emb): RotaryEmbedding()
              (Wqkv): Linear4bit(
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=2048, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=6144, bias=False)
                )
                (lora_embedding_A): ParameterDict()
             

In [8]:
def tokenize(sample):
    context = "context: " + " ".join(sample["context"])
    target = "target: " + " ".join(sample["target"])

    encoded_context = tokenizer(context, padding="max_length", truncation=True, max_length=512)
    encoded_target = tokenizer(target, padding="max_length", truncation=True, max_length=512)

    input_ids = encoded_context["input_ids"] + encoded_target["input_ids"]
    attention_mask = [0] * len(encoded_context["input_ids"]) + [1] * len(encoded_target["input_ids"])

    max_length = 512
    pad_token_id = tokenizer.pad_token_id
    input_ids = input_ids[:max_length] + [pad_token_id] * (max_length - len(input_ids))
    attention_mask = attention_mask[:max_length] + [0] * (max_length - len(attention_mask))

    model_inputs = {
        "input_ids": input_ids,
        "attention_mask": attention_mask
    }

    return model_inputs

In [9]:
data = load_dataset("silk-road/ChatHaruhi-English-62K-RolePlaying", split="train")
data_df = data.to_pandas()
data_df["text"] = data_df[["context", "target"]].apply(lambda x: "context: " + x["context"] + " target: " + x["target"], axis=1)
data = Dataset.from_pandas(data_df)
tokenized_data = data.map(tokenize, batched=True, desc="Tokenizing data", remove_columns=data.column_names)
tokenized_data

Downloading readme:   0%|          | 0.00/2.17k [00:00<?, ?B/s]

Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/243M [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Tokenizing data:   0%|          | 0/62362 [00:00<?, ? examples/s]

Dataset({
    features: ['input_ids', 'attention_mask'],
    num_rows: 32256
})

In [26]:
tokenized_data[0]

{'input_ids': 22866, 'attention_mask': 0}

In [None]:
training_arguments = TrainingArguments(
        output_dir="phi-1_5-finetuned-haruhi",
        per_device_train_batch_size=4,
        gradient_accumulation_steps=1,
        learning_rate=2e-4,
        lr_scheduler_type="cosine",
        save_strategy="epoch",
        logging_steps=100,
        max_steps=1000,
        num_train_epochs=1,
        push_to_hub=True
    )

In [None]:
# from transformers import DefaultDataCollator
# import torch

# class ContextMaskedCollator(DefaultDataCollator):

#   def __call__(self, examples):
#     inputs = [example["context"] + example["target"]
#               for example in examples]

#     labels = [example["target"] for example in examples]

#     attention_mask = []
#     for input in inputs:
#       mask = [1] * len(example["target"]) + [0] * len(example["context"])
#       attention_mask.append(mask)

#     return {
#       "input_ids": inputs,
#       "labels": labels,
#       "attention_mask": attention_mask
#     }
# data_collator = ContextMaskedCollator()

In [None]:
trainer = Trainer(
    model=model,
    train_dataset=tokenized_data,
    args=training_arguments,
    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False)
)
trainer.train()
trainer.push_to_hub()

You're using a CodeGenTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
100,9.3562
200,7.5876
300,7.68
400,7.3703
500,7.001
600,7.5846
700,7.1245
800,7.6085
900,7.2904
1000,7.2591


'https://huggingface.co/hhhwmws/phi-1_5-finetuned-haruhi/tree/main/'

# Saving

In [None]:
from peft import PeftModel
from transformers import AutoModelForCausalLM
import torch
model = AutoModelForCausalLM.from_pretrained("microsoft/phi-1_5", trust_remote_code=True, torch_dtype=torch.float32)
peft_model = PeftModel.from_pretrained(model, "hhhwmws/phi-1_5-finetuned-haruhi", from_transformers=True)
model = peft_model.merge_and_unload()
model

config.json:   0%|          | 0.00/727 [00:00<?, ?B/s]

configuration_phi.py:   0%|          | 0.00/2.03k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/microsoft/phi-1_5:
- configuration_phi.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


modeling_phi.py:   0%|          | 0.00/33.8k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/microsoft/phi-1_5:
- modeling_phi.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


pytorch_model.bin:   0%|          | 0.00/2.84G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/69.0 [00:00<?, ?B/s]

adapter_config.json:   0%|          | 0.00/485 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/18.9M [00:00<?, ?B/s]

PhiForCausalLM(
  (transformer): PhiModel(
    (embd): Embedding(
      (wte): Embedding(51200, 2048)
      (drop): Dropout(p=0.0, inplace=False)
    )
    (h): ModuleList(
      (0-23): 24 x ParallelBlock(
        (ln): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
        (resid_dropout): Dropout(p=0.0, inplace=False)
        (mixer): MHA(
          (rotary_emb): RotaryEmbedding()
          (Wqkv): Linear(in_features=2048, out_features=6144, bias=True)
          (out_proj): Linear(in_features=2048, out_features=2048, bias=True)
          (inner_attn): SelfAttention(
            (drop): Dropout(p=0.0, inplace=False)
          )
          (inner_cross_attn): CrossAttention(
            (drop): Dropout(p=0.0, inplace=False)
          )
        )
        (mlp): MLP(
          (fc1): Linear(in_features=2048, out_features=8192, bias=True)
          (fc2): Linear(in_features=8192, out_features=2048, bias=True)
          (act): NewGELUActivation()
        )
      )
    )
  )
  (lm_h

In [None]:
model.push_to_hub("hhhwmws/phi-1_5-finetuned-haruhi")

# Inference

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained("hhhwmws/phi-1_5-finetuned-haruhi", trust_remote_code=True, torch_dtype=torch.float32)
# tokenizer = AutoTokenizer.from_pretrained("hhhwmws/phi-1_5", trust_remote_code=True)
inputs = tokenizer('''I want you to act like Sheldon Cooper from Big Bang Theory. If others‘ questions are related with the novel, please try to reuse the original lines from the novel. I want you to respond and answer like Sheldon using the tone, manner and vocabulary Sheldon would use. You must know all of the knowledge of Sheldon. Note that Sheldon has certain social difficulties, sometimes displaying awkward and inappropriate behavior. Sheldon likes to strictly plan his life according to his own habits and schedule, not allowing any disruptions He often appears conceited and self-righteous in front of friends, believing himself to be intellectually superior. Classic scenes for the role are as follows: ### Raj:「Go away. ''(Sheldon exits)''」 Sheldon:「Curiouser and curiouser.」 ### Leonard:「''(Pointing)'' Two seats right there.」 Sheldon:「''(To two oriental-looking people occupying the other seats)'' Chong sho sha pwe. ''(Caption translates to “Long Live Concrete”.)'' Xie xie. ''(Thank you)''」 Leonard:「Sheldon, I think I’ve made a mistake.」 Sheldon:「I can see that. Unless you're planning on running a marathon, choosing both stuffing and mashed potatoes is a starch filled redundancy.」 Leonard:「No, it's about Penny.」 Sheldon:「A mistake involving Penny? Okay, you'll have to narrow it down.」 Leonard:「I don't think I can go out with her tonight.」 Sheldon:「Then don't.」 Leonard:「Other people would say “why not?”」 Sheldon:「Other people might be interested.」 Leonard:「I'm going to talk anyway.」 Sheldon:「I assumed you would.」 Leonard:「Now that I'm actually about to go out with Penny, I'm not excited, I'm nauseous.」 Sheldon:「Ah, then your meal choice is appropriate. Starch absorbs fluid which reduces the amount of vomit available for violent expulsion.」 Leonard:「Right.」 Sheldon:「You also made a common grammatical mistake, you said nauseous when you meant nauseated. But go on.」 Leonard:「Sheldon, this date is probably my one chance with Penny, what happens if I blow it.」 Sheldon:「Well, if we accept your premise, and also accept the highly improbable assumption that Penny is the only woman in the world for you then we can logically conclude that the result of blowing it would be that you end up a lonely, bitter old man with no progeny. The image of any number of evil lighthouse keepers from Scooby Doo cartoons comes to mind.」 Leonard:「You're not helping.」 Sheldon:「Alright, what response on my part would bring this conversation to a speedy conclusion?」 Leonard:「Tell me whether or not to go through with the date.」 Sheldon:「Schrödinger's Cat.」 Leonard:「Wow, that's brilliant.」 Sheldon:「You sound surprised. Mmm, hou zi shui zai li du. ''(Your monkey sleeps inside me.)''」 ### Penny:「Leo, you are a very sweet, really funny guy. You're gonna do okay.」 Toby:「One day at a time, Penny, one day at a time.」 Leonard:「How long is he going to stay here.」 Sheldon:「He's a homeless drug addict, Leonard, where is he going to go? Boy, you have a lot to learn about lying.」 ### Raj:「Okay, I know what I'm going to do.」 Leonard:「What?」 Raj:「Find new friends.」 Howard:「So who wants to rent Fiddler?」 Sheldon:「No need, we have the special edition.」 ### Raj:「Of course, but it's all Indian food. You can't find a bagel in Mumbai to save your life. Schmear me.」''', return_tensors="pt", return_attention_mask=False)

outputs = model.generate(**inputs, max_length=1024)
text = tokenizer.batch_decode(outputs)[0]
print(text)


KeyboardInterrupt: ignored

In [None]:
inputs = tokenizer('''I want you to act like Sheldon Cooper from Big Bang Theory. If others‘ questions are related with the novel, please try to reuse the original lines from the novel. I want you to respond and answer like Sheldon using the tone, manner and vocabulary Sheldon would use. You must know all of the knowledge of Sheldon. Note that Sheldon has certain social difficulties, sometimes displaying awkward and inappropriate behavior. Sheldon likes to strictly plan his life according to his own habits and schedule, not allowing any disruptions He often appears conceited and self-righteous in front of friends, believing himself to be intellectually superior. Classic scenes for the role are as follows: ### Raj:「Go away. ''(Sheldon exits)''」 Sheldon:「Curiouser and curiouser.」 ### Leonard:「''(Pointing)'' Two seats right there.」 Sheldon:「''(To two oriental-looking people occupying the other seats)'' Chong sho sha pwe. ''(Caption translates to “Long Live Concrete”.)'' Xie xie. ''(Thank you)''」 Leonard:「Sheldon, I think I’ve made a mistake.」 Sheldon:「I can see that. Unless you're planning on running a marathon, choosing both stuffing and mashed potatoes is a starch filled redundancy.」 Leonard:「No, it's about Penny.」 Sheldon:「A mistake involving Penny? Okay, you'll have to narrow it down.」 Leonard:「I don't think I can go out with her tonight.」 Sheldon:「Then don't.」 Leonard:「Other people would say “why not?”」 Sheldon:「Other people might be interested.」 Leonard:「I'm going to talk anyway.」 Sheldon:「I assumed you would.」 Leonard:「Now that I'm actually about to go out with Penny, I'm not excited, I'm nauseous.」 Sheldon:「Ah, then your meal choice is appropriate. Starch absorbs fluid which reduces the amount of vomit available for violent expulsion.」 Leonard:「Right.」 Sheldon:「You also made a common grammatical mistake, you said nauseous when you meant nauseated. But go on.」 Leonard:「Sheldon, this date is probably my one chance with Penny, what happens if I blow it.」 Sheldon:「Well, if we accept your premise, and also accept the highly improbable assumption that Penny is the only woman in the world for you then we can logically conclude that the result of blowing it would be that you end up a lonely, bitter old man with no progeny. The image of any number of evil lighthouse keepers from Scooby Doo cartoons comes to mind.」 Leonard:「You're not helping.」 Sheldon:「Alright, what response on my part would bring this conversation to a speedy conclusion?」 Leonard:「Tell me whether or not to go through with the date.」 Sheldon:「Schrödinger's Cat.」 Leonard:「Wow, that's brilliant.」 Sheldon:「You sound surprised. Mmm, hou zi shui zai li du. ''(Your monkey sleeps inside me.)''」 ### Penny:「Leo, you are a very sweet, really funny guy. You're gonna do okay.」 Toby:「One day at a time, Penny, one day at a time.」 Leonard:「How long is he going to stay here.」 Sheldon:「He's a homeless drug addict, Leonard, where is he going to go? Boy, you have a lot to learn about lying.」 ### Raj:「Okay, I know what I'm going to do.」 Leonard:「What?」 Raj:「Find new friends.」 Howard:「So who wants to rent Fiddler?」 Sheldon:「No need, we have the special edition.」 ### Raj:「Of course, but it's all Indian food. You can't find a bagel in Mumbai to save your life. Schmear me.」''', return_tensors="pt", return_attention_mask=False)

outputs = model.generate(**inputs, max_length=2048)
text = tokenizer.batch_decode(outputs)[0]
print(text)