In [3]:
!pip install transformers
!pip install datasets
!pip install accelerate
!pip install bitsandbytes
!pip install trl
!pip install peft
!pip install auto_gptq
!pip install optimum
!pip install streamlit-chat

Collecting streamlit-chat
  Downloading streamlit_chat-0.1.1-py3-none-any.whl (1.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m7.5 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: streamlit-chat
Successfully installed streamlit-chat-0.1.1


In [1]:
from huggingface_hub import notebook_login

notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [2]:
import torch
from datasets import load_dataset, Dataset
from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model
from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig, TrainingArguments
from trl import SFTTrainer



### Config

In [3]:
class DatasetConfig:
    VAL_SIZE = 0.1
    DATASET_ID = "bitext/Bitext-customer-support-llm-chatbot-training-dataset"
    INSTRUCTION_FIELD = "instruction"
    TARGET_FIELD = "response"
    DATASET_TEXT_FIELD = "text"

In [4]:
class TokenizerConfig:
    MODEL_ID = "TheBloke/zephyr-7B-alpha-GPTQ"

In [5]:
class ModelConfig:
    MODEL_ID = "TheBloke/zephyr-7B-alpha-GPTQ"
    CONTEXT_FIELD = ""
    DEVIE_MAP = "auto"
    USE_CASH = False

In [6]:
class TrainingArgumentsConfig:
    OUTPUT_DIR = "zephyr-support-chatbot"
    BATCH_SIZE = 8
    EVALUATION_STRATEGY="steps"
    EVAL_STEPS=500
    GRAD_ACCUMULATION_STEPS = 1
    OPTIMIZER = "paged_adamw_32bit"
    LR = 2e-4
    LR_SCHEDULER = "cosine"
    LOGGING_STEPS = 50
    SAVE_STRATEGY = "epoch"
    NUM_TRAIN_EPOCHs = 1
    MAX_STEPS = 250
    FP16 = True
    PUSH_TO_HUB = True

In [7]:
class BitsBytesConfig:
    BITS = 4
    DISABLE_EXLLAMA = True

In [8]:
class PeftConfig:
    LORA_R = 16
    LORA_ALPHA = 16
    LORA_DROPOUT = 0.05
    BIAS = "none"
    TARGET_MODULES = ["q_proj", "v_proj"]
    TASK_TYPE = "CAUSAL_LM"

In [9]:
class TrainerConfig:
    MAX_SEQ_LENGTH = 512
    PACKING = False

In [10]:
class ModelDataset:
    def process_data_sample(self, example):
        processed_example = "<|system|>\n You are a support chatbot who helps with user queries chatbot who always responds in the style of a professional.\n<|user|>\n" + example[DatasetConfig.INSTRUCTION_FIELD] + "\n<|assistant|>\n" + example[DatasetConfig.TARGET_FIELD]
        return processed_example

    def create_dataset(self):
        data = load_dataset(DatasetConfig.DATASET_ID)
        dataset = data["train"].train_test_split(test_size=DatasetConfig.VAL_SIZE,
                                                    shuffle=True, seed=2)

        print("\n====================================================================\n")
        print("\t\t\tDOWNLOADED DATASET")
        print("\n====================================================================\n")
        print(data)

        train_df = dataset["train"].to_pandas()
        valid_df = dataset["test"].to_pandas()

        train_df[DatasetConfig.DATASET_TEXT_FIELD] = train_df[[DatasetConfig.INSTRUCTION_FIELD, DatasetConfig.TARGET_FIELD]].apply(lambda x: self.process_data_sample(x), axis=1)
        valid_df[DatasetConfig.DATASET_TEXT_FIELD] = valid_df[[DatasetConfig.INSTRUCTION_FIELD, DatasetConfig.TARGET_FIELD]].apply(lambda x: self.process_data_sample(x), axis=1)

        print("\n====================================================================\n")
        print("\t\t\tPROCESSED DATASET")
        print(train_df.iloc[0])
        print("\n====================================================================\n")

        train_data = Dataset.from_pandas(train_df[[DatasetConfig.DATASET_TEXT_FIELD]])
        valid_data = Dataset.from_pandas(valid_df[[DatasetConfig.DATASET_TEXT_FIELD]])
        return train_data, valid_data

In [11]:
dataset = ModelDataset()
train_data, valid_data = dataset.create_dataset()



			DOWNLOADED DATASET


DatasetDict({
    train: Dataset({
        features: ['flags', 'instruction', 'category', 'intent', 'response'],
        num_rows: 26872
    })
})


			PROCESSED DATASET
flags                                                       BELM
instruction    I do not know how I can locate my bills from {...
category                                                 INVOICE
intent                                             check_invoice
response       I understand your frustration and uncertainty ...
text           <|system|>\n You are a support chatbot who hel...
Name: 0, dtype: object




### Tokenizer

In [12]:
tokenizer = AutoTokenizer.from_pretrained(TokenizerConfig.MODEL_ID)
# tokenizer.pad_token = tokenizer.eos_token


### Trainer

In [13]:
class Zephyr:
    def __init__(self, tokenizer):
        self.tokenizer = tokenizer

    def prepare_model(self):
        bnb_config = GPTQConfig(bits=BitsBytesConfig.BITS,
                                disable_exllama=BitsBytesConfig.DISABLE_EXLLAMA,
                                tokenizer=self.tokenizer)

        model = AutoModelForCausalLM.from_pretrained(ModelConfig.MODEL_ID,
                                                     quantization_config=bnb_config,
                                                     device_map=ModelConfig.DEVIE_MAP)

        print("\n====================================================================\n")
        print("\t\t\tDOWNLOADED MODEL")
        print(model)
        print("\n====================================================================\n")

        model.config.use_cash = ModelConfig.USE_CASH
        model.config.pretraining_tp = 1
        model.gradient_checkpointing_enable()
        model = prepare_model_for_kbit_training(model)

        print("\n====================================================================\n")
        print("\t\t\tMODEL CONFIG UPDATED")
        print("\n====================================================================\n")

        peft_config = LoraConfig(r=PeftConfig.LORA_R,
                                 lora_alpha=PeftConfig.LORA_ALPHA,
                                 lora_dropout=PeftConfig.LORA_DROPOUT,
                                 bias=PeftConfig.BIAS,
                                 task_type=PeftConfig.TASK_TYPE,
                                 target_modules=PeftConfig.TARGET_MODULES)

        model = get_peft_model(model, peft_config)

        print("\n====================================================================\n")
        print("\t\t\tPREPARED MODEL FOR FINETUNING")
        print(model)
        print("\n====================================================================\n")

        return model, peft_config

    def set_training_arguments(self):
        training_arguments = TrainingArguments(output_dir=TrainingArgumentsConfig.OUTPUT_DIR,
                                               per_device_train_batch_size=TrainingArgumentsConfig.BATCH_SIZE,
                                               per_device_eval_batch_size=TrainingArgumentsConfig.BATCH_SIZE,
                                               gradient_accumulation_steps=TrainingArgumentsConfig.GRAD_ACCUMULATION_STEPS,
                                               optim=TrainingArgumentsConfig.OPTIMIZER,
                                               learning_rate=TrainingArgumentsConfig.LR,
                                               lr_scheduler_type=TrainingArgumentsConfig.LR_SCHEDULER,
                                               save_strategy=TrainingArgumentsConfig.SAVE_STRATEGY,
                                               evaluation_strategy=TrainingArgumentsConfig.EVALUATION_STRATEGY,
                                               eval_steps=TrainingArgumentsConfig.EVAL_STEPS,
                                               logging_steps=TrainingArgumentsConfig.LOGGING_STEPS,
                                               num_train_epochs=TrainingArgumentsConfig.NUM_TRAIN_EPOCHs,
                                               max_steps=TrainingArgumentsConfig.MAX_STEPS,
                                               fp16=TrainingArgumentsConfig.FP16,
                                               push_to_hub=TrainingArgumentsConfig.PUSH_TO_HUB)
        return training_arguments

    def compile_trainer(self, train_data, valid_data):
        model, peft_config = self.prepare_model()
        training_args = self.set_training_arguments()

        print("\n====================================================================\n")
        print("\t\t\tPREPARED FOR FINETUNING")
        print("\n====================================================================\n")

        self.trainer = SFTTrainer(model=model,
                                  train_dataset=train_data,
                                  eval_dataset=valid_data,
                                  dataset_text_field=DatasetConfig.DATASET_TEXT_FIELD,
                                  args=training_args,
                                  tokenizer=self.tokenizer,
                                  packing=TrainerConfig.PACKING,
                                  max_seq_length=TrainerConfig.MAX_SEQ_LENGTH)


    def train(self):
        self.trainer.train()

        print("\n====================================================================\n")
        print("\t\t\tFINETUNING COMPLETED")
        print("\n====================================================================\n")

        self.trainer.push_to_hub()
        return self.trainer

In [14]:
zephyer = Zephyr(tokenizer)
zephyer.compile_trainer(train_data, valid_data):
zephyer.train(train_data, valid_data)

Using `disable_exllama` is deprecated and will be removed in version 4.37. Use `use_exllama` instead and specify the version with `exllama_config`.The value of `use_exllama` will be overwritten by `disable_exllama` passed in `GPTQConfig` or stored in your config file.
You passed `quantization_config` to `from_pretrained` but the model you're loading already has a `quantization_config` attribute and has already quantized weights. However, loading attributes (e.g. use_exllama, exllama_config, use_cuda_fp16, max_input_length) will be overwritten with the one you passed to `from_pretrained`. The rest will be ignored.




			DOWNLOADED MODEL
MistralForCausalLM(
  (model): MistralModel(
    (embed_tokens): Embedding(32000, 4096, padding_idx=2)
    (layers): ModuleList(
      (0-31): 32 x MistralDecoderLayer(
        (self_attn): MistralAttention(
          (rotary_emb): MistralRotaryEmbedding()
          (k_proj): QuantLinear()
          (o_proj): QuantLinear()
          (q_proj): QuantLinear()
          (v_proj): QuantLinear()
        )
        (mlp): MistralMLP(
          (act_fn): SiLUActivation()
          (down_proj): QuantLinear()
          (gate_proj): QuantLinear()
          (up_proj): QuantLinear()
        )
        (input_layernorm): MistralRMSNorm()
        (post_attention_layernorm): MistralRMSNorm()
      )
    )
    (norm): MistralRMSNorm()
  )
  (lm_head): Linear(in_features=4096, out_features=32000, bias=False)
)




			MODEL CONFIG UPDATED




			PREPARED MODEL FOR FINETUNING
PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): MistralForCausalLM(
      (model): MistralModel(


Map:   0%|          | 0/24184 [00:00<?, ? examples/s]

Map:   0%|          | 0/2688 [00:00<?, ? examples/s]

You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...


Step,Training Loss,Validation Loss


Step,Training Loss,Validation Loss




			FINETUNING COMPLETED


