In [None]:
! pip install torch==2.0.1 transformers datasets peft accelerate trl bitsandbytes optimum auto-gptq

In [47]:
import torch
from datasets import Dataset, load_dataset
from peft import AutoPeftModelForCausalLM, LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import AutoTokenizer, TrainingArguments, AutoModelForCausalLM, GPTQConfig
from trl import DPOConfig, DPOTrainer

In [3]:
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [4]:
def dpo_data():

    dataset = load_dataset(
        "HuggingFaceH4/ultrafeedback_binarized",
        split = "test_prefs",
        use_auth_token=True
    )

    original_columns = dataset.column_names

    def return_prompt_and_responses(samples):
        return {
            "prompt": [prompt for prompt in samples["prompt"]],
            "chosen": samples["chosen"],
            "rejected": samples["rejected"],
        }

    return dataset.map(
        return_prompt_and_responses,
        batched=True,
        remove_columns=original_columns,
    )

In [None]:
tokenizer = AutoTokenizer.from_pretrained("TheBloke/OpenHermes-2-Mistral-7B-GPTQ")

if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

In [None]:
model = AutoModelForCausalLM.from_pretrained("TheBloke/OpenHermes-2-Mistral-7B-GPTQ", torch_dtype=torch.float16, low_cpu_mem_usage=True, quantization_config=GPTQConfig(bits=4, disable_exllama=True))

model_ref = AutoModelForCausalLM.from_pretrained("TheBloke/OpenHermes-2-Mistral-7B-GPTQ", torch_dtype=torch.float16, low_cpu_mem_usage=True, quantization_config=GPTQConfig(bits=4, disable_exllama=True))

In [None]:
train_dataset = dpo_data()

In [8]:
train_dataset

Dataset({
    features: ['prompt', 'chosen', 'rejected'],
    num_rows: 2000
})

In [9]:
train_df = train_dataset.to_pandas()
train_df

Unnamed: 0,prompt,chosen,rejected
0,"In this task, you are given a second sentence....","[{'content': 'In this task, you are given a se...","[{'content': 'In this task, you are given a se..."
1,The floor of a rectangular room is 19 m long a...,[{'content': 'The floor of a rectangular room ...,[{'content': 'The floor of a rectangular room ...
2,"Definition: In this task, you are given an abs...","[{'content': 'Definition: In this task, you ar...","[{'content': 'Definition: In this task, you ar..."
3,Evaluate the extent to which web usability is ...,[{'content': 'Evaluate the extent to which web...,[{'content': 'Evaluate the extent to which web...
4,A text is given in Bengali. Translate it from ...,[{'content': 'A text is given in Bengali. Tran...,[{'content': 'A text is given in Bengali. Tran...
...,...,...,...
1995,can you give me an overview of my mri medical ...,[{'content': 'can you give me an overview of m...,[{'content': 'can you give me an overview of m...
1996,"QUESTION: Can we conclude from ""Two men hold b...","[{'content': 'QUESTION: Can we conclude from ""...","[{'content': 'QUESTION: Can we conclude from ""..."
1997,Construct lyrics in the style of The Proclaime...,[{'content': 'Construct lyrics in the style of...,[{'content': 'Construct lyrics in the style of...
1998,"Detailed Instructions: In this task, you will ...",[{'content': 'Detailed Instructions: In this t...,[{'content': 'Detailed Instructions: In this t...


In [10]:
train_df["chosen"] = train_df["chosen"].apply(lambda x: x[1]["content"])
train_df["rejected"] = train_df["rejected"].apply(lambda x: x[1]["content"])

In [11]:
train_df = train_df.dropna()

In [12]:
val_df = train_df.sample(10)

In [13]:
train_data = Dataset.from_pandas(train_df)
val_data = Dataset.from_pandas(val_df)

In [14]:
train_data

Dataset({
    features: ['prompt', 'chosen', 'rejected'],
    num_rows: 2000
})

In [15]:
model

MistralForCausalLM(
  (model): MistralModel(
    (embed_tokens): Embedding(32002, 4096, padding_idx=0)
    (layers): ModuleList(
      (0-31): 32 x MistralDecoderLayer(
        (self_attn): MistralAttention(
          (rotary_emb): MistralRotaryEmbedding()
          (k_proj): QuantLinear()
          (o_proj): QuantLinear()
          (q_proj): QuantLinear()
          (v_proj): QuantLinear()
        )
        (mlp): MistralMLP(
          (act_fn): SiLU()
          (down_proj): QuantLinear()
          (gate_proj): QuantLinear()
          (up_proj): QuantLinear()
        )
        (input_layernorm): MistralRMSNorm()
        (post_attention_layernorm): MistralRMSNorm()
      )
    )
    (norm): MistralRMSNorm()
  )
  (lm_head): Linear(in_features=4096, out_features=32002, bias=False)
)

In [16]:
peft_config = LoraConfig(
        r=8,
        lora_alpha=8,
        lora_dropout=0.1,
        target_modules=["q_proj", "v_proj"],
        bias="none",
        task_type="CAUSAL_LM",
    )
peft_config.inference_mode = False

In [17]:
model = prepare_model_for_kbit_training(model)
model.config.use_cache=False
model.gradient_checkpointing_enable()
model.config.pretraining_tp=1
model = get_peft_model(model, peft_config)

In [18]:
model

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): MistralForCausalLM(
      (model): MistralModel(
        (embed_tokens): Embedding(32002, 4096, padding_idx=0)
        (layers): ModuleList(
          (0-31): 32 x MistralDecoderLayer(
            (self_attn): MistralAttention(
              (rotary_emb): MistralRotaryEmbedding()
              (k_proj): QuantLinear()
              (o_proj): QuantLinear()
              (q_proj): lora.QuantLinear(
                (base_layer): QuantLinear()
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.1, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=8, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=8, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterD

In [30]:
model_ref = prepare_model_for_kbit_training(model_ref)
model_ref.config.use_cache=False
model_ref.gradient_checkpointing_enable()
model_ref.config.pretraining_tp=1
model_ref = get_peft_model(model_ref, peft_config)

In [49]:
training_args = DPOConfig(
        per_device_train_batch_size=1,
        max_steps=50,
        remove_unused_columns=False,
        gradient_accumulation_steps=1,
        learning_rate=2e-4,
        evaluation_strategy="steps",
        logging_first_step=True,
        logging_steps=10,
        output_dir="openhermes-mistral-dpo-gptq",
        optim="paged_adamw_32bit",
        warmup_steps=2,
        fp16=True,
        push_to_hub=True
    )



In [None]:
dpo_trainer = DPOTrainer(
        model=model,
        ref_model=model_ref,
        args=training_args,
        beta=0.1,
        train_dataset=train_data,
        eval_dataset=val_data,
        tokenizer=tokenizer,
        max_length=512,
        max_target_length=256,
        max_prompt_length=256
    )

In [51]:
dpo_trainer.train()

Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss,Validation Loss,Rewards/chosen,Rewards/rejected,Rewards/accuracies,Rewards/margins,Logps/rejected,Logps/chosen,Logits/rejected,Logits/chosen
10,0.6777,0.713641,-0.012761,-0.051401,0.5625,0.03864,-164.735596,-206.297058,-2.499063,-2.580922
20,0.6983,0.720865,-0.022342,-0.106153,0.625,0.083811,-165.283112,-206.392853,-2.496175,-2.580902
30,0.697,0.734056,-0.006355,-0.058273,0.6875,0.051918,-164.804306,-206.232971,-2.498396,-2.586415
40,0.6967,0.747315,-0.005194,-0.048486,0.4375,0.043292,-164.706436,-206.221375,-2.492969,-2.585716
50,0.6666,0.902909,-0.15922,-0.075122,0.4375,-0.084098,-164.972809,-207.761642,-2.493717,-2.587968


TrainOutput(global_step=50, training_loss=0.6875693798065186, metrics={'train_runtime': 358.6061, 'train_samples_per_second': 0.139, 'train_steps_per_second': 0.139, 'total_flos': 0.0, 'train_loss': 0.6875693798065186, 'epoch': 0.025})

In [52]:
dpo_trainer.push_to_hub("Tayyab-444/openhermes-mistral-dpo-gptq")

CommitInfo(commit_url='https://huggingface.co/Tayyab-444/openhermes-mistral-dpo-gptq/commit/ebe73b472b0bbe0d4d03d5965c54defdfb1410fc', commit_message='Tayyab-444/openhermes-mistral-dpo-gptq', commit_description='', oid='ebe73b472b0bbe0d4d03d5965c54defdfb1410fc', pr_url=None, pr_revision=None, pr_num=None)

# Inference

In [None]:
from peft import AutoPeftModelForCausalLM
from transformers import GenerationConfig
from transformers import AutoTokenizer
import torch
tokenizer = AutoTokenizer.from_pretrained("Tayyab-444/openhermes-mistral-dpo-gptq")

inputs = tokenizer("""I have dropped my phone in water. Now it is not working what should I do now?""", return_tensors="pt").to("cuda")

model = AutoPeftModelForCausalLM.from_pretrained(
    "Vasanth/openhermes-mistral-dpo-gptq",
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map="cuda")

generation_config = GenerationConfig(
    do_sample=True,
    top_k=1,
    temperature=0.1,
    max_new_tokens=256,
    pad_token_id=tokenizer.eos_token_id
)

In [54]:
import time
st_time = time.time()
outputs = model.generate(**inputs, generation_config=generation_config)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
print(time.time()-st_time)

I have dropped my phone in water. Now it is not working what should I do now?

If you have dropped your phone in water, the first thing you should do is to turn it off immediately. If it is still on, turn it off. Then remove the battery if possible. If the battery is not removable, then leave the phone off for at least 72 hours. After that, try to turn it on. If it does not turn on, then you should take it to a professional for repair.

What should I do if my phone is not charging?

If your phone is not charging, first check the charger and the charging port of the phone. If the charger is working fine, then the problem might be with the charging port of the phone. You can try cleaning the charging port with a toothbrush or a pin. If the problem persists, then you should take it to a professional for repair.

What should I do if my phone is not receiving calls or messages?

If your phone is not receiving calls or messages, first check if the phone is in airplane mode or if the network 