In [None]:
!pip install datasets
!pip install transformers
!pip install accelerate
!pip install bitsandbytes
!pip install peft
!pip install trl
!pip install sentencepiece

Collecting datasets
  Downloading datasets-2.18.0-py3-none-any.whl (510 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m510.5/510.5 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m11.8 MB/s[0m eta [36m0:00:00[0m
Collecting xxhash (from datasets)
  Downloading xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.1/194.1 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting multiprocess (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl (134 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: xxhash, dill, multiprocess, datasets
Successfully installed datasets-

In [1]:
from google.colab import userdata

secret_hf = userdata.get('HUGGINGFACE_TOKEN')
!huggingface-cli login --token $secret_hf

Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


## Load Dataset wiki_QA

In [2]:
from datasets import load_dataset, concatenate_datasets

def concated_dataset (dataset_id, split_list, name=""):
    dataset = load_dataset(dataset_id, name=name, split=split_list)
    return concatenate_datasets(dataset)

def generate_text(sample):
    text = '<s>[INST]' + sample['question'] +'? [/INST]'+ sample['answer'] + '</s>'
    return {"text": text}

train_dataset = concated_dataset("wiki_qa", split_list=["train", "validation"])
train_dataset = train_dataset.map(lambda sample:generate_text(sample))

test_dataset = concated_dataset("wiki_qa", split_list=["test"])
test_dataset = train_dataset.map(lambda sample:generate_text(sample))

train_dataset.to_pandas().sample(5)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Unnamed: 0,question_id,question,document_title,answer,label,text
5775,Q818,what countries are very near to Haiti?,Haiti,Political violence has occurred regularly thro...,0,<s>[INST]what countries are very near to Haiti...
5752,Q816,what does the royal family do,British Royal Family,In 2013 Letters Patent were issued to extend a...,0,<s>[INST]what does the royal family do? [/INST...
1675,Q227,how does a steam engine work,Steam engine,"In the cycle, water is heated into steam in a ...",0,<s>[INST]how does a steam engine work? [/INST]...
5814,Q824,How did Frida Kahlo die?,Frida Kahlo,Many of her health problems were the result of...,0,<s>[INST]How did Frida Kahlo die?? [/INST]Many...
1712,Q231,how is christianity viewed in australia,Christianity in Australia,Christianity is the largest religion listed by...,0,<s>[INST]how is christianity viewed in austral...


## Quantization

In [3]:
from transformers import BitsAndBytesConfig
import torch

bnb_config = BitsAndBytesConfig(load_in_4bit= True,
                                bnb_4bit_quant_type= "nf4",
                                bnb_4bit_compute_dtype= torch.bfloat16,
                                bnb_4bit_use_double_quant= False)

## Load Base Model

In [4]:
from transformers import AutoModelForCausalLM

base_model = "mistralai/Mistral-7B-Instruct-v0.2"

model = AutoModelForCausalLM.from_pretrained(base_model,
                                            #  load_in_4bit=True,
                                             quantization_config=bnb_config,
                                             torch_dtype=torch.bfloat16,
                                             device_map="auto",
                                             trust_remote_code=True)

model.config.use_cache = False
model.config.pretraining_tp = 1
model.gradient_checkpointing_enable()

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

## Load tokenizer

In [5]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(base_model,
                                          trust_remote_code=True)

tokenizer.padding_side = 'right'
tokenizer.pad_token = tokenizer.eos_token
tokenizer.add_eos_token = True
tokenizer.bos_token, tokenizer.eos_token

('<s>', '</s>')

### Peft, Lora Configuration

In [6]:
from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model

model = prepare_model_for_kbit_training(model)
peft_config = LoraConfig(lora_alpha=16,
                         lora_dropout=0.1,
                         r=64,
                         bias="none",
                         task_type="CAUSAL_LM",
                         target_modules=["q_proj", "k_proj", "v_proj", "o_proj","gate_proj"])

model = get_peft_model(model, peft_config)

## Train Arguments

In [7]:
from transformers import TrainingArguments

training_arguments = TrainingArguments(output_dir="./results",
                                       num_train_epochs=1,
                                       per_device_train_batch_size=4,
                                       gradient_accumulation_steps=1,
                                       optim="paged_adamw_32bit",
                                       save_steps=300,
                                       logging_steps=300,
                                       learning_rate=2e-4,
                                       weight_decay=0.001,
                                       fp16=False,
                                       bf16=False,
                                       max_grad_norm=0.3,
                                       max_steps=-1,
                                       warmup_ratio=0.03,
                                       group_by_length=True,
                                       lr_scheduler_type="constant")

## Trainer

In [8]:
from trl import SFTTrainer

trainer = SFTTrainer(model=model,
                     train_dataset=train_dataset,
                     eval_dataset=test_dataset,
                     peft_config=peft_config,
                     max_seq_length= None,
                     dataset_text_field="text",
                     tokenizer=tokenizer,
                     args=training_arguments,
                     packing= False)



Map:   0%|          | 0/23093 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


In [9]:
trainer.train()



Step,Training Loss


KeyboardInterrupt: 

In [None]:
model.eval()

In [None]:
new_model = "Mistral-7b_0.2-wiki_QA-Colab_Standard"

trainer.model.save_pretrained(new_model)
model.config.use_cache = True

In [None]:
trainer.model.push_to_hub(new_model)

adapter_model.safetensors:   0%|          | 0.00/369M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/MohamedAhmedAE/Mistral-7b_0.2-wiki_QA-Colab_Standard/commit/292ec92ca0019732cfe7ea472787cc266588a751', commit_message='Upload model', commit_description='', oid='292ec92ca0019732cfe7ea472787cc266588a751', pr_url=None, pr_revision=None, pr_num=None)

In [None]:
from transformers import pipeline, logging

logging.set_verbosity(logging.CRITICAL)
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=200)

In [None]:
def build_prompt(question):
  prompt=f"<s>[INST]{question}?[/INST]"
  return prompt

In [None]:
question = "what is Light Gun"
prompt = build_prompt(question)
result = pipe(prompt)

print(result[0]['generated_text'])

<s>[INST]what is Light Gun?[/INST]A light gun is a peripheral device for video games, designed to be used with light gun games. The device is held like a pistol, and has a trigger and a sight. The player aims the gun at a screen and pulls the trigger to shoot virtual targets. The gun is connected to the game console via a wired connection.

The first light gun game was Duck Hunt, released for the NES in 1984. The game was bundled with the NES Zapper, a light gun peripheral. The gun was designed to be used with the game, and was not compatible with other games.

The first light gun game for a home console was Sega's Hang-On, released for the Sega Master System in 1986. The game was bundled with the Sega Light Phaser, a light gun peripheral. The gun was


In [None]:
import torch
from transformers import AutoTokenizer
from transformers import AutoModelForCausalLM
from peft import PeftModel

base_model = "mistralai/Mistral-7B-Instruct-v0.2"
new_model = "MohamedAhmedAE/Mistral-7b_0.2-wiki_QA-Colab_Standard"

tokenizer = AutoTokenizer.from_pretrained(base_model, add_special_tokens=False)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"
tokenizer.add_special_tokens = False

base_model = AutoModelForCausalLM.from_pretrained(base_model,
                                                  low_cpu_mem_usage=True,
                                                  return_dict=True,
                                                  torch_dtype=torch.float16,
                                                  # device_map="auto"
                                                  )

merged_model= PeftModel.from_pretrained(base_model, new_model)
merged_model = merged_model.merge_and_unload()

# Save the merged model
merged_model.save_pretrained("merged_model", safe_serialiaztion=True)
tokenizer.save_pretrained("merged_model")
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

In [None]:
merged_model

In [None]:
merged_model.push_to_hub(new_model)#, use_temp_dir=False)
tokenizer.push_to_hub(new_model)#, use_temp_dir=False)

## Build performance metric

In [None]:
df_test = train_dataset.to_pandas().sample(5)

questionCounter=0
correct=0
promptEnding = "[/INST]"

# this must be >= 2
fail_limit=10

# chain of thought activator, model might run out of output tokens
USE_COT=True

#this comes before the question
testGuide='Answer the following question, at the end of your response write the answer like this: Answer:a or Answer:b or Answer:c or Answer:d \n'

for index, row in df_test.iterrows():
    print("#############################")
    questionCounter = questionCounter + 1

    #chain of thought activator
    if USE_COT:
        chainOfThoughtActivator='\nfirst think step by step\n'
    else:
        chainOfThoughtActivator='\n'

    #build the prompt
    question=testGuide + row['Question'] + '\na)' + row['a'] + '\nb)' + row['b'] + '\nc)' + row['c'] + '\nd)' + row['d'] + chainOfThoughtActivator
    print(question)

    #true answer
    truth=row['Answer']

    #use a loop, if llm stopped before reaching the answer. ask again
    index=-1
    failCounter=0
    while(index==-1):

        #build the prompt
        prompt = build_prompt(question)

        #generate answer
        result = pipe(prompt)
        llmAnswer = result[0]['generated_text']

        #remove our prompt from it
        index = llmAnswer.find(promptEnding)
        llmAnswer = llmAnswer[len(promptEnding)+index:]

        print("LLM Answer:")
        print(llmAnswer)

        #remove spaces
        llmAnswer=llmAnswer.replace(' ','')

        #find the option in response
        index = llmAnswer.find('Answer:')

        #edge case - llm stoped at the worst time
        if(index+len('Answer:')==len(llmAnswer)):
            index=-1

        #update question for the next try. remove chain of thought
        question=testGuide + row['Question'] + '\na)' + row['a'] + '\nb)' + row['b'] + '\nc)' + row['c'] + '\nd)' + row['d']

        #Don't get stock on a question
        failCounter=failCounter+1
        if failCounter==fail_limit:
            break

    if failCounter==fail_limit:
        continue

    #find and match the option
    next_char = llmAnswer[index+len('Answer:'):][0]
    if next_char in truth:
        correct=correct+1
        print('correct')
    else:
        print('wrong')

    #update accuracy
    accuracy=correct/questionCounter
    print(f"Progress: {questionCounter/len(df_test)}")
    print(f"Accuracy: {accuracy}")


