In [None]:
# !pip install -U transformers

In [None]:
# https://github.com/brevdev/notebooks/blob/main/mistral-finetune.ipynb
# https://colab.research.google.com/drive/1sJw09FVFIVeWycgk20LIi87O4wKDTDGf#scrollTo=MK4JBoUAs0Iv
# https://www.kaggle.com/code/lifeofcoding/fine-tune-mistral-7b-on-dolly-
# https://github.com/mzbac/mlx-lora/blob/main/lora.py

In [None]:
# !pip install accelerate -U

In [None]:
# !pip install -q -U git+https://github.com/huggingface/peft.git

In [None]:
# !pip install -q -U bitsandbytes

In [2]:
# !pip install -U trl

Collecting trl
  Obtaining dependency information for trl from https://files.pythonhosted.org/packages/97/7e/274ed94ab7da21db4b7cbccad2bf2ed0940082a929b1512e508351b289f5/trl-0.8.6-py3-none-any.whl.metadata
  Downloading trl-0.8.6-py3-none-any.whl.metadata (11 kB)
Downloading trl-0.8.6-py3-none-any.whl (245 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m245.2/245.2 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: trl
  Attempting uninstall: trl
    Found existing installation: trl 0.7.10
    Uninstalling trl-0.7.10:
      Successfully uninstalled trl-0.7.10
Successfully installed trl-0.8.6


In [None]:
# !pip install evaluate
# !pip install rouge_score==0.1.2

In [None]:
# !pip install -q wandb -U

In [3]:
from transformers import AutoModelForCausalLM, AutoTokenizer, DefaultDataCollator, TrainingArguments, Trainer, BitsAndBytesConfig, GenerationConfig
from datasets import load_dataset
import torch
from peft import LoraConfig, get_peft_model, TaskType, PeftModel
import evaluate
import pandas as pd
import numpy as np
from trl import SFTTrainer
import wandb, os

In [2]:
# https://huggingface.co/blog/4bit-transformers-bitsandbytes

In [3]:
bnb_config = BitsAndBytesConfig(  
    load_in_4bit= True,
    bnb_4bit_quant_type= "nf4", #normalized float 4, while this helps in storing weights in 4 bit
    bnb_4bit_compute_dtype= torch.bfloat16, # this choose the computation type float16, bfloat16, float32, default float32, but using float16 compute will be faster
    bnb_4bit_use_double_quant= True, #uses a second quantization after the first one to save an additional 0.4 bits per parameter
        # nested quantization
)

In [4]:
# model_id = "mistralai/Mixtral-8x7B-v0.1"
model_id = "mistralai/Mistral-7B-v0.1"
# model_id = "mistralai/Mistral-7B-Instruct-v0.1"
tokenizer = AutoTokenizer.from_pretrained(model_id, model_max_length=512,
    padding_side="left",
    add_eos_token=True)

tokenizer.pad_token = tokenizer.eos_token
tokenizer.add_eos_token=True
tokenizer.add_bos_token, tokenizer.add_eos_token

model = AutoModelForCausalLM.from_pretrained(model_id, 
        load_in_4bit=True,
        quantization_config=bnb_config,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        trust_remote_code=True,
)
model.config.use_cache = False # silence the warnings. Please re-enable for inference!
model.config.pretraining_tp = 1
model.gradient_checkpointing_enable()

Loading checkpoint shards: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:05<00:00,  2.74s/it]


In [5]:
text = "<s>[INST] What is your favourite condiment? [/INST]"
"Well, I'm quite partial to a good squeeze of fresh lemon juice. It adds just the right amount of zesty flavour to whatever I'm cooking up in the kitchen!</s> "
text = "<s>[INST] Do you have mayonnaise recipes? [/INST]"

input_ = tokenizer(text, return_tensors="pt").to("cuda")
print(tokenizer.decode(model.generate(**input_)[0], skip_special_tokens=True))

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


[INST] Do you have mayonnaise recipes? [/INST]




In [6]:
def print_no_trainable_param(model):
    trainable_param=0
    all_model_params = 0
    for _, param in model.named_parameters():
        all_model_params += param.numel()
        if param.requires_grad:
            trainable_param += param.numel()
    return f"trainable model parameters: {trainable_param}\nall model parameters: {all_model_params} \n percentage of trainable params: {100 * trainable_param/all_model_params:.2f}%"

In [7]:
list(model.named_modules())

[('',
  MistralForCausalLM(
    (model): MistralModel(
      (embed_tokens): Embedding(32000, 4096)
      (layers): ModuleList(
        (0-31): 32 x MistralDecoderLayer(
          (self_attn): MistralAttention(
            (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
            (k_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
            (v_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
            (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
            (rotary_emb): MistralRotaryEmbedding()
          )
          (mlp): MistralMLP(
            (gate_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
            (up_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
            (down_proj): Linear4bit(in_features=14336, out_features=4096, bias=False)
            (act_fn): SiLU()
          )
          (input_layernorm): MistralRMSNorm()
          (post_attention_

In [8]:
unique_layers = set()
for name, module in model.named_modules():
    if "Linear4bit" in str(type(module)):
        layer_type = name.split('.')[-1]
        unique_layers.add(layer_type)
list(unique_layers)

['up_proj', 'q_proj', 'k_proj', 'gate_proj', 'down_proj', 'v_proj', 'o_proj']

## Peft

In [9]:
config = LoraConfig(
    r=64,
    lora_alpha=16,
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
        "lm_head",
    ],
    bias="none",
    lora_dropout=0.05,  # Conventional
    task_type="CAUSAL_LM",
)
peft_model = get_peft_model(model, config)
print(print_no_trainable_param(peft_model))

trainable model parameters: 170082304
all model parameters: 3922153472 
 percentage of trainable params: 4.34%


## Params in LORA:
- r - rank in the decomposition matrix
- alpha -> scaling parameter when merging the weights on to the original model

## Dataset

In [13]:
# new data 2024_04_05
train_dataset = load_dataset("csv", data_files="./total_data_2023_04_05.csv", split="train[:90%]")
eval_dataset = load_dataset("csv", data_files="./total_data_2023_04_05.csv", split="train[90%:]")

In [15]:
next(iter(train_dataset))

{'Unnamed: 0.1': 0,
 'Unnamed: 0': 925,
 'context': 'If you have bright red bleeding of more than a spot or two at any time this month, call your care provider right away . It could be a sign of placental abruption, a serious problem in which your placenta separates from the wall of your uterus. This condition is a medical emer gency . However , try not to confuse this kind of bleeding with the slight bleeding you may have after a pelvic exam or with the blood and mucus you may see as the cervix thins. Constant, severe abdominal pain If you have constant, severe abdominal pain, contact your care provider immediately . Although uncommon, this can be another sign of placental abruption. If you also have a fever and vaginal dischar ge along with the pain, you may have an infection. Decreased movement It’s normal for the vigor of your baby’ s activities to decrease somewhat during the last few days before birth. It’ s almost as if your baby is resting and storing up ener gy for the big day

In [16]:
def tokenize(prompt):
    result = tokenizer(
        prompt,
        truncation=True,
        max_length=1024,
        padding="max_length",
    )
    result["labels"] = result["input_ids"].copy()
    return result

def generate_prompt(data_point):
    full_prompt = f"""<s>[INST]{data_point['instruction']}
    {f"Here is some context: {data_point['context']}" if len(data_point["context"]) > 0 else None}
    [/INST] {data_point['response']}
    </s>"""
    return {"text": full_prompt}

# def generate_prompt_two(data_point):
#     full_prompt = f"""<s>[INST]### Instruction:{data_point['instruction']}
#     {f"\n\n Here is some context: ### Input:" {data_point['context']}" if len(data_point["context"]) > 0 else None}
#     [/INST]\n\n completion: {data_point['response']}
#     </s>"""
#     return {"text": full_prompt}

In [17]:
def formatting_func(example):
  if example.get("context", "") != "":
      input_prompt = (f"Below is an instruction that describes a task, paired with an input that provides further context. "
      "Write a response that appropriately completes the request.\n\n"
      "### Instruction:\n"
      f"{example['instruction']}\n\n"
      f"### Input: \n"
      f"{example['context']}\n\n"
      f"### Response: \n"
      f"{example['response']}")

  else:
    input_prompt = (f"Below is an instruction that describes a task. "
      "Write a response that appropriately completes the request.\n\n"
      "### Instruction:\n"
      f"{example['instruction']}\n\n"
      f"### Response:\n"
      f"{example['response']}")

  return {"text" : input_prompt}

In [18]:
generated_train_dataset = train_dataset.map(formatting_func, remove_columns=list(train_dataset.features))
generated_eval_dataset = eval_dataset.map(formatting_func, remove_columns=list(eval_dataset.features))

In [19]:
generated_train_dataset[12]



## Training

In [20]:
if torch.cuda.device_count() > 1: # If more than 1 GPU
    peft_model.is_parallelizable = True
    peft_model.model_parallel = True
print(torch.cuda.device_count())

4


In [21]:
wandb.login()

wandb_project = "mistral_finetune"
if len(wandb_project) > 0:
    os.environ["WANDB_PROJECT"] = wandb_project

[34m[1mwandb[0m: Currently logged in as: [33mkarkeebishwa1[0m ([33mllm_fine[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [22]:
epochs = 2
per_device_batch = 16
max_steps = int(epochs * len(generated_train_dataset)/ per_device_batch)
max_steps

2030

In [23]:
import time
import transformers
output_dir = f'./peft-training-{str(int(time.time()))}'

peft_training_args = TrainingArguments(
    output_dir=output_dir,
    # auto_find_batch_size=True,
    per_device_train_batch_size=per_device_batch, #4
    gradient_accumulation_steps=4, #1
    learning_rate=2e-3, # Higher learning rate than full fine-tuning.
    num_train_epochs=epochs,
    bf16=True,
    warmup_steps=0.03,
    # lr_scheduler_type=, 
    logging_steps=1,
    save_steps=5,
    max_steps=max_steps,
    report_to="wandb",
    run_name=output_dir,
    evaluation_strategy="steps",
    eval_steps=5,
    do_eval=True,
)

# peft_trainer = Trainer(
#     model=peft_model,
#     args=peft_training_args,
#     train_dataset=generated_train_dataset,
#     eval_dataset=generated_eval_dataset,
#     data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
#     tokenizer=tokenizer,
# )
# opt = optim.AdamW(learning_rate=2e-3)
trainer = SFTTrainer(
    model=peft_model,
    train_dataset=generated_train_dataset,
    eval_dataset=generated_eval_dataset,
    peft_config=config,
    max_seq_length=1024,
    dataset_text_field="text",
    tokenizer=tokenizer,
    args=peft_training_args,
    packing=False
)

Map: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1805/1805 [00:00<00:00, 3115.47 examples/s]


In [None]:
trainer.train()

## Inference

In [None]:
# by default PEFT library will only save the LoRA adapters, so we need to first load the base pre-trained model

In [24]:
model_id = "mistralai/Mistral-7B-v0.1"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)
base_model = AutoModelForCausalLM.from_pretrained(
    model_id,
    # quantization_config = bnb_config,
    device_map="auto",
    # trust_remote_code=True,
    return_dict=True,
    torch_dtype=torch.float16,
    # use_auth_token=True
)
eval_tokenizer = AutoTokenizer.from_pretrained(
    model_id,
    # add_bos_token=True,
    trust_remote_code=True,
    padding_size="left"
)
eval_tokenizer.pad_token = eval_tokenizer.eos_token
# eval_tokenizer.add_eos_token=True
# eval_tokenizer.add_bos_token, eval_tokenizer.add_eos_token

Loading checkpoint shards: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:04<00:00,  2.37s/it]


In [25]:
# ./peft-training-1705862492/checkpoint-205 right
# ft_model = base_model.load_adapter('./peft-training-1705869556/checkpoint-995')
# ft_model = AutoModelForCausalLM.from_pretrained('./peft-training-1705869556/checkpoint-995').to("cuda")
ft_model = PeftModel.from_pretrained(base_model, "./peft-training-1712523322/checkpoint-1005", is_trainable=False, torch_dtype=torch.bfloat16)

In [26]:
# model_infer = get_peft_model(model, ft_model)

In [27]:
def inference(instruction, context = None):
  if context:
    prompt = f"Below is an instruction that describes a task, paired with an input that provides further context.\n\n### Instruction: \n{instruction}\n\n### Input: \n{context}\n\n### Response: \n"
  else:
    prompt = f"Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction: \n{instruction}\n\n### Response: \n"
  
  # model_input = tokenizer(prompt, return_tensors="pt", return_token_type_ids=False).to("cuda")
  input_ids = eval_tokenizer(prompt, return_tensors="pt").input_ids.to("cuda")
  ft_model.eval()
  with torch.no_grad():
    # print(eval_tokenizer.decode(ft_model.generate(**model_input)[0], skip_special_tokens=True))
      print(eval_tokenizer.decode(ft_model.generate(input_ids=input_ids, generation_config=GenerationConfig(max_new_tokens=200, num_beams=1, pad_token_id=2))[0], 
                            skip_special_tokens=True))

def inference_two(instruction, context = None):
  if context:
     prompt = f"""<s>[INST]{instruction}
    {f"Here is some context: {context}"}
    [/INST] 
    """
  else:
    prompt = f"""<s>[INST]{instruction}
        {f"Here is some context: {None}"}
        [/INST] 
        """
  
  model_input = tokenizer(prompt, return_tensors="pt", return_token_type_ids=False).to("cuda")
  ft_model.eval()
  with torch.no_grad():
    print(eval_tokenizer.decode(ft_model.generate(**model_input, max_length=25)[0], skip_special_tokens=True))


In [28]:
# inference_two("Convert the text into a dialogue between two characters.", "Maria's parents were strict with her, so she started to rebel against them.")

In [29]:
inference("Convert the text into a dialogue between two characters.", "Maria's parents were strict with her, so she started to rebel against them.")

Below is an instruction that describes a task, paired with an input that provides further context.

### Instruction: 
Convert the text into a dialogue between two characters.

### Input: 
Maria's parents were strict with her, so she started to rebel against them.

### Response: 
character 1: Maria, why are you so disrespectful to your parents?
character 2: They are too strict with me, and they don’t understand me.
character 1: Maria, your parents are only trying to help you. You need to respect them.
character 2: I know, but they don’t understand me. They don’t understand how hard it is to be a teenager.
character 1: Maria, you need to learn to respect your parents, even if they don’t understand you.
character 2: I know, but it’s hard. They are always telling me what to do, and they never listen to me.
character 1: Maria, your parents are only trying to help you. You need to learn to respect them.
character 2: I know, but it’s hard. They are always telling me what to do, and they never

In [30]:
context = """Thomas Jefferson (April 13, 1743 – July 4, 1826) was an American statesman, diplomat, lawyer, architect,
philosopher, and Founding Father who served as the third president of the United States from 1801 to 1809. 
Among the Committee of Five charged by the Second Continental Congress with authoring the Declaration of Independence, 
Jefferson was the Declaration's primary author. Following the American Revolutionary War and prior to becoming 
the nation's third president in 1801, Jefferson was the firstUnited States secretary of state under 
George Washington and then the nation's second vice president under John Adams."""

inference("Who is Thomas Jefferson?", context)

Below is an instruction that describes a task, paired with an input that provides further context.

### Instruction: 
Who is Thomas Jefferson?

### Input: 
Thomas Jefferson (April 13, 1743 – July 4, 1826) was an American statesman, diplomat, lawyer, architect,
philosopher, and Founding Father who served as the third president of the United States from 1801 to 1809. 
Among the Committee of Five charged by the Second Continental Congress with authoring the Declaration of Independence, 
Jefferson was the Declaration's primary author. Following the American Revolutionary War and prior to becoming 
the nation's third president in 1801, Jefferson was the firstUnited States secretary of state under 
George Washington and then the nation's second vice president under John Adams.

### Response: 
Thomas Jefferson was an American statesman, Founding Father, and the third president of the United States. He was the principal author of the Declaration of Independence and served as the first United S

In [31]:
context = """Reading railway station is a major transport hub in Reading, Berkshire, England. It is on the northern edge of the town centre, near the main retail and commercial areas and the River Thames, 36 miles (58 km) from London Paddington. The first Reading station was opened on 30 March 1840 as the temporary western terminus of the original line of the Great Western Railway (GWR). Reading is the ninth-busiest station in the UK outside London and the second busiest interchange station outside London with over 3.8 million passengers changing trains at the station annually."""
inference("When was the first Reading railway station opened?", context)

Below is an instruction that describes a task, paired with an input that provides further context.

### Instruction: 
When was the first Reading railway station opened?

### Input: 
Reading railway station is a major transport hub in Reading, Berkshire, England. It is on the northern edge of the town centre, near the main retail and commercial areas and the River Thames, 36 miles (58 km) from London Paddington. The first Reading station was opened on 30 March 1840 as the temporary western terminus of the original line of the Great Western Railway (GWR). Reading is the ninth-busiest station in the UK outside London and the second busiest interchange station outside London with over 3.8 million passengers changing trains at the station annually.

### Response: 
30 March 1840.

### Response: 
30 March 1840.

### Response: 
30 March 1840.

### Response: 
30 March 1840.

### Response: 
30 March 1840.

### Response: 
30 March 1840.

### Response: 
30 March 1840.

### Response: 
30 March 1840

In [32]:
inference("What is a good way to get around without a car?")

Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction: 
What is a good way to get around without a car?

### Response: 
 A good way to get around without a car is by using public transportation such as buses, trains, or subways. These forms of transportation are efficient, reliable, and can be a cost-effective alternative to driving a car. Additionally, using public transportation can help reduce congestion on the roads and minimize the environmental impact of driving.

I hope this helps! Let me know if you have any other questions.

### Response: 
 Public transportation is a great way to get around without a car. It can be more efficient and reliable than driving, and it can also be a more sustainable option. Additionally, using public transportation can save money on fuel and maintenance costs.

I hope this helps! Let me know if you have any other questions.

### Response: 
 Using public transportation is a great wa

## Evaluate

In [33]:
next(iter(eval_dataset))

{'Unnamed: 0.1': 16247,
 'Unnamed: 0': 745,
 'context': 'spore) is causing the infection • Whether you have antibodies to the organism from a prior exposure • Whether the disease is treatable • When during pregnancy you acquired the infection Even if you get an infection during pregnancy, your baby might not become infected—and even if your baby gets infected, he might not be harmed. The chart on pages 132–133 identiﬁes infections that are harmful during pregnancy, and the following sections provide information on the most serious of them. Ways to Avoid Getting Sick The best way to prevent complications from an infection is to avoid getting sick. Here are a few guidelines to follow: 1. Wash your hands several times each day, especially before eating and after using the toilet. Germs live on doorknobs, handrails, phones, hands, and other surfaces. After touching a germ-covered surface with your hands, you transmit the germs to your food, mouth, nose, and anything else you touch. 2. Stay

In [34]:
def inference(instruction, context = None):
  if context:
    prompt = f"Below is an instruction that describes a task, paired with an input that provides further context.\n\n### Instruction: \n{instruction}\n\n### Input: \n{context}\n\n### Response: \n"
  else:
    prompt = f"Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction: \n{instruction}\n\n### Response: \n"
  
  # model_input = tokenizer(prompt, return_tensors="pt", return_token_type_ids=False).to("cuda")
  input_ids = eval_tokenizer(prompt, return_tensors="pt").input_ids.to("cuda")
  ft_model.eval()
  with torch.no_grad():
      return eval_tokenizer.decode(ft_model.generate(input_ids=input_ids, generation_config=GenerationConfig(max_new_tokens=50, num_beams=1, pad_token_id=2))[0], 
                            skip_special_tokens=True)


In [35]:
output = {}
output['instruction'] = []
output['context'] = []
output['response'] = []
output['model_output'] = []
for data in eval_dataset:
    out = inference(data['instruction'], data['context'])
    output['instruction'].append(data['instruction'])
    output['context'].append(data['context'])
    output['response'].append(data['response'])
    output['model_output'].append(out.split('Response: \n')[-1])

In [36]:
output['model_output'][0].split('Response')[-1]

' To reduce your risk of developing complications from an infection during pregnancy, its important to maintain a healthy lifestyle, including getting regular exercise and eating a balanced diet. Additionally, practicing good hygiene and avoiding exposure to harmful substances can help reduce your'

In [37]:
output['response']

[' By taking proactive measures such as frequent handwashing, avoiding close contact with sick individuals, and ensuring your vaccinations are up to date, you can significantly lower your risk of experiencing complications from an infection during pregnancy. Additionally, maintaining a healthy lifestyle throughout pregnancy can also help boost your immune system and reduce the likelihood of developing complications.',
 ' Fathers play a significant role in shaping the health of their offspring beyond conception through their lifestyle choices and metabolic status, which can influence the development and growth of their unborn child.',
 ' Low birth weight (LBW) refers to infants who weigh less than 2500 grams at birth, while small for gestational age (SGA) refers to infants who are smaller than the average size for their gestational age. The terms are not mutually exclusive, and many infants are classified as both LBW and SGA.\n\n',
 ' Using a birthing ball can help decrease discomfort f

In [38]:
df = pd.DataFrame.from_dict(output)
df.head()

Unnamed: 0,instruction,context,response,model_output
0,How can I reduce my risk of developing compli...,spore) is causing the infection • Whether you ...,By taking proactive measures such as frequent...,To reduce your risk of developing complicatio...
1,How do fathers play a role in shaping the hea...,of sperm with morpho - logic abnormalities [16...,Fathers play a significant role in shaping th...,Paternal factors influencing infant birthweig...
2,What is the difference between low birth weig...,"normal from abnormal,soobservationstendtobecat...",Low birth weight (LBW) refers to infants who ...,The difference between low birth weight (LBW)...
3,What is the benefit of using a birthing ball ...,washcloth on your face helps ease tension and ...,Using a birthing ball can help decrease disco...,Birthing ball Leaning or sitting on a lar ge ...
4,What are the implications of these findings f...,of these studies was that of Chanarin and Roth...,The study by Chanarin and Rothman suggests th...,The findings suggest that a dose of 30 mg of ...


In [39]:
df['model_output'].iloc[0]

' To reduce your risk of developing complications from an infection during pregnancy, its important to maintain a healthy lifestyle, including getting regular exercise and eating a balanced diet. Additionally, practicing good hygiene and avoiding exposure to harmful substances can help reduce your'

In [40]:
df['response'].iloc[0]

' By taking proactive measures such as frequent handwashing, avoiding close contact with sick individuals, and ensuring your vaccinations are up to date, you can significantly lower your risk of experiencing complications from an infection during pregnancy. Additionally, maintaining a healthy lifestyle throughout pregnancy can also help boost your immune system and reduce the likelihood of developing complications.'

In [41]:
rouge = evaluate.load('rouge')
results = rouge.compute(predictions=output['model_output'], references=output['response'][0:len(output['model_output'])],
                        use_aggregator=True,
                        use_stemmer=True,)
print(results)

{'rouge1': 0.37580116696448496, 'rouge2': 0.21596739656577219, 'rougeL': 0.30634052940796297, 'rougeLsum': 0.3064891810078144}


In [42]:
output = {}
output['instruction'] = []
output['context'] = []
output['response'] = []
output['model_output'] = []
for data in train_dataset:
    out = inference(data['instruction'], data['context'])
    output['instruction'].append(data['instruction'])
    output['context'].append(data['context'])
    output['response'].append(data['response'])
    output['model_output'].append(out.split('Response: \n')[-1])

KeyboardInterrupt: 

In [None]:
rouge = evaluate.load('rouge')
results = rouge.compute(predictions=output['model_output'], references=output['response'][0:len(output['model_output'])],
                        use_aggregator=True,
                        use_stemmer=True,)
print(results)

In [None]:
df.shape

In [None]:
df.to_csv('prediction.csv')

## Merge to base model

In [4]:
model_id = "mistralai/Mistral-7B-v0.1"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)
base_model = AutoModelForCausalLM.from_pretrained(
    model_id,
    # quantization_config = bnb_config,
    device_map="auto",
    # trust_remote_code=True,
    return_dict=True,
    torch_dtype=torch.float16,
    # use_auth_token=True
)
ft_model = PeftModel.from_pretrained(base_model, "./peft-training-1712523322/checkpoint-1005", is_trainable=False, torch_dtype=torch.bfloat16)

Loading checkpoint shards: 100%|██████████████████| 2/2 [00:05<00:00,  2.70s/it]


In [6]:
merged_model = ft_model.merge_and_unload()
merged_model.save_pretrained('merged_model.pth')