In [1]:
import json
import os
from pprint import pprint
import bitsandbytes as bnb
import torch
import torch.nn as nn
import transformers
from datasets import load_dataset
from huggingface_hub import notebook_login, login
from peft import (
    LoraConfig,
    PeftConfig,
    PeftModel,
    get_peft_model,
    prepare_model_for_kbit_training
)
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig
)
import pandas as pd
from dotenv import load_dotenv
load_dotenv("../finetune/.env")

True

In [2]:
hf_token = os.getenv("hf_token")

In [3]:
login(hf_token)

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /home/s448780/.cache/huggingface/token
Login successful


## Load model

In [5]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

In [6]:
model_id = "mistralai/Mistral-7B-v0.1"

# load model 
model = AutoModelForCausalLM.from_pretrained(
    model_id, 
    quantization_config=bnb_config, 
    use_cache=False, 
    device_map="auto"
)
model.config.pretraining_tp = 1 #parallel GPU

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [14]:
tokenizer = AutoTokenizer.from_pretrained(model_id, 
                                          add_eos_token = True, 
                                          add_bos_token = True)
tokenizer.pad_token = tokenizer.eos_token # default is none
tokenizer.eos_token_id # for attention mask? 

2

## dataset

In [15]:
dataset = load_dataset("csv", data_files="../data/2nd_finetune/final/finetune_2.csv")
dataset

Generating train split: 0 examples [00:00, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['Question', 'Context', 'Answer'],
        num_rows: 576
    })
})

In [16]:
def create_text_row(question, context, answer):
    return f"""<s>### Instruction:\n{question}\n### Context: \n{context}\n### Response: {answer}</s>"""

In [17]:
def formatting_func(df):
    questions = df["Question"]
    contexts = df["Context"]
    answers = df["Answer"]
    texts = []
    for q, c, a in zip(questions, contexts, answers):
        text = create_text_row(q, c, a)
        texts.append(text)
    return {"text" : texts}

In [18]:
dataset = dataset.map(formatting_func, batched = True)

Map:   0%|          | 0/576 [00:00<?, ? examples/s]

In [19]:
dataset

DatasetDict({
    train: Dataset({
        features: ['Question', 'Context', 'Answer', 'text'],
        num_rows: 576
    })
})

In [20]:
print(dataset["train"]["text"][0])

<s>### Instruction:
What was Tardo most intent on?
### Context: 
Tardo had seemed most intent on the question of slavery, and Peo looked for signs of it. He could see none. The people of the planet had had time to conceal some things, of course. But the people they saw in the village wore a proud air of independence no slave could assume.  Saranta apologized for their having to walk, explaining that there was no other means of transportation on the planet.
either with the requirements of paragraphs 1.E.1 through 1.E.7 or obtain permission for the use of the work and the Project Gutenberg™ trademark as set forth in paragraphs 1.E.8 or 1.E.9.
### Response: Given the context provided, Tardo was most intent on the question of slavery. This is evident from the statement, "Tardo had seemed most intent on the question of slavery." Peo was observing for signs of slavery but could not find any, as the people in the village displayed an air of independence that no slave could assume. Therefore, 

## LoRA

In [21]:
model

MistralForCausalLM(
  (model): MistralModel(
    (embed_tokens): Embedding(32000, 4096)
    (layers): ModuleList(
      (0-31): 32 x MistralDecoderLayer(
        (self_attn): MistralSdpaAttention(
          (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): MistralRotaryEmbedding()
        )
        (mlp): MistralMLP(
          (gate_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear4bit(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): MistralRMSNorm()
        (post_attention_layernorm): MistralRMSNorm()
      )
    )

In [22]:
'''
lora_alpha - scaling factor applied to the low-rank matrices. It helps in balancing the contribution of the low-rank update to the original weights. 
Higher values of lora_alpha can increase the influence of the low-rank updates. It's a form of regularization to ensure the model doesn't deviate too much from the original weights.

bias - "none", "all", or "lora_only".
need more research on this.

'''
peft_config = LoraConfig(
    r=32,
    lora_alpha=16, 
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj"
    ],
    bias="none",
    lora_dropout=0.05,
    task_type="CAUSAL_LM",
)

## Training

In [23]:
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, peft_config)

In [24]:
from trl import SFTTrainer, SFTConfig
from transformers import TrainingArguments

model_args = TrainingArguments(
    output_dir="mistral_7b",
    num_train_epochs=3,
    # max_steps=50,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=2,
    gradient_checkpointing=True,
    optim="paged_adamw_32bit", # apparently more efficient for 32 bit GPUs
    logging_steps=20,
    save_strategy="epoch",
    learning_rate=2e-4,
    bf16=True,
    tf32=True,
    max_grad_norm=0.3,
    warmup_ratio=0.03,
    lr_scheduler_type="constant",
    disable_tqdm=False
)

trainer = SFTTrainer(
    model=model,
    train_dataset=dataset["train"],
    dataset_text_field = "text",
    peft_config=peft_config,
    max_seq_length=2048,
    tokenizer=tokenizer,
    packing=True,
    args=model_args,
)

trainer.train()


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Generating train split: 0 examples [00:00, ? examples/s]



Step,Training Loss
20,0.8878
40,0.6912
60,0.5856
80,0.4918




TrainOutput(global_step=99, training_loss=0.6248116926713423, metrics={'train_runtime': 1793.9513, 'train_samples_per_second': 0.436, 'train_steps_per_second': 0.055, 'total_flos': 6.92223548105687e+16, 'train_loss': 0.6248116926713423, 'epoch': 3.0})

## test

In [41]:
# test_prompt = '''[INST] Assume you are a chess master, explain the strategy used by each player based on the provided chess moves. Here are the chess moves in Algenraic Notaion - e4 e6 d4 b6 e5 Bb7 Nf3 h6 Bd3 g5 O-O g4 Nfd2 h5 Ne4 Nc6 Be3 Qe7 Qd2 Bh6 Bxh6 Nxh6 Nf6+ Kd8 Bh7 Nf5 Bxf5 exf5 c3 h4 Qg5 g3 fxg3 hxg3 Qxg3 Qf8 Rxf5 Ne7 Rg5 Ng6 Nd2 Qh6 Rh5 Qg7 Qg4 Bc8 Rxh8+ Qxh8 Rf1 d6 Qg5 Qh4 Qe3 Bb7 e6 [/INST]'''
# test_prompt = '''### Question:
# What is the capital of Nepal?
# ### Context: 
# ### Response:
# '''
test_prompt = "What is the capital of Nepal?"

In [42]:
eval_tokenizer = AutoTokenizer.from_pretrained(
    model_id,
    add_bos_token=True,
)

In [43]:
input_ids = eval_tokenizer(test_prompt, return_tensors="pt").input_ids.to("cuda:0")
input_ids

tensor([[    1,  1824,   349,   272,  5565,   302, 22127,   282, 28804]],
       device='cuda:0')

In [44]:
model.eval()
with torch.inference_mode():
    outputs = model.generate(
        input_ids=input_ids,
        # attention_mask = torch.where(input_ids == 2, 0, 1),
        max_new_tokens=2048,
        do_sample=True, 
        top_p=0.9,
        temperature=0.5
    )

print(tokenizer.decode(outputs[0], skip_special_tokens=True))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


What is the capital of Nepal?

Kathmandu is the capital of Nepal.

Why is Kathmandu the capital of Nepal?

Kathmandu is the capital of Nepal because it is the largest city in the country and the center of government, commerce, and culture. It is also located in a strategic position, surrounded by mountains and valleys, making it a natural hub for trade and communication.

How many capitals has Nepal had?

Nepal has had only one capital city since its independence in 1947, and that is Kathmandu.

Is Kathmandu a good place to live?

Kathmandu can be a good place to live, depending on individual preferences and needs. The city has a rich cultural heritage, a vibrant arts scene, and a growing economy. However, it can also be noisy, polluted, and crowded, which may not be suitable for everyone.

How many capitals have there been in the world?

There have been many capitals throughout history, as countries and empires have risen and fallen. It is difficult to determine an exact number, but s

## testing base knowledge

## saving

In [93]:
trainer.save_model()



## saving to hub

In [5]:
import warnings
warnings.filterwarnings("ignore")

In [6]:
model = AutoModelForCausalLM.from_pretrained("./mistral_7b/", device_map="cuda:0", quantization_config=bnb_config)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [7]:
model_id = "mistralai/Mistral-7B-v0.1"
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token

In [10]:
model.push_to_hub("adnaan525/opensi_mistral_3tasks")

README.md:   0%|          | 0.00/31.0 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/336M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/adnaan525/opensi_mistral_3tasks/commit/222beff7316e4508affa3e2e8eb146dd25b0750b', commit_message='Upload MistralForCausalLM', commit_description='', oid='222beff7316e4508affa3e2e8eb146dd25b0750b', pr_url=None, pr_revision=None, pr_num=None)