In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig,HfArgumentParser,TrainingArguments,pipeline, logging
from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training, get_peft_model
import os,torch, wandb
from datasets import load_dataset
from trl import SFTTrainer

In [None]:
#Put your Hf and wandb Secrets here
secret_hf = ""
secret_wandb = ""
!huggingface-cli login --token $secret_hf

In [None]:
wandb.login(key = secret_wandb)
run = wandb.init(
    project='Fine tuning mistral 7B',
    job_type="training",
    anonymous="allow"
)

[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33msaksham1387[0m ([33msaksham-321[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [None]:
base_model = "mistralai/Mistral-7B-Instruct-v0.3"

new_model = "mistral_7b_stutter"

In [None]:
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
from datasets import Dataset
import os

# Path to your dataset directory
data_dir = "/content/drive/MyDrive/Stutter_Main 3"

# Load the text files from the directory
text_data = []
for file_name in os.listdir(data_dir):
    if file_name.endswith(".txt"):
        with open(os.path.join(data_dir, file_name), "r", encoding="utf-8") as file:
            text_data.append(file.read())

# Create a dataset dictionary
dataset_dict = {
    "text": text_data,
    "label": [0] * len(text_data)  # Dummy labels, replace with your actual labels
}

# Create a Dataset object
dataset = Dataset.from_dict(dataset_dict)

# Print the first few examples
print(dataset[:5])




In [None]:
# Load base model(Mistral 7B)
bnb_config = BitsAndBytesConfig(
    load_in_4bit= True,
    bnb_4bit_quant_type= "nf4",
    bnb_4bit_compute_dtype= torch.bfloat16,
    bnb_4bit_use_double_quant= False,
)
model = AutoModelForCausalLM.from_pretrained(
        base_model,

        quantization_config=bnb_config,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        trust_remote_code=True,
)
model.config.use_cache = False # silence the warnings. Please re-enable for inference!
model.config.pretraining_tp = 1
model.gradient_checkpointing_enable()

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
tokenizer.padding_side = 'right'
tokenizer.pad_token = tokenizer.eos_token
tokenizer.add_eos_token = True
tokenizer.add_bos_token, tokenizer.add_eos_token

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/601 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.55G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/137k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/587k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.96M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

(True, True)

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
#Adding the adapters in the layers
model = prepare_model_for_kbit_training(model)
peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=64,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj","gate_proj"]
)
model = get_peft_model(model, peft_config)

In [None]:
#Hyperparamter
training_arguments = TrainingArguments(
    output_dir="./results",
    num_train_epochs=1,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=1,
    optim="paged_adamw_32bit",
    save_steps=25,
    logging_steps=25,
    learning_rate=2e-4,
    weight_decay=0.001,
    fp16=False,
    bf16=False,
    max_grad_norm=0.3,
    max_steps=-1,
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="constant",
    report_to="wandb"
)


In [None]:
# Setting sft parameters
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=peft_config,
    max_seq_length= None,
    dataset_text_field="text",
    tokenizer=tokenizer,
    args=training_arguments,
    packing= False,
)
trainer.train()




Map:   0%|          | 0/22 [00:00<?, ? examples/s]



Step,Training Loss


TrainOutput(global_step=6, training_loss=1.6188340187072754, metrics={'train_runtime': 287.4698, 'train_samples_per_second': 0.077, 'train_steps_per_second': 0.021, 'total_flos': 974031490646016.0, 'train_loss': 1.6188340187072754, 'epoch': 1.0})

In [None]:
trainer.model.save_pretrained(new_model)
wandb.finish()
model.config.use_cache = True



VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
train/epoch,▁
train/global_step,▁

0,1
total_flos,974031490646016.0
train/epoch,1.0
train/global_step,6.0
train_loss,1.61883
train_runtime,287.4698
train_samples_per_second,0.077
train_steps_per_second,0.021


In [None]:
trainer.model.push_to_hub(new_model, use_temp_dir=False)

adapter_model.safetensors:   0%|          | 0.00/369M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/Saksham54/mistral_7b_stutter/commit/6c7d84aa24fd704278b9766d2b08a52e8442104c', commit_message='Upload model', commit_description='', oid='6c7d84aa24fd704278b9766d2b08a52e8442104c', pr_url=None, pr_revision=None, pr_num=None)

In [None]:
logging.set_verbosity(logging.CRITICAL)

prompt = "What is 2+2?"
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=200)
result = pipe(f"<s>[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

<s>[INST] What is 2+2? [/INST] The sum of 2 and 2 is 4.


In [None]:
base_model_reload = AutoModelForCausalLM.from_pretrained(
        base_model,
        return_dict=True,
        low_cpu_mem_usage=True,
        device_map="auto",
        trust_remote_code=True,
        offload_folder="offload"
)


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [None]:
model = PeftModel.from_pretrained(base_model_reload, new_model,offload_folder = "offload")




NotImplementedError: Cannot copy out of meta tensor; no data!

In [None]:
#------------------------ Merging the PEFT with the Base-Model-----------------------------
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')



Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig,HfArgumentParser,TrainingArguments,pipeline, logging
from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training, get_peft_model
import os,torch, wandb
from datasets import load_dataset
from trl import SFTTrainer

In [None]:

base_model = "mistralai/Mistral-7B-Instruct-v0.3"
base_model_reload = AutoModelForCausalLM.from_pretrained(
        base_model,
        return_dict=True,
        low_cpu_mem_usage=True,
        device_map="auto",
        trust_remote_code=True,
        torch_dtype=torch.float16
)

tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True,add_eos_token=False)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"



Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 

In [None]:
model= PeftModel.from_pretrained(base_model_reload,"/content/mistral_7b_stutter")
model= model.merge_and_unload()



KeyError: 'base_model.model.model.model.layers.10.input_layernorm'

In [None]:
model.push_to_hub(new_model, use_temp_dir=False)
tokenizer.push_to_hub(new_model, use_temp_dir=False)
