In [1]:
# Install required libraries
!pip install transformers datasets accelerate bitsandbytes peft trl wandb huggingface_hub

Collecting datasets
  Downloading datasets-3.4.0-py3-none-any.whl.metadata (19 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.45.3-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Collecting trl
  Downloading trl-0.15.2-py3-none-any.whl.metadata (11 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-

In [2]:
# Login to Hugging Face
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [3]:
# Login to Weights & Biases
import wandb
wandb.login()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mdeepdey524[0m ([33mdeepdey524-manipal-university-jaipur[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [4]:
import torch
from datasets import load_dataset, Dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments
from trl import SFTTrainer
import os


In [5]:
# Load and preprocess the dataset
data = load_dataset("tatsu-lab/alpaca", split="train")
data_df = data.to_pandas()
data_df = data_df[:5000]  # Using first 5000 samples to stay within Colab limits
data_df["text"] = data_df.apply(lambda x: f"###Human: {x['instruction']} {x['input']} ###Assistant: {x['output']}", axis=1)
data = Dataset.from_pandas(data_df)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/7.47k [00:00<?, ?B/s]

(…)-00000-of-00001-a09b74b3ef9c3b56.parquet:   0%|          | 0.00/24.2M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/52002 [00:00<?, ? examples/s]

In [6]:
# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")
tokenizer.pad_token = tokenizer.eos_token


tokenizer_config.json:   0%|          | 0.00/996 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

In [7]:
# Enable 4-bit quantization to fit in Colab
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16
)

In [8]:
# Load Mistral-7B with quantization
model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-v0.1",
    quantization_config=quantization_config,
    device_map="auto"
)


config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

In [9]:
# Prepare for training
model.config.use_cache = False
model.config.pretraining_tp = 1
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

In [10]:
# Apply LoRA fine-tuning
peft_config = LoraConfig(
    r=16, lora_alpha=16, lora_dropout=0.05, bias="none", task_type="CAUSAL_LM", target_modules=["q_proj", "v_proj"]
)
model = get_peft_model(model, peft_config)

In [11]:
# Set training arguments
training_arguments = TrainingArguments(
    output_dir="MISTRAL-7B-LLM-API-USE",
    per_device_train_batch_size=2,  # Lower batch size to fit in free T4 GPU
    gradient_accumulation_steps=4,
    optim="paged_adamw_32bit",
    learning_rate=2e-4,
    lr_scheduler_type="cosine",
    save_strategy="epoch",
    logging_steps=10,
    num_train_epochs=1,
    fp16=True,
    push_to_hub=True,
    report_to="wandb",
    max_steps=150
)

In [12]:
# Initialize trainer
trainer = SFTTrainer(
    model=model,
    train_dataset=data,
    peft_config=peft_config,
    args=training_arguments,
    tokenizer=tokenizer
)


  trainer = SFTTrainer(


Converting train dataset to ChatML:   0%|          | 0/5000 [00:00<?, ? examples/s]

Applying chat template to train dataset:   0%|          | 0/5000 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/5000 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/5000 [00:00<?, ? examples/s]

In [13]:
# Train the model
trainer.train()



  return fn(*args, **kwargs)


Step,Training Loss
10,1.7532
20,1.3685
30,1.3019
40,1.2173
50,1.1922
60,1.2433
70,1.1364
80,1.2274
90,1.1758
100,1.218


TrainOutput(global_step=150, training_loss=1.2448005485534668, metrics={'train_runtime': 1116.465, 'train_samples_per_second': 1.075, 'train_steps_per_second': 0.134, 'total_flos': 5929056664043520.0, 'train_loss': 1.2448005485534668})

In [14]:
# Push the fine-tuned model to Hugging Face Hub
trainer.model.push_to_hub("MISTRAL-7B-LLM-API-USE")
tokenizer.push_to_hub("MISTRAL-7B-LLM-API-USE")

README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

No files have been modified since last commit. Skipping to prevent empty commit.


CommitInfo(commit_url='https://huggingface.co/deep0210/MISTRAL-7B-LLM-API-USE/commit/a8be24e35bf87d86967d9497b58ed281cce9f9dc', commit_message='Upload tokenizer', commit_description='', oid='a8be24e35bf87d86967d9497b58ed281cce9f9dc', pr_url=None, repo_url=RepoUrl('https://huggingface.co/deep0210/MISTRAL-7B-LLM-API-USE', endpoint='https://huggingface.co', repo_type='model', repo_id='deep0210/MISTRAL-7B-LLM-API-USE'), pr_revision=None, pr_num=None)

In [17]:
from transformers import AutoConfig

MODEL_ID = "MISTRAL-7B-LLM-API-USE"

# Create config.json with correct model type
config = AutoConfig.from_pretrained("mistralai/Mistral-7B-v0.1")
config.save_pretrained(MODEL_ID)

# Push updated config to Hugging Face Hub
from huggingface_hub import HfApi
api = HfApi()
api.upload_folder(folder_path=MODEL_ID, repo_id="deep0210/MISTRAL-7B-LLM-API-USE")


optimizer.pt:   0%|          | 0.00/54.6M [00:00<?, ?B/s]

Upload 3 LFS files:   0%|          | 0/3 [00:00<?, ?it/s]

rng_state.pth:   0%|          | 0.00/14.2k [00:00<?, ?B/s]

scheduler.pt:   0%|          | 0.00/1.06k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/deep0210/MISTRAL-7B-LLM-API-USE/commit/cdd8c4f15d59b10a6c7df30e713d67868a63d5e2', commit_message='Upload folder using huggingface_hub', commit_description='', oid='cdd8c4f15d59b10a6c7df30e713d67868a63d5e2', pr_url=None, repo_url=RepoUrl('https://huggingface.co/deep0210/MISTRAL-7B-LLM-API-USE', endpoint='https://huggingface.co', repo_type='model', repo_id='deep0210/MISTRAL-7B-LLM-API-USE'), pr_revision=None, pr_num=None)

In [18]:
print("Model fine-tuning complete and uploaded to Hugging Face!")

Model fine-tuning complete and uploaded to Hugging Face!
