In [13]:
!pip install --no-deps xformers trl peft accelerate bitsandbytes evaluate nltk rouge_score

Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Collecting rouge_score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Downloading evaluate-0.4.3-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0/84.0 kB[0m [31m7.2 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: rouge_score
  Building wheel for rouge_score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge_score: filename=rouge_score-0.1.2-py3-none-any.whl size=24935 sha256=bd79e58973daa50c4c428467387f2245b6dd07180e14d9fd71f61ddca2a5b2bf
  Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4
Successfully built rouge_score
Installing collected packages: rouge_score, evaluate
Successfully installed evaluate-0.4.3 rouge_score-0.1.2


In [2]:
!pip install "unsloth[colab-new] @ git+https://GitHub.com/unslothai/unsloth.git"

Collecting unsloth@ git+https://GitHub.com/unslothai/unsloth.git (from unsloth[colab-new]@ git+https://GitHub.com/unslothai/unsloth.git)
  Cloning https://GitHub.com/unslothai/unsloth.git to /tmp/pip-install-042_wcy8/unsloth_9712af0696314e33ba0783bc253a882c
  Running command git clone --filter=blob:none --quiet https://GitHub.com/unslothai/unsloth.git /tmp/pip-install-042_wcy8/unsloth_9712af0696314e33ba0783bc253a882c
  Resolved https://GitHub.com/unslothai/unsloth.git to commit 85f1fa096afde5efe2fb8521d8ceec8d13a00715
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone


In [3]:
import json
import torch
from datasets import load_dataset, Dataset
from huggingface_hub import notebook_login
from transformers import TrainingArguments
from trl import SFTTrainer
from unsloth import FastLanguageModel
import evaluate

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


---
**FINE-TUNING LLAMA3 8B**

In [4]:
# Load the pre-quantized 4-bit Llama-3 8B Instruct checkpoint and its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/llama-3-8b-Instruct-bnb-4bit",
    max_seq_length = 2048,
    # None lets Unsloth choose the compute dtype for the current GPU. A
    # hard-coded float16 would disagree with the trainer cell, which enables
    # bf16 whenever torch.cuda.is_bf16_supported() is true.
    dtype = None,
    load_in_4bit = True,
)

# Wrap the base model with LoRA adapters on the attention projections
# (q/k/v/o) and the MLP projections (gate/up/down).
model = FastLanguageModel.get_peft_model(
    model,
    r = 16,
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj"],
    lora_alpha = 16,
    lora_dropout = 0,
    use_gradient_checkpointing = True,
    random_state = 42
)

==((====))==  Unsloth 2024.12.4: Fast Llama patching. Transformers:4.46.3.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu121. CUDA: 7.5. CUDA Toolkit: 12.1. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.70G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/220 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/51.1k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/345 [00:00<?, ?B/s]

Unsloth 2024.12.4 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


In [8]:
# Fetch the training split of the medical Q&A dataset from the Hugging Face Hub.
dataset_train = load_dataset(
    "sathvik123/llama3-medical-dataset",
    split="train",
)

In [9]:
# Peek at the first formatted prompt. Index the row first so only one example
# is read, instead of materializing the whole "prompt" column to take item 0.
dataset_train[0]["prompt"]

"<|start_header_id|>system<|end_header_id|>If you are a doctor, please answer the medical questions based on the patient's description.<|eot_id|><|start_header_id|>user<|end_header_id|> This is the question: im a 39yr old female,i just had a spinal fusion surgery on my back in December. in December I missed my period,january comes I get my period but its really heavy and then I started vomiting for 8 days straight when my period was over s were my symptoms, then febuary comes same thing I get my period and vomit for 8 days again,now march just before my period started I vomited for 8 days then my period started and I vomited for another few days . ive been hospitalized for dehydration 5 times since jan. before my back surgery I was on a high dose of morphine for pain but im off it now and have been for 6 weeks my dr thought it could be my receptors in my brain he also said I didnt have any of my own endorphins to fight pain so I need to work on building them back up.but even now my dr 

In [15]:
# Probe bf16 support once so the fp16/bf16 flags can never disagree.
use_bf16 = torch.cuda.is_bf16_supported()

# Supervised fine-tuning on the pre-formatted "prompt" text column.
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset_train,
    dataset_text_field = "prompt",
    max_seq_length = 2048,
    dataset_num_proc = 2,
    packing = False,
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,  # effective batch size = 2 * 4 = 8
        warmup_steps = 5,
        # Training length is set by max_steps alone. The previous
        # num_train_epochs=1 was overridden by max_steps and only produced a
        # warning, so it has been dropped.
        max_steps = 100,
        learning_rate = 2e-4,
        fp16 = not use_bf16,
        bf16 = use_bf16,
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 42,
        output_dir = "outputs",
        report_to = "none",
    ),
)


Map (num_proc=2):   0%|          | 0/89732 [00:00<?, ? examples/s]

max_steps is given, it will override any value given in num_train_epochs


In [16]:
# Run the fine-tuning loop and keep the value returned by train() for later inspection.
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 89,732 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 100
 "-____-"     Number of trainable parameters = 41,943,040


Step,Training Loss
1,2.3002
2,2.2305
3,2.2984
4,2.0405
5,2.1464
6,2.4351
7,2.0371
8,2.3685
9,2.2906
10,2.1819


---
**SAVING MODEL TO HUGGING FACE HUB**

In [33]:
# Export the fine-tuned model to GGUF with q4_k_m quantization; the output
# is written under the local "gguf_model" directory.
model.save_pretrained_gguf(
    "gguf_model",
    tokenizer,
    quantization_method="q4_k_m",
)

Unsloth: You have 1 CPUs. Using `safe_serialization` is 10x slower.
We shall switch to Pytorch saving, which might take 3 minutes and not 30 minutes.
To force `safe_serialization`, set it to `None` instead.
Unsloth: Kaggle/Colab has limited disk space. We need to delete the downloaded
model which will save 4-16GB of disk space, allowing you to save on Kaggle/Colab.
Unsloth: Will remove a cached repo with size 5.7G


Unsloth: Merging 4bit and LoRA weights to 16bit...
Unsloth: Will use up to 5.9 out of 12.67 RAM for saving.
Unsloth: Saving model... This might take 5 minutes ...


 47%|████▋     | 15/32 [00:01<00:01, 11.90it/s]We will save to Disk and not RAM now.
100%|██████████| 32/32 [01:14<00:00,  2.34s/it]


Unsloth: Saving tokenizer... Done.
Unsloth: Saving gguf_model/pytorch_model-00001-of-00004.bin...
Unsloth: Saving gguf_model/pytorch_model-00002-of-00004.bin...
Unsloth: Saving gguf_model/pytorch_model-00003-of-00004.bin...
Unsloth: Saving gguf_model/pytorch_model-00004-of-00004.bin...
Done.


Unsloth: Converting llama model. Can use fast conversion = False.


==((====))==  Unsloth: Conversion from QLoRA to GGUF information
   \\   /|    [0] Installing llama.cpp might take 3 minutes.
O^O/ \_/ \    [1] Converting HF to GGUF 16bits might take 3 minutes.
\        /    [2] Converting GGUF 16bits to ['q4_k_m'] might take 10 minutes each.
 "-____-"     In total, you will have to wait at least 16 minutes.

Unsloth: Installing llama.cpp. This might take 3 minutes...
Unsloth: CMAKE detected. Finalizing some steps for installation.
Unsloth: [1] Converting model at gguf_model into f16 GGUF format.
The output location will be /content/gguf_model/unsloth.F16.gguf
This might take 3 minutes...
INFO:hf-to-gguf:Loading model: gguf_model
INFO:gguf.gguf_writer:gguf: This GGUF file is for Little Endian only
INFO:hf-to-gguf:Exporting model...
INFO:hf-to-gguf:gguf: loading model weight map from 'pytorch_model.bin.index.json'
INFO:hf-to-gguf:gguf: loading model part 'pytorch_model-00001-of-00004.bin'
INFO:hf-to-gguf:token_embd.weight,           torch.float16 --> F

In [34]:
import os
from getpass import getpass

from huggingface_hub import HfApi

# SECURITY: never hardcode an access token in a notebook. Read it from the
# HF_TOKEN environment variable, or prompt for it without echoing. Any token
# that was previously committed in this cell must be revoked on the Hub.
hf_token = os.environ.get("HF_TOKEN") or getpass("Hugging Face write token: ")
api = HfApi(token=hf_token)

# Create the target model repo (no-op if it already exists) and upload the
# quantized GGUF file produced by the export cell.
model_id = "sathvik123/llama3-ChatDoc"
api.create_repo(model_id, exist_ok=True, repo_type="model")
api.upload_file(
    path_or_fileobj="/content/gguf_model/unsloth.Q4_K_M.gguf",
    path_in_repo="gguf_model-unsloth.Q4_K_M.gguf",
    repo_id=model_id,
)

unsloth.Q4_K_M.gguf:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/sathvik123/llama3-ChatDoc/commit/f04abcb819de2b1a5a9c665067581e6d705d22f5', commit_message='Upload gguf_model-unsloth.Q4_K_M.gguf with huggingface_hub', commit_description='', oid='f04abcb819de2b1a5a9c665067581e6d705d22f5', pr_url=None, repo_url=RepoUrl('https://huggingface.co/sathvik123/llama3-ChatDoc', endpoint='https://huggingface.co', repo_type='model', repo_id='sathvik123/llama3-ChatDoc'), pr_revision=None, pr_num=None)