In [1]:
%%capture
!pip install -U transformers
!pip install -U datasets
!pip install -U accelerate
!pip install -U peft
!pip install -U trl
!pip install -U bitsandbytes
!pip install -U wandb

In [2]:
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import (
    LoraConfig,
    PeftModel,
    prepare_model_for_kbit_training,
    get_peft_model,
)
import os, torch, wandb
from datasets import load_dataset
from trl import SFTTrainer, setup_chat_format

In [5]:
from huggingface_hub import login
from google.colab import userdata
hf_token=userdata.get('hft')



login(token = hf_token)

wb_token = userdata.get('wandb')

wandb.login(key=wb_token)
run = wandb.init(
    project='Fine-tune Llama 3 8B on Medical Dataset',
    job_type="training",
    anonymous="allow"
)

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mlucyfursinahag[0m ([33mlucyfursinahag-vit-bhopal[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


In [6]:

base_model = "meta-llama/Meta-Llama-3-8B-Instruct"  # Update with your local path in Colab
dataset_name = "ruslanmv/ai-medical-chatbot"
new_model = "llama-3-8b-med-tuner"


In [7]:
torch_dtype = torch.float16
attn_implementation = "eager"

In [8]:
!pip install -U bitsandbytes
!pip install --upgrade transformers timm

# QLoRA config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch_dtype,
    bnb_4bit_use_double_quant=True,
)

# Load model
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    device_map="auto",
    attn_implementation=attn_implementation,

)





The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/654 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/187 [00:00<?, ?B/s]

In [9]:
# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model)


tokenizer_config.json:   0%|          | 0.00/51.0k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/73.0 [00:00<?, ?B/s]

In [10]:
# LoRA config
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=['up_proj', 'down_proj', 'gate_proj', 'k_proj', 'q_proj', 'v_proj', 'o_proj']
)
model = get_peft_model(model, peft_config)

In [11]:
#Importing the dataset
dataset_name = "ruslanmv/ai-medical-chatbot"

dataset = load_dataset(dataset_name, split="all")
dataset = dataset.shuffle(seed=65).select(range(1000)) # Only use 1000 samples for quick demo

def format_chat_template(row):
    row_json = [{"role": "user", "content": row["Patient"]},
               {"role": "assistant", "content": row["Doctor"]}]
    row["text"] = tokenizer.apply_chat_template(row_json, tokenize=False)
    return row

dataset = dataset.map(
    format_chat_template,
    num_proc=4,
)

dataset['text'][3]

README.md:   0%|          | 0.00/863 [00:00<?, ?B/s]

dialogues.parquet:   0%|          | 0.00/142M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/256916 [00:00<?, ? examples/s]

Map (num_proc=4):   0%|          | 0/1000 [00:00<?, ? examples/s]

'<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\nFell on sidewalk face first about 8 hrs ago. Swollen, cut lip bruised and cut knee, and hurt pride initially. Now have muscle and shoulder pain, stiff jaw(think this is from the really swollen lip),pain in wrist, and headache. I assume this is all normal but are there specific things I should look for or will I just be in pain for a while given the hard fall?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nHello and welcome to HCM,The injuries caused on various body parts have to be managed.The cut and swollen lip has to be managed by sterile dressing.The body pains, pain on injured site and jaw pain should be managed by pain killer and muscle relaxant.I suggest you to consult your primary healthcare provider for clinical assessment.In case there is evidence of infection in any of the injured sites, a course of antibiotics may have to be started to control the infection.Thanks and take careDr Shailja P Wahal<|eot_i

In [12]:
dataset = dataset.train_test_split(test_size=0.1)

In [13]:
new_model="llama-8b-med-tuned"

In [14]:
training_arguments = TrainingArguments(
    output_dir=new_model,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=1,
    optim="paged_adamw_32bit",
    num_train_epochs=1,
    evaluation_strategy="steps",
    eval_steps=0.2,
    logging_steps=1,
    warmup_steps=10,
    logging_strategy="steps",
    learning_rate=2e-4,
    fp16=False,
    bf16=False,
    group_by_length=True,
    report_to="wandb",
    gradient_checkpointing=True

)




In [26]:
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    peft_config=peft_config,
    #max_seq_length=512,
    #dataset_text_field="text",
    tokenizer=tokenizer,
    args=training_arguments,
    #packing= False,
)

  trainer = SFTTrainer(


Applying chat template to train dataset:   0%|          | 0/900 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/900 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/900 [00:00<?, ? examples/s]

Applying chat template to eval dataset:   0%|          | 0/100 [00:00<?, ? examples/s]

Tokenizing eval dataset:   0%|          | 0/100 [00:00<?, ? examples/s]

Tokenizing eval dataset:   0%|          | 0/100 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [27]:
# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model, padding_side="right") # padding side set to right to be consistent with chat template

# Add padding token if not present
if tokenizer.pad_token is None:
  tokenizer.add_special_tokens({'pad_token': '[PAD]'})
model.resize_token_embeddings(len(tokenizer))

# Ensure the model is in training mode before starting training
#model.train() # Add this line

Embedding(128257, 4096)

In [28]:
for param in model.parameters():
    if not param.requires_grad:
        print("Parameter does not require gradients")


Parameter does not require gradients
Parameter does not require gradients
Parameter does not require gradients
Parameter does not require gradients
Parameter does not require gradients
Parameter does not require gradients
Parameter does not require gradients
Parameter does not require gradients
Parameter does not require gradients
Parameter does not require gradients
Parameter does not require gradients
Parameter does not require gradients
Parameter does not require gradients
Parameter does not require gradients
Parameter does not require gradients
Parameter does not require gradients
Parameter does not require gradients
Parameter does not require gradients
Parameter does not require gradients
Parameter does not require gradients
Parameter does not require gradients
Parameter does not require gradients
Parameter does not require gradients
Parameter does not require gradients
Parameter does not require gradients
Parameter does not require gradients
Parameter does not require gradients
P

In [29]:
for name, param in model.named_parameters():
    if param.requires_grad and not torch.is_floating_point(param.data):
        print(f"Parameter {name} has invalid dtype: {param.data.dtype}")


In [30]:
for param in model.parameters():
    param.requires_grad = True


RuntimeError: only Tensors of floating point and complex dtype can require gradients

In [31]:
model.train()

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(128257, 4096)
        (layers): ModuleList(
          (0-31): 32 x LlamaDecoderLayer(
            (self_attn): LlamaAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): lor

In [32]:


trainer.train()

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Step,Training Loss,Validation Loss
180,2.9112,2.675745
360,2.4206,2.665204
540,2.359,2.60989
720,2.5555,2.567071
900,2.3148,2.542778


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


TrainOutput(global_step=900, training_loss=2.521121243370904, metrics={'train_runtime': 1849.8417, 'train_samples_per_second': 0.487, 'train_steps_per_second': 0.487, 'total_flos': 9345230984871936.0, 'train_loss': 2.521121243370904})

In [33]:
wandb.finish()
model.config.use_cache = True

0,1
eval/loss,█▇▅▂▁
eval/mean_token_accuracy,▁▁▄▆█
eval/runtime,█▆▁▆▆
eval/samples_per_second,▁▄█▃▃
eval/steps_per_second,▁▄█▃▃
train/epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇█
train/global_step,▁▁▁▁▁▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▄▄▄▅▅▅▆▆▆▆▆▇▇▇▇▇████
train/grad_norm,▁▁▁▁▁▁▂▁▁▁▅▁▁▁▁▁▁▁▁▁█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/learning_rate,▇████▇▇▇▇▆▆▆▆▆▆▆▆▆▆▅▅▅▄▄▄▃▃▃▃▃▃▃▂▂▂▂▁▁▁▁
train/loss,▇▆▆▄▃▇▁█▅▆▅▆▄▇▃▆▄▆▇▆▆▅▃▇█▅█▆▇▂▇▆▇▅▆▆▄▇▄▅

0,1
eval/loss,2.54278
eval/mean_token_accuracy,0.45691
eval/runtime,45.9363
eval/samples_per_second,2.177
eval/steps_per_second,2.177
total_flos,9345230984871936.0
train/epoch,1.0
train/global_step,900.0
train/grad_norm,5.40691
train/learning_rate,0.0


In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [35]:
from google.colab import files
uploaded = files.upload()

Saving faster_whisper_module.py to faster_whisper_module.py


In [36]:
import faster_whisper_module

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m55.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m39.5/39.5 MB[0m [31m24.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m38.6/38.6 MB[0m [31m18.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.3/13.3 MB[0m [31m54.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.0/46.0 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.8/86.8 kB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting en-core-web-sm==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m78.6 MB/s[0m eta [36

In [38]:
trans=faster_whisper_module.process_audio()
print(trans)

Saving test_audio001.wav to test_audio001.wav


config.json:   0%|          | 0.00/2.31k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.20M [00:00<?, ?B/s]

model.bin:   0%|          | 0.00/145M [00:00<?, ?B/s]

vocabulary.txt:   0%|          | 0.00/460k [00:00<?, ?B/s]


🔍 Transcribing...
 Maintain proper hydration and rest for recovery.


In [39]:
print(trans)
query=input("enter the query: ")
t=trans+query
print(t)

 Maintain proper hydration and rest for recovery.
enter the query: i have frequent fever and pain in chest
 Maintain proper hydration and rest for recovery.i have frequent fever and pain in chest


In [40]:
messages = [
    {
        "role": "user",
        "content": "t"
    }
]

prompt = tokenizer.apply_chat_template(messages, tokenize=False,
                                       add_generation_prompt=True)

inputs = tokenizer(prompt, return_tensors='pt', padding=True,
                   truncation=True).to("cuda")

outputs = model.generate(**inputs, max_length=150,
                         num_return_sequences=1)

text = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(text.split("assistant")[1])

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.




Hello, I have gone through your query. From what you have mentioned, it seems that you are having a dry cough. I would suggest you to use some cough syrups like Tab Robitussin or Tab Mucodyne for 3-5 days. Also, you can use some expectorant like Tab Mucopain for 3-5 days. If you have a fever, then you can use some antipyretic like Tab Paracetamol for 3-5 days. Hope I have answered your query. Let me know if I can assist you further. Regards, Dr. Shinas Hussain, General & Family Physician


In [41]:
trainer.model.save_pretrained(new_model)
#trainer.model.push_to_hub(new_model, use_temp_dir=False)

