In [2]:
import json

# Load original JSON
with open("/content/3k.json", "r") as f:
    data = json.load(f)

# Convert format
formatted_data = []
for item in data:
    text = item.get("text", "")
    if "q:" in text and "a:" in text:
        parts = text.split("q:", 1)[1].split("a:")
        if len(parts) == 2:
            question = parts[0].strip()
            answer = parts[1].strip()
            formatted_data.append({
                "instruction": question,
                "input": "",
                "output": answer
            })

with open("health_insurance_qa_formatted.json", "w") as f:
    json.dump(formatted_data, f, indent=2)

print(f"Total examples: {len(formatted_data)}")


Total examples: 998


In [18]:
!pip install peft
!pip install accelerate
!pip install bitsandBytes
!pip install transformers
!pip install datasets



In [19]:
!pip install GPUtil



In [20]:
import torch
import GPUtil
import os

GPUtil.showUtilization()

if torch.cuda.is_available():
    print("GPU is available")
else:
    print("GPU is not available, using CPU instead")

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

| ID | GPU | MEM |
------------------
|  0 |  0% |  0% |
GPU is available


In [21]:
import torch
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, LlamaTokenizer
from huggingface_hub import notebook_login
from datasets import load_dataset
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model

if "COLAB_GPU" in os.environ:
  from google.colab import output
  output.enable_custom_widget_manager()

In [22]:
if "COLAB_GPU" in os.environ:
  !huggingface-cli login
else:
  notebook_login()


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    A token is already saved on your machine. Run `huggingface-cli whoami` to get more information or `huggingface-cli logout` if you want to log out.
    Setting a new token will erase the existing one.
    To log in, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) y
Token is valid (permission: fineG

In [24]:
base_model_id = "mistralai/Mistral-7B-Instruct-v0.2"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

model = AutoModelForCausalLM.from_pretrained(base_model_id, quantization_config=bnb_config)

config.json:   0%|          | 0.00/596 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Fetching 3 files:   0%|          | 0/3 [00:00<?, ?it/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

In [None]:
!git clone https://github.com/poloclub/Fine-tuning-LLMs.git

Cloning into 'Fine-tuning-LLMs'...
remote: Enumerating objects: 47, done.[K
remote: Counting objects: 100% (47/47), done.[K
remote: Compressing objects: 100% (42/42), done.[K
remote: Total 47 (delta 14), reused 29 (delta 4), pack-reused 0 (from 0)[K
Receiving objects: 100% (47/47), 9.34 MiB | 24.90 MiB/s, done.
Resolving deltas: 100% (14/14), done.


In [25]:
from datasets import load_dataset

# Load your JSON dataset
train_dataset = load_dataset(
    "json",
    data_files="/content/health_insurance_qa_formatted.json",
    split="train"
)

Generating train split: 0 examples [00:00, ? examples/s]

In [28]:
tokenizer = LlamaTokenizer.from_pretrained(base_model_id, use_fast=False, trust_remote_code=True, add_eos_token=True)

if tokenizer.pad_token is None:
  tokenizer.add_special_tokens({'pad_token': tokenizer.eos_token})

tokenizer_config.json:   0%|          | 0.00/2.10k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

In [30]:
tokenized_train_dataset = []
for phrase in train_dataset:
  # Construct the text field from instruction and output
  text = phrase["instruction"] + phrase["output"]
  tokenized_train_dataset.append(tokenizer(text)) # Tokenize the combined text

In [31]:
tokenized_train_dataset[1]

{'input_ids': [1, 4230, 28705, 6183, 734, 28705, 1388, 28705, 1411, 28705, 6919, 28705, 1024, 28705, 3168, 28804, 335, 272, 1338, 693, 4568, 1753, 403, 272, 624, 395, 272, 10953, 28725, 6183, 734, 6741, 3573, 1388, 272, 1411, 13, 1126, 4935, 8864, 28713, 1749, 390, 1043, 390, 272, 3749, 2650, 628, 403, 264, 1338, 28723, 272, 2445, 868, 17827, 298, 369, 13, 3173, 797, 2650, 628, 28725, 304, 390, 1043, 390, 6183, 734, 511, 459, 506, 264, 3452, 1835, 272, 3749, 2650, 628, 28725, 590, 3573, 1388, 13, 10387, 6919, 8864, 28713, 477, 706, 28723, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}

In [32]:
tokenized_train_dataset[2]

{'input_ids': [1, 6802, 28705, 1235, 28705, 7861, 492, 28705, 744, 28705, 287, 28705, 2796, 28804, 1591, 294, 492, 744, 287, 12784, 272, 6676, 3345, 28725, 575, 27792, 6556, 3345, 28725, 5714, 3345, 304, 13, 10809, 7631, 28723, 736, 349, 264, 14890, 2434, 11226, 298, 272, 2809, 4908, 1877, 3874, 28723, 736, 349, 264, 13, 3802, 3699, 1070, 304, 28705, 28750, 28734, 28823, 5029, 339, 1339, 513, 297, 1352, 893, 28723, 297, 4518, 368, 2136, 544, 6966, 354, 3345, 304, 13679, 13, 1478, 6823, 486, 7861, 492, 28723, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}

In [33]:
tokenizer.eos_token

'</s>'

In [34]:
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

config = LoraConfig(
    r=8,
    lora_alpha=64,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    bias="none",
    lora_dropout=0.05,
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, config)

In [35]:
trainer = transformers.Trainer(
    model=model,
    train_dataset=tokenized_train_dataset,
    args=transformers.TrainingArguments(
        output_dir="./finetunedModel",
        per_device_train_batch_size=2,
        gradient_accumulation_steps=2,
        num_train_epochs=3,
        learning_rate=2e-5,
        max_steps=20,
        bf16=False,
        optim="paged_adamw_8bit",
        logging_dir="./log",
        save_strategy="epoch",
        save_steps=50,
        logging_steps=10

),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
model.config.use_cache=False
trainer.train()

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33maakashkshirsagar2000[0m ([33maakashkshirsagar2000-3k[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


  return fn(*args, **kwargs)


Step,Training Loss
10,2.3933
20,2.0558


TrainOutput(global_step=20, training_loss=2.2245719909667967, metrics={'train_runtime': 372.5786, 'train_samples_per_second': 0.215, 'train_steps_per_second': 0.054, 'total_flos': 623019337973760.0, 'train_loss': 2.2245719909667967, 'epoch': 0.08016032064128256})

In [36]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import BitsAndBytesConfig, LlamaTokenizer
from peft import PeftModel

base_model_id = "mistralai/Mistral-7B-Instruct-v0.2"

nf4Config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

tokenizer = LlamaTokenizer.from_pretrained(base_model_id, use_fast=False, trust_remote_code=True, add_eos_token=True)

base_model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    quantization_config=nf4Config,
    device_map="auto",
    trust_remote_code=True,
    use_auth_token=True
  )




Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [37]:
tokenizer = LlamaTokenizer.from_pretrained(base_model_id, use_fast=False, trust_remote_code=True, add_eos_token=True

                              )

modelFinetuned = PeftModel.from_pretrained(base_model, "finetunedModel/checkpoint-20")

In [38]:
# Example health insurance question from your dataset domain
user_question = "Does Medicare Part B cover outpatient services?"

# Format prompt as per instruction-tuned style
eval_prompt = f"{user_question}\n"

# Tokenize and move to GPU
promptTokenized = tokenizer(eval_prompt, return_tensors="pt").to("cuda")

# Set model to evaluation mode
model.eval()

# Generate and decode answer
with torch.no_grad():
    output = model.generate(
        **promptTokenized,
        max_new_tokens=512,
        do_sample=True,
        top_p=0.95,
        temperature=0.7
    )
    response = tokenizer.decode(output[0], skip_special_tokens=True)
    print("✅ Model Response:\n", response)

# Clear GPU cache
torch.cuda.empty_cache()


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


✅ Model Response:
 Does Medicare Part B cover outpatient services?
 1. Yes, Medicare Part B covers outpatient services. Medicare Part B is the medical insurance portion of Medicare. It covers most outpatient services. The services include the following:

* Doctor visits
* Home health care services
* Durable medical equipment
* Outpatient surgery
* Ambulance services
* Diagnostic tests
* Mental health services
* Medical nutritional therapy
* Outpatient prescription drugs
* Hospice care
* Preventive services
* Clinical laboratory services

2. The Part B Deductible: The Part B Deductible is the amount you must pay before Medicare begins to cover your medical expenses. The Part B Deductible is $183 in 2013. This is the amount you must pay for all your Part B expenses before Medicare begins to cover your expenses. This is not the amount you pay to the doctor, but the amount you must pay out of your pocket. You pay the Part B deductible once a year.

3. The Part B Coinsurance: The Part B Coi