In [1]:
!pip install torch transformers accelerate datasets peft bitsandbytes safetensors trl

Collecting bitsandbytes
  Downloading bitsandbytes-0.45.2-py3-none-manylinux_2_24_x86_64.whl.metadata (5.8 kB)
Collecting trl
  Downloading trl-0.15.0-py3-none-any.whl.metadata (11 kB)
Downloading bitsandbytes-0.45.2-py3-none-manylinux_2_24_x86_64.whl (69.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m69.7/69.7 MB[0m [31m25.2 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25hDownloading trl-0.15.0-py3-none-any.whl (318 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m318.3/318.3 kB[0m [31m24.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: trl, bitsandbytes
Successfully installed bitsandbytes-0.45.2 trl-0.15.0


In [2]:
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments
from datasets import load_dataset
from peft import LoraConfig, get_peft_model

In [3]:
import torch
import pandas as pd
import kagglehub

### Load Mistral Model & Tokenizer

In [4]:
model_name="mistralai/Mistral-7B-Instruct-v0.3"

In [7]:
from huggingface_hub import login

# Replace with your actual Hugging Face token
HF_TOKEN = "YOUR_TOKEN_HERE"

# Login to Hugging Face
login(HF_TOKEN)


In [6]:
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token
# model = AutoModelForCausalLM.from_pretrained(model_name)

tokenizer_config.json:   0%|          | 0.00/141k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/587k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.96M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

In [7]:
# model = AutoModelForCausalLM.from_pretrained(
#     model_name, 
#     torch_dtype=torch.float16,  # Use half-precision for better performance
#     device_map="auto"  # Automatically assigns model to GPU if available
# )

## Load Mistral in 4-bit Mode (Low Memory Usage)

In [8]:
from transformers import BitsAndBytesConfig

In [9]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,  # Efficient computation
    bnb_4bit_use_double_quant=True,  # Improves memory efficiency
)

In [37]:
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto"
)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

### Test with a sample prompt

In [38]:
prompt = "How can I stay healthy with a busy schedule?"
inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
outputs = model.generate(**inputs, max_length=100)

print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


How can I stay healthy with a busy schedule?

Staying healthy with a busy schedule can be challenging, but it's not impossible. Here are some tips:

1. Prioritize Sleep: Aim for 7-9 hours of sleep per night. Lack of sleep can lead to various health problems and decreased productivity.

2. Balanced Diet: Try to eat a balanced diet with plenty of fruits, vegetables, lean proteins, and whole


## Import torch for fine tuning

In [39]:
print(torch.cuda.is_available())  # Should return True

True


In [40]:
print(torch.cuda.get_device_name(0)) 

Tesla T4


# Fine-Tuning Mistral for Health Coaching 🚀

## Fine-tuning data inspection

In [41]:

# # Download latest version
# path = kagglehub.dataset_download("joshharry/med-datasets")

# print("Path to dataset files:", path)

In [42]:
# data = pd.read_csv("/kaggle/input/med-datasets/HealthCareMagic-100k.csv")
# data.head(2)

In [43]:
# print(data.iloc[1]['input'])
# print("")
# print(data.iloc[1]['output'])

In [44]:
# df = data.copy()

In [45]:
# formatted_data = [
#     {"prompt": row["input"], "response": row["output"]}
#     for _, row in df.iterrows()
# ]

In [46]:
# formatted_data[0]

### Use LoRA to reduce training cost and time.

In [47]:
# # Apply LoRA
# lora_config = LoraConfig(
#     r=8,  # Rank parameter
#     lora_alpha=16,
#     lora_dropout=0.1,
#     bias="none",
#     task_type="CAUSAL_LM"
# )

lora_config = LoraConfig(
    r=8,  # Low-rank dimension
    lora_alpha=32,  # Scaling factor
    target_modules=["q_proj", "v_proj"],  # Apply LoRA to attention layers
    lora_dropout=0.05,  # Dropout for stability
    bias="none",
    task_type="CAUSAL_LM",
)


In [48]:
model = get_peft_model(model, lora_config)

In [49]:
model.print_trainable_parameters()


trainable params: 3,407,872 || all params: 7,251,431,424 || trainable%: 0.0470


## Load the fine-tuning dataset

In [50]:
dataset = load_dataset("json", data_files="/kaggle/input/aihealthcoachfinetuningdata/finetuning_data.json")

In [51]:
dataset

DatasetDict({
    train: Dataset({
        features: ['prompt', 'response'],
        num_rows: 1121
    })
})

In [52]:
len(dataset['train'])

1121

## Drop the empty rows

In [53]:
dataset['train'] = dataset['train'].filter(lambda x: x['prompt'] is not None and x['response'] is not None)

In [54]:
len(dataset['train'])

1120

In [55]:
# print(dataset['train'][851])

### Tokenize function

In [56]:
# tokenizer = AutoTokenizer.from_pretrained(model_name)

In [57]:
# tokenizer.add_special_tokens({'pad_token': '[PAD]'})

In [58]:
# def tokenize_function(example):
#     # Tokenize the 'prompt' for input_ids
#     encoding = tokenizer(example["prompt"], 
#                          padding="max_length", 
#                          truncation=True, 
#                          max_length=512)
    
#     # Tokenize the 'response' for labels
#     labels_encoding = tokenizer(example["response"], 
#                                 padding="max_length", 
#                                 truncation=True, 
#                                 max_length=512)
    
#     # Return both input_ids and labels
#     encoding['labels'] = labels_encoding['input_ids']
    
#     return encoding

# # Apply tokenization to the dataset
# tokenized_datasets = dataset.map(tokenize_function, batched=True)


In [59]:
# tokenized_datasets

## Split the tokenized dataset for train and test

In [60]:
dataset = dataset["train"].train_test_split(test_size=0.1)

In [61]:
def tokenize_function(examples):
    return tokenizer(examples["prompt"], text_target=examples["response"], truncation=True, padding="max_length", max_length=512)


In [62]:
tokenized_datasets = dataset.map(tokenize_function, batched=True)

Map:   0%|          | 0/1008 [00:00<?, ? examples/s]

Map:   0%|          | 0/112 [00:00<?, ? examples/s]

In [63]:
# # Split the 'train' dataset into 'train' and 'validation'
# train_test_split_result = tokenized_datasets["train"].train_test_split(test_size=0.2)

# # Access the 'train' and 'test' (validation) splits from the result
# train_dataset = train_test_split_result["train"]
# validation_dataset = train_test_split_result["test"]

# # Verify the split
# print(f"Train dataset size: {len(train_dataset)}")
# print(f"Validation dataset size: {len(validation_dataset)}")

In [64]:
# print(train_dataset)
# print(validation_dataset)

### Finally fine-tuning dataset is ready

## Start training the model

## wandb login

In [65]:
!pip install wandb



In [66]:
import wandb

In [67]:
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
secret_value_0 = user_secrets.get_secret("WANDB_API_KEY")

In [68]:
wandb.login(key=secret_value_0)



True

# Fine-tuning Mistral AI using LoRA

In [69]:
!pip install trl



In [70]:
training_args = TrainingArguments(
    output_dir="./mistral-health-qLora",  # Where the model will be saved
    per_device_train_batch_size=1,  # Small batch size to fit model on GPUs
    gradient_accumulation_steps=8,  # Simulate a larger batch size
    num_train_epochs=3,  # Training for 3 epochs (adjust as needed)
    save_steps=500,  # Save checkpoints every 500 steps
    save_total_limit=2,  # Limit the number of saved checkpoints
    logging_dir="./logs",  # Logging directory
    logging_steps=10,  # Log every 10 steps
    fp16=True,  # Mixed precision for faster training
    optim="paged_adamw_8bit",  # Optimizer for memory efficiency
    eval_strategy="epoch",  # Evaluate after every epoch
    save_strategy="epoch",  # Save after each epoch
    report_to="wandb",  # Avoid reporting to external platforms (optional)
)

In [71]:
# Check available GPU memory
def check_gpu_memory():
    allocated_memory = torch.cuda.memory_allocated() / 1e9  # In GB
    cached_memory = torch.cuda.memory_reserved() / 1e9  # In GB
    print(f"Allocated Memory: {allocated_memory:.2f} GB")
    print(f"Cached Memory: {cached_memory:.2f} GB")


In [72]:
import torch
print(f"Available GPU Memory: {torch.cuda.memory_allocated()/1e9:.2f} GB")


Available GPU Memory: 3.48 GB


In [73]:
# model.resize_token_embeddings(len(tokenizer))

In [74]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
)

In [75]:
trainer.log_callback = lambda log: check_gpu_memory()  # Monitor GPU memory after every log step

In [76]:
trainer.train()

Epoch,Training Loss,Validation Loss
1,2.3897,2.403806
2,2.2353,2.341389
3,2.2433,2.317169


TrainOutput(global_step=378, training_loss=2.633478088984414, metrics={'train_runtime': 3644.1352, 'train_samples_per_second': 0.83, 'train_steps_per_second': 0.104, 'total_flos': 6.611697935371469e+16, 'train_loss': 2.633478088984414, 'epoch': 3.0})

In [77]:
# Save the fine-tuned model and tokenizer
model.save_pretrained("./mistral-health-qLora")
tokenizer.save_pretrained("./mistral-health-qLora")


('./mistral-health-qLora/tokenizer_config.json',
 './mistral-health-qLora/special_tokens_map.json',
 './mistral-health-qLora/tokenizer.model',
 './mistral-health-qLora/added_tokens.json',
 './mistral-health-qLora/tokenizer.json')

In [78]:
!zip -r my_output.zip /kaggle/working/mistral-health-qLora

  adding: kaggle/working/mistral-health-qLora/ (stored 0%)
  adding: kaggle/working/mistral-health-qLora/checkpoint-378/ (stored 0%)
  adding: kaggle/working/mistral-health-qLora/checkpoint-378/trainer_state.json (deflated 77%)
  adding: kaggle/working/mistral-health-qLora/checkpoint-378/adapter_config.json (deflated 53%)
  adding: kaggle/working/mistral-health-qLora/checkpoint-378/rng_state.pth (deflated 25%)
  adding: kaggle/working/mistral-health-qLora/checkpoint-378/training_args.bin (deflated 52%)
  adding: kaggle/working/mistral-health-qLora/checkpoint-378/adapter_model.safetensors (deflated 7%)
  adding: kaggle/working/mistral-health-qLora/checkpoint-378/README.md (deflated 66%)
  adding: kaggle/working/mistral-health-qLora/checkpoint-378/scheduler.pt (deflated 56%)
  adding: kaggle/working/mistral-health-qLora/checkpoint-378/optimizer.pt (deflated 12%)
  adding: kaggle/working/mistral-health-qLora/adapter_config.json (deflated 53%)
  adding: kaggle/working/mistral-health-qLora/

## Inference and Testing

### Load the Model for Inference

In [2]:
import torch
import gc

gc.collect()
torch.cuda.empty_cache()

In [3]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel
import torch

In [4]:
base_model_name = "mistralai/Mistral-7B-Instruct-v0.3"  # Base model
lora_model_path = "/kaggle/input/fine-tuned-model/kaggle/working/mistral-health-qLora"  # Your trained model path

In [5]:
tokenizer = AutoTokenizer.from_pretrained(lora_model_path)

In [8]:
# Enable 4-bit quantization
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,  # Use float16 for better performance
    bnb_4bit_use_double_quant=True,  # Enable double quantization
)

base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name, quantization_config=quantization_config, device_map="auto"
)

config.json:   0%|          | 0.00/601 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.55G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

In [9]:
model = PeftModel.from_pretrained(base_model, lora_model_path)
model = model.merge_and_unload()  # Convert into full model for inference



### Run Inference

In [10]:
prompt = "What are the benefits of eating fiber-rich foods?"

In [11]:
inputs = tokenizer(prompt, return_tensors="pt")

In [12]:
with torch.no_grad():
    output_ids = model.generate(**inputs, max_length=150)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In [13]:
# Decode output
response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
print(response)

What are the benefits of eating fiber-rich foods?

Fiber-rich foods offer numerous health benefits, including:

1. Digestive Health: Fiber promotes regular bowel movements and prevents constipation. It also helps reduce symptoms of irritable bowel syndrome (IBS) and other digestive disorders.

2. Weight Management: High-fiber foods are filling and can help control hunger and promote weight loss.

3. Lower Cholesterol: Soluble fiber can help lower LDL (bad) cholesterol levels, reducing the risk of heart disease.

4. Blood Sugar Control: High-fiber foods can help slow the absorption of sugar,


## Uploading to the Cloud

In [18]:
hf_username = "SudhinK"  # Replace with your HF username
repo_name = f"{hf_username}/AI-Health-Coach"  # Model repo name

# Push the model
model.push_to_hub(repo_name)

# Push the tokenizer
tokenizer.push_to_hub(repo_name)

print(f"✅ Model uploaded to: https://huggingface.co/{repo_name}")

model.safetensors:   0%|          | 0.00/4.14G [00:00<?, ?B/s]

README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/587k [00:00<?, ?B/s]

✅ Model uploaded to: https://huggingface.co/SudhinK/AI-Health-Coach
