## Setting up the environment

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for folder, _, names in os.walk('/kaggle/input'):
    for path in (os.path.join(folder, name) for name in names):
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/maternal-health-risk-data/Maternal Health Risk Data Set.csv


In [None]:
import os
# Restrict CUDA to device 0; set here, ahead of the torch / unsloth imports further down.
os.environ.update(CUDA_VISIBLE_DEVICES="0")

In [None]:
!pip install unsloth

Collecting unsloth
  Downloading unsloth-2025.8.1-py3-none-any.whl.metadata (47 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m47.3/47.3 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting unsloth_zoo>=2025.8.1 (from unsloth)
  Downloading unsloth_zoo-2025.8.1-py3-none-any.whl.metadata (8.1 kB)
Collecting xformers>=0.0.27.post2 (from unsloth)
  Downloading xformers-0.0.31.post1-cp39-abi3-manylinux_2_28_x86_64.whl.metadata (1.1 kB)
Collecting bitsandbytes (from unsloth)
  Downloading bitsandbytes-0.46.1-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Collecting tyro (from unsloth)
  Downloading tyro-0.9.27-py3-none-any.whl.metadata (11 kB)
Collecting trl!=0.15.0,!=0.19.0,!=0.9.0,!=0.9.1,!=0.9.2,!=0.9.3,>=0.7.9 (from unsloth)
  Downloading trl-0.21.0-py3-none-any.whl.metadata (11 kB)
Collecting huggingface_hub>=0.34.0 (from unsloth)
  Downloading huggingface_hub-0.34.3-py3-none-any.whl.metadata (14 kB)
Collecting fsspec<=2025.3.0,>=2023.1.0 (from fsspe

In [None]:
%%capture
# Install latest transformers for Gemma 3N
!pip install --no-deps git+https://github.com/huggingface/transformers.git # Only for Gemma 3N
!pip install --no-deps --upgrade timm # Only for Gemma 3N

In [None]:
# updating for compatibility
!pip install huggingface-hub==0.34.0
!pip install --upgrade transformers

Collecting huggingface-hub==0.34.0
  Downloading huggingface_hub-0.34.0-py3-none-any.whl.metadata (14 kB)
Downloading huggingface_hub-0.34.0-py3-none-any.whl (558 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m558.7/558.7 kB[0m [31m11.3 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: huggingface-hub
  Attempting uninstall: huggingface-hub
    Found existing installation: huggingface-hub 0.34.3
    Uninstalling huggingface-hub-0.34.3:
      Successfully uninstalled huggingface-hub-0.34.3
Successfully installed huggingface-hub-0.34.0


In [None]:
# Common imports
import os
import optax
import treescope
# imports and libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the maternal-health CSV; the raw frame is kept untouched for reference.
orginal_data = pd.read_csv('/kaggle/input/maternal-health-risk-data/Maternal Health Risk Data Set.csv')

# Work on a copy. A recorded HeartRate of 7 is rewritten to 70
# (presumably a data-entry slip — TODO confirm against the data source).
data = orginal_data.copy()
bad_heart_rate = data["HeartRate"].eq(7)
data.loc[bad_heart_rate, "HeartRate"] = 70
data

Unnamed: 0,Age,SystolicBP,DiastolicBP,BS,BodyTemp,HeartRate,RiskLevel
0,25,130,80,15.0,98.0,86,high risk
1,35,140,90,13.0,98.0,70,high risk
2,29,90,70,8.0,100.0,80,high risk
3,30,140,85,7.0,98.0,70,high risk
4,35,120,60,6.1,98.0,76,low risk
...,...,...,...,...,...,...,...
1009,22,120,60,15.0,98.0,80,high risk
1010,55,120,90,18.0,98.0,60,high risk
1011,35,85,60,19.0,98.0,86,high risk
1012,43,120,90,18.0,98.0,70,high risk


### Defining the unsloth gemma model

In [None]:
from unsloth import FastModel
import torch

# Catalogue of 4-bit Unsloth Gemma checkpoints, kept for reference only:
# the load call below names its checkpoint explicitly and does not read this list.
fourbit_models = [
    # Instruction-tuned Gemma 3n (dynamic 4-bit quants)
    "unsloth/gemma-3n-E4B-it-unsloth-bnb-4bit",
    "unsloth/gemma-3n-E2B-it-unsloth-bnb-4bit",
    # Pretrained Gemma 3n
    "unsloth/gemma-3n-E4B-unsloth-bnb-4bit",
    "unsloth/gemma-3n-E2B-unsloth-bnb-4bit",

    # Other Gemma 3 quants
    "unsloth/gemma-3-1b-it-unsloth-bnb-4bit",
    "unsloth/gemma-3-4b-it-unsloth-bnb-4bit",
    "unsloth/gemma-3-12b-it-unsloth-bnb-4bit",
    "unsloth/gemma-3-27b-it-unsloth-bnb-4bit",
] # More models at https://huggingface.co/unsloth

# Load the instruction-tuned Gemma 3n E2B checkpoint together with its tokenizer.
model, tokenizer = FastModel.from_pretrained(
    model_name = "unsloth/gemma-3n-E2B-it-unsloth-bnb-4bit",
    max_seq_length = 1024,    # maximum sequence length requested from the loader
    load_in_4bit = True,      # 4-bit quantised weights to reduce memory use
    full_finetuning = False,  # adapters are attached in a later cell instead
    dtype = None,             # None lets the loader auto-detect the dtype
)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


2025-08-05 19:30:18.530640: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1754422218.748996      36 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1754422218.806977      36 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2025.8.1: Fast Gemma3N patching. Transformers: 4.56.0.dev0.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.1+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.3.1
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.31.post1. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: Gemma3N does not support SDPA - switching to eager!


model.safetensors.index.json: 0.00B [00:00, ?B/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/2.65G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/4.99G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/469M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/210 [00:00<?, ?B/s]

processor_config.json:   0%|          | 0.00/98.0 [00:00<?, ?B/s]

chat_template.jinja: 0.00B [00:00, ?B/s]

preprocessor_config.json: 0.00B [00:00, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/4.70M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/33.4M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/777 [00:00<?, ?B/s]

In [None]:
# Wrap the loaded model with LoRA adapters (text-only fine-tuning).
model = FastModel.get_peft_model(
    model,
    # Which parts of the network receive adapters
    finetune_language_layers   = True,
    finetune_attention_modules = True,
    finetune_mlp_modules       = True,
    finetune_vision_layers     = False,  # text-only task, vision layers untouched

    # Adapter hyper-parameters
    r = 8,
    lora_alpha = 8,
    lora_dropout = 0,
    bias = "none",
    random_state = 3407,
)

Unsloth: Making `model.base_model.model.model.language_model` require gradients


## Data Preparation

In [None]:
# Import the function to retrieve a chat template compatible with Gemma-style formatting
from unsloth.chat_templates import get_chat_template

# Apply the "gemma-3" chat template via Unsloth's helper.
# NOTE(review): despite its name, `chat_template` is the object whose
# `.apply_chat_template(...)` method is called when formatting rows — presumably the
# tokenizer itself with the template attached; confirm against the Unsloth docs.
chat_template = get_chat_template(tokenizer, chat_template="gemma-3")

# Define a function to format each training example into a chat-style conversation
def format_chat(row):
    """Render one dataset row as a Gemma-3 chat transcript for supervised fine-tuning.

    Parameters:
        row: Mapping-like record (e.g. a DataFrame row) providing the keys
            'Age', 'SystolicBP', 'DiastolicBP', 'BS', 'BodyTemp', 'HeartRate'
            and 'RiskLevel'.

    Returns:
        str: The templated conversation (system + user + assistant turns) as text,
        with the template's leading '<bos>' removed.
    """
    # User turn: the patient's vitals followed by the question.
    user_msg = (
        f"Given the following patient details:\n"
        f"Age: {row['Age']}, SystolicBP: {row['SystolicBP']}, DiastolicBP: {row['DiastolicBP']}, "
        f"BS: {row['BS']}, BodyTemp: {row['BodyTemp']}, HeartRate: {row['HeartRate']}\n"
        f"What is the pregnancy risk?"
    )

    # Assistant turn: the ground-truth label exactly as stored in the data
    # (e.g. 'high risk', 'mid risk', 'low risk').
    assistant_msg = row['RiskLevel']

    rendered = chat_template.apply_chat_template(
        [
            {"role": "system", "content": "You are a medical assistant AI that predicts pregnancy risk."},
            {"role": "user", "content": user_msg},
            {"role": "assistant", "content": assistant_msg},
        ],
        tokenize=False,  # be explicit: the trainer expects text here, not token ids
    )

    # The template already emits a leading <bos>, and tokenization during training adds
    # another one (the decoded training sample starts with "<bos><bos>"). Drop the
    # template's copy so each training sequence carries exactly one BOS token.
    return rendered.removeprefix("<bos>")


In [None]:
# Render every row into its chat transcript and store it in a new `chat_prompt` column.
data = data.assign(chat_prompt=data.apply(format_chat, axis=1))

In [None]:
# Shuffle the rows reproducibly and renumber the index from zero.
data = data.sample(frac=1, random_state=42).reset_index(drop=True)

# One {"text": ...} record per prompt, in shuffled order.
prompts = data["chat_prompt"].tolist()
dataset = [{"text": prompt} for prompt in prompts]

# First 900 records train the model, the next 100 evaluate it;
# anything from position 1000 onward is left out of both.
train = dataset[:900]
test = dataset[900:1000]
dataset

[{'text': '<bos><start_of_turn>user\nYou are a medical assistant AI that predicts pregnancy risk.\n\nGiven the following patient details:\nAge: 29, SystolicBP: 130, DiastolicBP: 70, BS: 7.7, BodyTemp: 98.0, HeartRate: 78\nWhat is the pregnancy risk?<end_of_turn>\n<start_of_turn>model\nmid risk<end_of_turn>\n'},
 {'text': '<bos><start_of_turn>user\nYou are a medical assistant AI that predicts pregnancy risk.\n\nGiven the following patient details:\nAge: 30, SystolicBP: 140, DiastolicBP: 100, BS: 15.0, BodyTemp: 98.0, HeartRate: 70\nWhat is the pregnancy risk?<end_of_turn>\n<start_of_turn>model\nhigh risk<end_of_turn>\n'},
 {'text': '<bos><start_of_turn>user\nYou are a medical assistant AI that predicts pregnancy risk.\n\nGiven the following patient details:\nAge: 50, SystolicBP: 140, DiastolicBP: 95, BS: 17.0, BodyTemp: 98.0, HeartRate: 60\nWhat is the pregnancy risk?<end_of_turn>\n<start_of_turn>model\nhigh risk<end_of_turn>\n'},
 {'text': '<bos><start_of_turn>user\nYou are a medical a

In [None]:
# convert to dataset
from datasets import Dataset
# Wrap the train / eval record lists as `datasets.Dataset` objects.
train_dataset, eval_dataset = (Dataset.from_list(records) for records in (train, test))

## TRAINING

In [None]:
# set up the model training configuration
from trl import SFTTrainer, SFTConfig
# Training hyper-parameters, built first so the trainer call itself stays short.
sft_args = SFTConfig(
    dataset_text_field = "text",

    # Effective batch size = 1 example x 4 accumulation steps.
    per_device_train_batch_size = 1,
    gradient_accumulation_steps = 4,

    # Schedule: fixed number of optimizer steps rather than whole epochs.
    #num_train_epochs = 1, # Set this for 1 full training run.
    max_steps = 85,
    warmup_steps = 5,
    learning_rate = 2e-4, # Reduce to 2e-5 for long training runs
    lr_scheduler_type = "linear",

    # Optimizer
    optim = "paged_adamw_8bit",
    weight_decay = 0.01,

    # Logging / reproducibility
    logging_steps = 1,
    seed = 3407,
    report_to = "none", # Use this for WandB etc
)

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = train_dataset,
    eval_dataset = eval_dataset,
    use_cache = False,
    args = sft_args,
)

Unsloth: Tokenizing ["text"] (num_proc=2):   0%|          | 0/900 [00:00<?, ? examples/s]

Unsloth: Tokenizing ["text"] (num_proc=2):   0%|          | 0/100 [00:00<?, ? examples/s]

In [None]:
# Import the utility from Unsloth to enable training on just the assistant's responses
from unsloth.chat_templates import train_on_responses_only

# Markers that open the user turn and the model turn in the rendered chat text.
user_turn_marker = "<start_of_turn>user\n"
model_turn_marker = "<start_of_turn>model\n"

# Re-wrap the trainer so that only the model's reply contributes labels;
# everything before the model turn is masked out.
trainer = train_on_responses_only(
    trainer,
    instruction_part = user_turn_marker,
    response_part = model_turn_marker,
)


Map (num_proc=4):   0%|          | 0/900 [00:00<?, ? examples/s]

Map (num_proc=4):   0%|          | 0/100 [00:00<?, ? examples/s]

In [None]:
# Decode the token ids of one processed training example to eyeball the final text.
sample_ids = trainer.train_dataset[100]["input_ids"]
tokenizer.decode(sample_ids)

'<bos><bos><start_of_turn>user\nYou are a medical assistant AI that predicts pregnancy risk.\n\nGiven the following patient details:\nAge: 12, SystolicBP: 95, DiastolicBP: 60, BS: 7.2, BodyTemp: 98.0, HeartRate: 77\nWhat is the pregnancy risk?<end_of_turn>\n<start_of_turn>model\nlow risk<end_of_turn>\n'

In [None]:
# Decode the labels of the same example, rendering masked (-100) positions as spaces.
sample_labels = trainer.train_dataset[100]["labels"]
visible_ids = [tokenizer.pad_token_id if token == -100 else token for token in sample_labels]
tokenizer.decode(visible_ids).replace(tokenizer.pad_token, " ")

'                                                                                 low risk<end_of_turn>\n'

In [None]:
# Display a summary (features and row count) of the raw training split built from the record list.
train_dataset

Dataset({
    features: ['text'],
    num_rows: 900
})

In [None]:
# @title Show current memory stats
import gc

# Collect Python garbage and clear the CUDA cache before taking the baseline reading.
gc.collect()
torch.cuda.empty_cache()

gpu_stats = torch.cuda.get_device_properties(0)

# Both figures are converted from bytes to GiB and rounded to three decimals.
max_memory = round(gpu_stats.total_memory / 1024 ** 3, 3)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 ** 3, 3)

for line in (
    f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.",
    f"{start_gpu_memory} GB of memory reserved.",
):
    print(line)

GPU = Tesla T4. Max memory = 14.741 GB.
10.988 GB of memory reserved.


In [None]:
# Raise TorchDynamo's cache size limit before training.
# NOTE(review): this setting presumably bounds how many compiled variants are cached
# before Dynamo stops recompiling; it does not look like a memory-saving knob —
# confirm against the PyTorch torch.compile documentation.
import torch
torch._dynamo.config.cache_size_limit = 128  # author's note: 256 or 512 are alternatives, depending on available RAM


In [None]:
# Run the fine-tuning loop. The returned object's `metrics['train_runtime']`
# is read by the final memory/time report.
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 900 | Num Epochs = 1 | Total steps = 85
O^O/ \_/ \    Batch size per device = 1 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (1 x 4 x 1) = 4
 "-____-"     Trainable parameters = 10,567,680 of 5,450,005,952 (0.19% trained)
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Step,Training Loss
1,20.3783
2,20.6096
3,21.1923
4,22.1069
5,19.5055
6,20.2773
7,21.4296
8,21.9369
9,19.5244
10,20.9112


In [None]:
#@title Show final memory and time stats
gc.collect()
torch.cuda.empty_cache()

# Peak reserved memory (GiB) and the part above the pre-training baseline.
used_memory = round(torch.cuda.max_memory_reserved() / 1024 ** 3, 3)
used_memory_for_lora = round(used_memory - start_gpu_memory, 3)

# The same two figures as a percentage of the card's total memory.
used_percentage = round(used_memory / max_memory * 100, 3)
lora_percentage = round(used_memory_for_lora / max_memory * 100, 3)

train_runtime = trainer_stats.metrics['train_runtime']
for line in (
    f"{train_runtime} seconds used for training.",
    f"{round(train_runtime/60, 2)} minutes used for training.",
    f"Peak reserved memory = {used_memory} GB.",
    f"Peak reserved memory for training = {used_memory_for_lora} GB.",
    f"Peak reserved memory % of max memory = {used_percentage} %.",
    f"Peak reserved memory for training % of max memory = {lora_percentage} %.",
):
    print(line)

462.507 seconds used for training.
7.71 minutes used for training.
Peak reserved memory = 10.988 GB.
Peak reserved memory for training = 0.0 GB.
Peak reserved memory % of max memory = 74.54 %.
Peak reserved memory for training % of max memory = 0.0 %.


## TESTING

In [None]:
row = data.iloc[1013]  # Row at integer position 1013 as a Series (data.iloc[[1013]] would return a DataFrame instead)

# Build the user prompt from this row's vitals.
# NOTE(review): the wording differs from the prompt format used to build the training
# text, and the instruction contains typos ("pregennacy", "explanantion"); both are left
# untouched here because this string is sent to the model verbatim.
patient_message = f"""Determine the risk level of their pregnancy and return the pregennacy risk only with no explanantion

Age: {row['Age']}
SystolicBP: {row['SystolicBP']}
DiastolicBP: {row['DiastolicBP']}
Blood Sugar: {row['BS']}
Body Temperature: {row['BodyTemp']}
Heart Rate: {row['HeartRate'] }

What is the pregnancy risk level for this individual?"""


In [None]:
# Single-turn conversation: one user message holding one text part.
messages = [dict(role="user", content=[dict(type="text", text=patient_message)])]


In [None]:
from unsloth.chat_templates import get_chat_template
# Attach the Gemma-3 chat template to the tokenizer used for inference.
tokenizer = get_chat_template(tokenizer, chat_template = "gemma-3")

# Tokenize the conversation with the generation prompt appended, then move it to the GPU.
inputs = tokenizer.apply_chat_template(
    messages,
    tokenize = True,
    return_dict = True,
    return_tensors = "pt",
    add_generation_prompt = True, # Must add for generation
).to("cuda")

# Generation settings; the sampling values are the ones the original cell labels
# as recommended for Gemma-3.
generation_settings = dict(
    max_new_tokens = 256, # Increase for longer outputs!
    temperature = 1.0,
    top_p = 0.95,
    top_k = 64,
)
outputs = model.generate(**inputs, **generation_settings)

# Decoded text covers the whole sequence: prompt plus generated reply.
tokenizer.batch_decode(outputs)

['<bos><start_of_turn>user\nDetermine the risk level of their pregnancy and return the pregennacy risk only with no explanantion\n\nAge: 48\nSystolicBP: 140\nDiastolicBP: 90\nBlood Sugar: 15.0\nBody Temperature: 98.0\nHeart Rate: 90\n\nWhat is the pregnancy risk level for this individual?<end_of_turn>\n<start_of_turn>model\nHigh Risk<end_of_turn>']

In [None]:
# Remaining data for testing: rows from position 1000 onward fall outside both the
# training slice [:900] and the evaluation slice [900:1000].
rem = data.iloc[1000:]
rem

Unnamed: 0,Age,SystolicBP,DiastolicBP,BS,BodyTemp,HeartRate,RiskLevel,chat_prompt
1000,34,85,60,11.0,102.0,86,high risk,<bos><start_of_turn>user\nYou are a medical as...
1001,35,120,60,6.9,98.0,70,low risk,<bos><start_of_turn>user\nYou are a medical as...
1002,28,85,60,9.0,101.0,86,mid risk,<bos><start_of_turn>user\nYou are a medical as...
1003,15,120,80,6.8,98.0,70,low risk,<bos><start_of_turn>user\nYou are a medical as...
1004,49,140,90,15.0,98.0,90,high risk,<bos><start_of_turn>user\nYou are a medical as...
1005,50,120,80,15.0,98.0,70,high risk,<bos><start_of_turn>user\nYou are a medical as...
1006,40,140,100,18.0,98.0,90,high risk,<bos><start_of_turn>user\nYou are a medical as...
1007,15,120,80,6.6,99.0,70,low risk,<bos><start_of_turn>user\nYou are a medical as...
1008,29,130,70,7.7,98.0,78,mid risk,<bos><start_of_turn>user\nYou are a medical as...
1009,50,140,90,15.0,98.0,90,high risk,<bos><start_of_turn>user\nYou are a medical as...


In [None]:
from unsloth.chat_templates import get_chat_template

# Prepare the tokenizer once, outside the prediction function, by attaching the
# "gemma-3" chat template.
# NOTE(review): the same helper is already called earlier in the notebook; this repeat
# is presumably harmless but looks redundant — confirm before removing.
tokenizer = get_chat_template(tokenizer, chat_template="gemma-3")

def predict_pregnancy_risk(row, model, tokenizer, max_new_tokens=256, answer_only=False):
    """
    Generate a pregnancy-risk prediction for one patient from the vitals in a row.

    Parameters:
        row (pd.Series): A row (or any mapping) providing 'Age', 'SystolicBP',
            'DiastolicBP', 'BS', 'BodyTemp' and 'HeartRate'.
        model: The loaded model; must expose ``generate(**inputs, ...)``.
        tokenizer: Tokenizer/processor with a chat template; must expose
            ``apply_chat_template`` and ``batch_decode``.
        max_new_tokens (int): Max number of tokens to generate.
        answer_only (bool): When False (default, the historical behaviour) the full
            decoded sequences are returned, prompt and special tokens included.
            When True only the newly generated text of the first sequence is
            returned, with special tokens removed and whitespace stripped.

    Returns:
        list[str]: Decoded prompt-plus-reply transcripts when ``answer_only`` is False.
        str: The model's reply alone (e.g. "High Risk") when ``answer_only`` is True.
    """
    # Build the user prompt
    patient_message = f"""Please respond **in English**. Determine the risk level of their pregnancy and return the pregnancy risk  with no explanation in English only

Age: {row['Age']}
SystolicBP: {row['SystolicBP']}mmHg
DiastolicBP: {row['DiastolicBP']}mmHg
Blood Sugar: {row['BS']}mmol/L
Body Temperature: {row['BodyTemp']}°F
Heart Rate: {row['HeartRate']} bpm\n

What is the pregnancy risk level for this individual?"""

    messages = [
        {
            "role": "user",
            "content": [{"type": "text", "text": patient_message}]
        }
    ]

    # Tokenize with chat template
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
        tokenize=True,
        return_dict=True,
    ).to("cuda")

    # Generate response
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        temperature=1.0,
        top_p=0.95,
        top_k=64,
    )

    if not answer_only:
        # Historical behaviour: full transcript(s), special tokens included.
        return tokenizer.batch_decode(outputs)

    # The generated sequence starts with the prompt tokens; cut them off so that
    # only the model's reply is decoded.
    prompt_length = len(inputs["input_ids"][0])
    completions = [sequence[prompt_length:] for sequence in outputs]
    return tokenizer.batch_decode(completions, skip_special_tokens=True)[0].strip()


In [None]:
# Spot-check row 1007, which lies outside the train/eval slices
# (labelled "low risk" in the `rem` preview).
row = data.iloc[1007]
predict_pregnancy_risk(row,model, tokenizer)

['<bos><start_of_turn>user\nPlease respond **in English**. Determine the risk level of their pregnancy and return the pregnancy risk  with no explanation in English only\n\nAge: 15\nSystolicBP: 120mmHg\nDiastolicBP: 80mmHg\nBlood Sugar: 6.6mmol/L\nBody Temperature: 99.0°F\nHeart Rate: 70 bpm\n\n\nWhat is the pregnancy risk level for this individual?<end_of_turn>\n<start_of_turn>model\nHigh Risk \n\nPregnancy Risk: High Risk<end_of_turn>']

In [None]:
# Spot-check row 1006, which lies outside the train/eval slices
# (labelled "high risk" in the `rem` preview).
row = data.iloc[1006]
predict_pregnancy_risk(row,model, tokenizer)

['<bos><start_of_turn>user\nPlease respond **in English**. Determine the risk level of their pregnancy and return the pregnancy risk  with no explanation in English only\n\nAge: 40\nSystolicBP: 140mmHg\nDiastolicBP: 100mmHg\nBlood Sugar: 18.0mmol/L\nBody Temperature: 98.0°F\nHeart Rate: 90 bpm\n\n\nWhat is the pregnancy risk level for this individual?<end_of_turn>\n<start_of_turn>model\nHigh Risk<end_of_turn>']

In [None]:
# Spot-check row 1001, which lies outside the train/eval slices
# (labelled "low risk" in the `rem` preview).
row = data.iloc[1001]
predict_pregnancy_risk(row,model, tokenizer)

['<bos><start_of_turn>user\nPlease respond **in English**. Determine the risk level of their pregnancy and return the pregnancy risk  with no explanation in English only\n\nAge: 35\nSystolicBP: 120mmHg\nDiastolicBP: 60mmHg\nBlood Sugar: 6.9mmol/L\nBody Temperature: 98.0°F\nHeart Rate: 70 bpm\n\n\nWhat is the pregnancy risk level for this individual?<end_of_turn>\n<start_of_turn>model\nLow\n\nPregnancy Risk Level: Low<end_of_turn>']

In [None]:
# Spot-check row 1002, which lies outside the train/eval slices
# (labelled "mid risk" in the `rem` preview).
row = data.iloc[1002]
predict_pregnancy_risk(row,model, tokenizer)

['<bos><start_of_turn>user\nPlease respond **in English**. Determine the risk level of their pregnancy and return the pregnancy risk  with no explanation in English only\n\nAge: 28\nSystolicBP: 85mmHg\nDiastolicBP: 60mmHg\nBlood Sugar: 9.0mmol/L\nBody Temperature: 101.0°F\nHeart Rate: 86 bpm\n\n\nWhat is the pregnancy risk level for this individual?<end_of_turn>\n<start_of_turn>model\nModerate\n\nPregnancy Risk: Moderate<end_of_turn>']

In [None]:
# Spot-check row 1011, which lies outside the train/eval slices.
row = data.iloc[1011]
predict_pregnancy_risk(row,model, tokenizer)

['<bos><start_of_turn>user\nPlease respond **in English**. Determine the risk level of their pregnancy and return the pregnancy risk  with no explanation in English only\n\nAge: 17\nSystolicBP: 90mmHg\nDiastolicBP: 63mmHg\nBlood Sugar: 6.9mmol/L\nBody Temperature: 101.0°F\nHeart Rate: 70 bpm\n\n\nWhat is the pregnancy risk level for this individual?<end_of_turn>\n<start_of_turn>model\nHigh Risk\n<end_of_turn>']

In [None]:
# Spot-check row 1012, which lies outside the train/eval slices.
row = data.iloc[1012]
predict_pregnancy_risk(row,model, tokenizer)

['<bos><start_of_turn>user\nPlease respond **in English**. Determine the risk level of their pregnancy and return the pregnancy risk  with no explanation in English only\n\nAge: 65\nSystolicBP: 130mmHg\nDiastolicBP: 80mmHg\nBlood Sugar: 15.0mmol/L\nBody Temperature: 98.0°F\nHeart Rate: 86 bpm\n\n\nWhat is the pregnancy risk level for this individual?<end_of_turn>\n<start_of_turn>model\nHigh Risk\n<end_of_turn>']

## SAVING

In [None]:
from kaggle_secrets import UserSecretsClient
# Fetch the secret named "HF_TOKEN" from Kaggle's secrets client instead of
# hard-coding it; it is passed as `token=` to the Hub uploads.
user_secrets = UserSecretsClient()
secret_value_0 = user_secrets.get_secret("HF_TOKEN")

In [None]:
# Upload the merged model together with the tokenizer to the Hugging Face Hub.
if True: # Upload is enabled; set to False to skip it
    model.push_to_hub_merged(
        "TechBuz/gemma-3N-risk_predictor", tokenizer,
        token = secret_value_0
    )

Processing Files (0 / 0)                : |          |  0.00B /  0.00B            

New Data Upload                         : |          |  0.00B /  0.00B            

  ...a-3N-risk_predictor/tokenizer.model:  96%|#########6| 4.53MB / 4.70MB            

  ...ma-3N-risk_predictor/tokenizer.json: 100%|##########| 33.4MB / 33.4MB            

Found HuggingFace hub cache directory: /root/.cache/huggingface/hub
Checking cache directory for required files...
Cache check failed: model-00001-of-00003.safetensors not found in local cache.
Not all required files found in cache. Will proceed with downloading.
Downloading safetensors index for unsloth/gemma-3n-e2b-it...


model.safetensors.index.json: 0.00B [00:00, ?B/s]

Unsloth: Merging weights into 16bit:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/3.08G [00:00<?, ?B/s]

Processing Files (0 / 0)                : |          |  0.00B /  0.00B            

New Data Upload                         : |          |  0.00B /  0.00B            

  ...or/model-00001-of-00003.safetensors:   2%|1         | 50.3MB / 3.08GB            

Unsloth: Merging weights into 16bit:  33%|███▎      | 1/3 [00:47<01:35, 47.86s/it]

model-00002-of-00003.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

Processing Files (0 / 0)                : |          |  0.00B /  0.00B            

New Data Upload                         : |          |  0.00B /  0.00B            

  ...or/model-00002-of-00003.safetensors:   1%|1         | 50.3MB / 4.98GB            

Unsloth: Merging weights into 16bit:  67%|██████▋   | 2/3 [02:13<01:09, 69.87s/it]

model-00003-of-00003.safetensors:   0%|          | 0.00/2.82G [00:00<?, ?B/s]

Processing Files (0 / 0)                : |          |  0.00B /  0.00B            

New Data Upload                         : |          |  0.00B /  0.00B            

  ...or/model-00003-of-00003.safetensors:   1%|          | 25.1MB / 2.82GB            

Unsloth: Merging weights into 16bit: 100%|██████████| 3/3 [03:20<00:00, 66.84s/it]


In [None]:
# Push the model and the tokenizer to a second Hub repository.
# The upload log for this cell lists an adapter_model.safetensors file, so this
# appears to store the LoRA adapter rather than merged weights.
model.push_to_hub("TechBuz/gemma-3N-finetune15", token =secret_value_0) # Online saving
tokenizer.push_to_hub("TechBuz/gemma-3N-finetune15", token = secret_value_0) # Online saving

README.md:   0%|          | 0.00/604 [00:00<?, ?B/s]

Processing Files (0 / 0)                : |          |  0.00B /  0.00B            

New Data Upload                         : |          |  0.00B /  0.00B            

  ...pasohnhay/adapter_model.safetensors: 100%|##########| 42.3MB / 42.3MB            

Saved model to https://huggingface.co/TechBuz/gemma-3N-finetune15


Processing Files (0 / 0)                : |          |  0.00B /  0.00B            

New Data Upload                         : |          |  0.00B /  0.00B            

  /tmp/tmpemte0vsu/tokenizer.model      : 100%|##########| 4.70MB / 4.70MB            

  /tmp/tmpemte0vsu/tokenizer.json       :  75%|#######5  | 25.1MB / 33.4MB            

In [None]:
# Export the fine-tuned model to GGUF.
# The file is written under /kaggle/working because, per the environment notes at the
# top of this notebook, that is the directory preserved as output on Kaggle; the
# previous "/content/..." location is a Colab-style path outside it.
if True: # Export is enabled; set to False to skip it
    model.save_pretrained_gguf(
        "/kaggle/working/gemma-3N-finetune",
        quantization_type = "F16", # For now only Q8_0, BF16, F16 supported
    )