<a href="https://colab.research.google.com/github/Gallifantjack/llm_teaching/blob/main/MedQA_fine_tuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
%%capture
# Installs Unsloth, Xformers (Flash Attention) and all other packages!
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install --no-deps xformers "trl<0.9.0" peft accelerate bitsandbytes

In [2]:
from unsloth import FastLanguageModel
import torch

max_seq_length = 2048  # Choose any! We auto support RoPE Scaling internally!
dtype = (
    None  # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
)
load_in_4bit = True  # Use 4bit quantization to reduce memory usage. Can be False.

dataset_name = "GBaker/MedQA-USMLE-4-options-hf"
model_name = "unsloth/Qwen2-0.5b-bnb-4bit"
# model_name= "unsloth/Qwen2-1.5b-bnb-4bit"
# model_name= "unsloth/Qwen2-7b-bnb-4bit"
# model_name= "unsloth/Qwen2-72b-bnb-4bit"

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


In [3]:
# 4bit pre quantized models we support for 4x faster downloading + no OOMs.
fourbit_models = [
    "unsloth/mistral-7b-v0.3-bnb-4bit",  # New Mistral v3 2x faster!
    "unsloth/llama-3-8b-bnb-4bit",  # Llama-3 15 trillion tokens model 2x faster!
    "unsloth/llama-3-8b-Instruct-bnb-4bit",
    "unsloth/llama-3-70b-bnb-4bit",
    "unsloth/Phi-3-mini-4k-instruct",  # Phi-3 2x faster!
    "unsloth/Phi-3-medium-4k-instruct",
    "unsloth/Qwen2-0.5b-bnb-4bit",  # Qwen2 2x faster!
    "unsloth/Qwen2-1.5b-bnb-4bit",
    "unsloth/Qwen2-7b-bnb-4bit",
    "unsloth/Qwen2-72b-bnb-4bit",
    "unsloth/gemma-7b-bnb-4bit",  # Gemma 2.2x faster!
]

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

config.json:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

==((====))==  Unsloth: Fast Qwen2 patching release 2024.7
   \\   /|    GPU: NVIDIA L4. Max memory: 22.168 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.3.0+cu121. CUDA = 8.9. CUDA Toolkit = 12.1.
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.26.post1. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth


model.safetensors:   0%|          | 0.00/457M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/117 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.33k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/80.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/370 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

unsloth/Qwen2-0.5b-bnb-4bit does not have a padding token! Will use pad_token = <|PAD_TOKEN|>.


We now add LoRA adapters so we only need to update 1 to 10% of all parameters!

In [4]:
model = FastLanguageModel.get_peft_model(
    model,
    r=16,  # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
    lora_alpha=16,
    lora_dropout=0,  # Supports any, but = 0 is optimized
    bias="none",  # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing="unsloth",  # True or "unsloth" for very long context
    random_state=3407,
    use_rslora=False,  # We support rank stabilized LoRA
    loftq_config=None,  # And LoftQ
)

Unsloth 2024.7 patched 24 layers with 0 QKV layers, 24 O layers and 24 MLP layers.


## Add some prefix or suffix to each of the training questions from MedQA

In [7]:
# Define the prompt template
medqa_prompt = """Below is a medical question paired with multiple answer choices. Write a response that selects the best answer.

### Question:
{prefix}{question}{suffix}

### Choices:
{choices}

### Response:
{answer}"""

EOS_TOKEN = tokenizer.eos_token  # Make sure this is defined

def formatting_prompts_func(examples, prefix="", suffix=""):
    questions = examples["sent1"]
    choices = ["\n".join([examples["ending0"][i], examples["ending1"][i], examples["ending2"][i], examples["ending3"][i]]) for i in range(len(questions))]
    answers = [examples[f"ending{examples['label'][i]}"][i] for i in range(len(questions))]

    texts = []
    for question, choice, answer in zip(questions, choices, answers):
        text = medqa_prompt.format(
            prefix=prefix,
            question=question,
            suffix=suffix,
            choices=choice,
            answer=answer
        ) + EOS_TOKEN
        texts.append(text)

    return {"text": texts}

# Load and preprocess dataset
from datasets import load_dataset

def prepare_dataset(dataset_name, prefix="", suffix="", split="train"):
    dataset = load_dataset(dataset_name, split=split)

    # Check if the dataset has the expected structure
    expected_columns = ["sent1", "ending0", "ending1", "ending2", "ending3", "label"]
    if not all(col in dataset.column_names for col in expected_columns):
        raise ValueError(f"Dataset {dataset_name} does not have the expected columns. Expected: {expected_columns}, Got: {dataset.column_names}")

    dataset = dataset.map(
        lambda examples: formatting_prompts_func(examples, prefix, suffix),
        batched=True,
        remove_columns=dataset.column_names  # Remove original columns after formatting
    )
    return dataset

In [9]:
# Usage example
try:
    dataset = prepare_dataset(
        dataset_name=dataset_name,  # Make sure this is the correct dataset name
        prefix="These patients are originally from Zurich, Switzerland: ",
        suffix=" Provide your answers with respect to the Swiss population.",
        split="train"
    )
    print(f"Dataset prepared successfully. Number of examples: {len(dataset)}")
    print("First example:", dataset[0]['text'][:500] + "...")  # Print first 500 characters of the first example
except Exception as e:
    print(f"An error occurred while preparing the dataset: {str(e)}")

Map:   0%|          | 0/10178 [00:00<?, ? examples/s]

Dataset prepared successfully. Number of examples: 10178
First example: Below is a medical question paired with multiple answer choices. Write a response that selects the best answer.

### Question:
These patients are originally from Zurich, Switzerland: A 23-year-old pregnant woman at 22 weeks gestation presents with burning upon urination. She states it started 1 day ago and has been worsening despite drinking more water and taking cranberry extract. She otherwise feels well and is followed by a doctor for her pregnancy. Her temperature is 97.7°F (36.5°C), blood p...


<a name="Train"></a>
### Train the model
Now let's use Huggingface TRL's `SFTTrainer`! More docs here: [TRL SFT docs](https://huggingface.co/docs/trl/sft_trainer). We do 60 steps to speed things up, but you can set `num_train_epochs=1` for a full run, and turn off `max_steps=None`. We also support TRL's `DPOTrainer`!

In [10]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    args=TrainingArguments(
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        # Use num_train_epochs = 1, warmup_ratio for full training runs!
        warmup_steps=5,
        max_steps=60,
        learning_rate=2e-4,
        fp16=not is_bfloat16_supported(),
        bf16=is_bfloat16_supported(),
        logging_steps=1,
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="linear",
        seed=3407,
        output_dir="outputs",
    ),
)

  self.pid = os.fork()


Map (num_proc=2):   0%|          | 0/10178 [00:00<?, ? examples/s]

max_steps is given, it will override any value given in num_train_epochs


In [11]:
# @title Show current memory stats
gpu_stats = torch.cuda.get_device_properties(0)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

GPU = NVIDIA L4. Max memory = 22.168 GB.
1.213 GB of memory reserved.


In [12]:
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 10,178 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 60
 "-____-"     Number of trainable parameters = 8,798,208


Step,Training Loss
1,2.5048
2,2.5584
3,2.5676
4,2.4144
5,2.5168
6,2.3589
7,2.2529
8,2.1774
9,2.1353
10,1.9312


In [13]:
# @title Show final memory and time stats
used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
used_memory_for_lora = round(used_memory - start_gpu_memory, 3)
used_percentage = round(used_memory / max_memory * 100, 3)
lora_percentage = round(used_memory_for_lora / max_memory * 100, 3)
print(f"{trainer_stats.metrics['train_runtime']} seconds used for training.")
print(
    f"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training."
)
print(f"Peak reserved memory = {used_memory} GB.")
print(f"Peak reserved memory for training = {used_memory_for_lora} GB.")
print(f"Peak reserved memory % of max memory = {used_percentage} %.")
print(f"Peak reserved memory for training % of max memory = {lora_percentage} %.")

102.0309 seconds used for training.
1.7 minutes used for training.
Peak reserved memory = 2.635 GB.
Peak reserved memory for training = 1.422 GB.
Peak reserved memory % of max memory = 11.887 %.
Peak reserved memory for training % of max memory = 6.415 %.


<a name="Inference"></a>
### Inference
Let's run the model! You can change the instruction and input - leave the output blank!

In [32]:
test_dataset = load_dataset(dataset_name, split="test")

def evaluate_model(model, tokenizer, dataset, num_samples=100):
    correct = 0
    total = 0

    for i in range(min(num_samples, len(dataset))):
        example = dataset[i]
        prefix = "Consider the following medical scenario based in India: "
        question = example["sent1"]
        suffix = " Please provide your professional medical opinion for this country."
        choices = "\n".join(
            [
                example["ending0"],
                example["ending1"],
                example["ending2"],
                example["ending3"],
            ]
        )
        correct_answer = example[f"ending{example['label']}"]

        prompt = medqa_prompt.format(
            prefix=prefix,
            question=question,
            suffix=suffix,
            choices=choices
        )

        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

        with torch.no_grad():
            outputs = model.generate(**inputs, max_new_tokens=50)

        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        predicted_answer = response.split("Response:")[-1].strip()

        if predicted_answer.lower() in correct_answer.lower():
            correct += 1
        total += 1

        if i % 10 == 0:
            print(f"Processed {i+1} samples...")

    accuracy = correct / total
    return accuracy

# Evaluate the model
accuracy = evaluate_model(model, tokenizer, test_dataset)
print(f"Model accuracy on test set: {accuracy:.2%}")
print("Evaluation completed!")

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


Processed 1 samples...


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


Processed 11 samples...


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


Processed 21 samples...


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


Processed 31 samples...


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


Processed 41 samples...


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


Processed 51 samples...


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


Processed 61 samples...


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


Processed 71 samples...


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


Processed 81 samples...


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


Processed 91 samples...


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


Model accuracy on test set: 21.00%
Evaluation completed!


In [None]:
# Define the prompt template (make sure this matches what you used for training)
medqa_prompt = """Below is a medical question paired with multiple answer choices. Write a response that selects the best answer.

### Question:
{prefix}{question}{suffix}

### Choices:
{choices}

### Response:
"""

FastLanguageModel.for_inference(model)  # Unsloth has 2x faster inference!

# Example usage
prefix = "Consider the following medical scenario based in India: "
question = "What is the most likely diagnosis for a patient presenting with longstanding cough, back pain and weight loss?"
suffix = " Please provide your professional medical opinion for this country."
choices = """
A. Lung Cancer
B. Pulmonary embolism
C. Heart Disease
D. Tuberculosis
"""

input_text = medqa_prompt.format(
    prefix=prefix,
    question=question,
    suffix=suffix,
    choices=choices
)

inputs = tokenizer([input_text], return_tensors="pt").to("cuda")

outputs = model.generate(**inputs, max_new_tokens=64, use_cache=True)
response = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]

print("Model's response:")
print(response.split("Response:")[-1].strip())

In [35]:
import json
from datetime import datetime

def user_inference(model, tokenizer, save_file="user_qa_log.json"):
    qa_log = []

    while True:
        prefix = input("Enter a prefix (or press Enter for default): ") or "Consider the following medical scenario: "
        question = input("Enter your medical question: ")
        suffix = input("Enter a suffix (or press Enter for default): ") or " Please provide your professional medical opinion."

        print("Enter your answer choices (press Enter after each choice, type 'done' when finished):")
        choices = []
        while True:
            choice = input()
            if choice.lower() == 'done':
                break
            choices.append(choice)

        choices_str = "\n".join(choices)

        prompt = medqa_prompt.format(prefix=prefix, question=question, suffix=suffix, choices=choices_str)
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

        with torch.no_grad():
            outputs = model.generate(**inputs, max_new_tokens=100)

        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        predicted_answer = response.split("Response:")[-1].strip()

        print("\nModel's response:")
        print(predicted_answer)

        # Log the Q&A
        qa_entry = {
            "timestamp": datetime.now().isoformat(),
            "prefix": prefix,
            "question": question,
            "suffix": suffix,
            "choices": choices,
            "model_response": predicted_answer
        }
        qa_log.append(qa_entry)

        # Save to file
        with open(save_file, 'w') as f:
            json.dump(qa_log, f, indent=2)
        print(f"Q&A saved to {save_file}")

        another = input("\nDo you want to ask another question? (yes/no): ")
        if another.lower() != 'yes':
            break

    print(f"All Q&As have been saved to {save_file}")


# Uncomment the following line if you want to allow user interaction after evaluation
user_inference(model, tokenizer)

Enter a prefix (or press Enter for default): the patient is female
Enter your medical question: what is the most likely diagnosis
Enter a suffix (or press Enter for default): they are in india
Enter your answer choices (press Enter after each choice, type 'done' when finished):
tb
aids
cancer
done


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.



Model's response:
aids
Q&A saved to user_qa_log.json

Do you want to ask another question? (yes/no): no
All Q&As have been saved to user_qa_log.json


<a name="Save"></a>
### Saving, loading finetuned models
To save the final model as LoRA adapters, either use Huggingface's `push_to_hub` for an online save or `save_pretrained` for a local save.

**[NOTE]** This ONLY saves the LoRA adapters, and not the full model. To save to 16bit or GGUF, scroll down!

In [None]:
# Save name
clean_model_name = model_name.replace("/", "_").replace("-", "_")
save_name = f"medqa_{clean_model_name}"

model.save_pretrained(save_name)
tokenizer.save_pretrained(save_name)
model.push_to_hub(f"XXXX/{save_name}", token="XXXX", private=True)  # Local saving
tokenizer.push_to_hub(f"XXXX/{save_name}", token="XXXX", private=True)  # Local saving