In [1]:
# --------------------------------------------
# Cell 1: Initial imports and environment setup
# --------------------------------------------
import torch
import numpy as np
import os
from tqdm import tqdm
from datasets import load_dataset
from sacrebleu import corpus_bleu
from transformers import AutoModelForCausalLM, AutoTokenizer
import evaluate
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    Trainer,
    TrainingArguments
)

print(f"NumPy version: {np.__version__}")
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU name: {torch.cuda.get_device_name(0)}")

os.environ["HUGGING_FACE_HUB_TOKEN"] = "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"

NumPy version: 1.25.2
PyTorch version: 2.5.1+cu118
CUDA available: True
GPU name: NVIDIA A100-SXM4-40GB


In [2]:
# --------------------------------------------
# Cell 2: Load Base Model & Tokenizer (Baseline)
# --------------------------------------------
model_name = "meta-llama/Llama-2-7b-hf"

print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(model_name)

print("Loading base model in float16 with device_map='auto'...")
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.float16
)

# Ensure a pad token
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

print("Baseline model and tokenizer loaded.")


Loading tokenizer...
Loading base model in float16 with device_map='auto'...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Baseline model and tokenizer loaded.


In [3]:
# --------------------------------------------
# Cell 3: Prepare WMT19 Training Data (Prompt Masking)
# --------------------------------------------
def build_full_text(example):
    german = example["translation"]["de"]
    english = example["translation"]["en"]
    prompt = (
        "Translate this German text into fluent English.\n"
        f"{german}\n"
        "Translation:"
    )
    full_text = prompt + " " + english
    return {"full_text": full_text}

def load_and_format_wmt(num_examples=10000):
    print(f"Loading WMT19 (de-en) train data with {num_examples} examples...")
    dataset = load_dataset("wmt19", "de-en", split="train")
    dataset = dataset.shuffle(seed=42).select(range(num_examples))
    
    dataset = dataset.map(
        build_full_text,
        desc="Building prompt + target text",
        remove_columns=dataset.column_names
    )
    return dataset

class PromptMaskCollator:
    def __init__(self, tokenizer, max_length=512):
        self.tokenizer = tokenizer
        self.max_length = max_length
        
    def __call__(self, examples):
        texts = [ex["full_text"] for ex in examples]
        
        tokenized = self.tokenizer(
            texts,
            truncation=True,
            max_length=self.max_length,
            padding="max_length",
            return_tensors="pt"
        )
        input_ids = tokenized["input_ids"]
        attention_mask = tokenized["attention_mask"]
        labels = input_ids.clone()
        
        for i, text in enumerate(texts):
            if "Translation:" in text:
                prompt_part, _ = text.split("Translation:", 1)
                prompt_part = prompt_part + "Translation:"
            else:
                prompt_part = text
            
            prompt_ids = self.tokenizer(
                prompt_part,
                truncation=True,
                max_length=self.max_length,
                add_special_tokens=True
            )["input_ids"]
            
            prompt_len = len(prompt_ids)
            if prompt_len > labels.size(1):
                prompt_len = labels.size(1)
            
            # Mask out prompt tokens
            labels[i, :prompt_len] = -100
        
        return {
            "input_ids": input_ids,
            "attention_mask": attention_mask,
            "labels": labels
        }

# Load training data for fine-tuning
training_data = load_and_format_wmt(num_examples=10000)
data_collator = PromptMaskCollator(tokenizer, max_length=512)

print("Training data prepared with prompt masking.")


Loading WMT19 (de-en) train data with 100000 examples...


Building prompt + target text:   0%|          | 0/100000 [00:00<?, ? examples/s]

Training data prepared with prompt masking.


In [4]:
# --------------------------------------------
# Cell 4: LoRA Configuration and Model
# --------------------------------------------
from peft import LoraConfig, get_peft_model, TaskType

def setup_lora_model():
    print("Setting up LoRA model...")
    torch.cuda.empty_cache()
    
    lora_base = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map="auto",
        torch_dtype=torch.float16
    )
    # Freeze embeddings
    for param in lora_base.get_input_embeddings().parameters():
        param.requires_grad = False
    
    lora_config = LoraConfig(
        r=8,
        lora_alpha=32,
        lora_dropout=0.1,
        bias="none",
        task_type=TaskType.CAUSAL_LM,
        target_modules=["q_proj", "v_proj", "k_proj", "o_proj"]
    )
    
    lora_model = get_peft_model(lora_base, lora_config)
    lora_model.print_trainable_parameters()
    return lora_model

model_for_training = setup_lora_model()

print("LoRA model is ready.")


Setting up LoRA model...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

trainable params: 8,388,608 || all params: 6,746,804,224 || trainable%: 0.1243
LoRA model is ready.


In [5]:
# --------------------------------------------
# Cell 5: Training Arguments
# --------------------------------------------
os.makedirs("./my_results", exist_ok=True)  

train_args = TrainingArguments(
    output_dir="./my_results",    
    num_train_epochs=2,           # 1 epoch over 50k lines
    per_device_train_batch_size=2,
    gradient_accumulation_steps=16,
    learning_rate=1e-5,
    fp16=True,
    save_steps=500,
    logging_steps=100,
    weight_decay=0.01,
    warmup_steps=250,
    remove_unused_columns=False
)

print("Training arguments set. Output will be saved to ./my_results")
print(f"Planned: {train_args.num_train_epochs} epochs on ~{len(training_data)} examples.")


Training arguments set. Output will be saved to ./my_results
Planned: 3 epochs on ~100000 examples.


In [6]:
# --------------------------------------------
# Cell 6: Initialize Trainer & Start Fine-tuning
# --------------------------------------------
trainer = Trainer(
    model=model_for_training,
    args=train_args,
    train_dataset=training_data,
    data_collator=data_collator
)

print("Starting LoRA fine-tuning...\n")
train_output = trainer.train()
print("\nTraining finished.")

print("Training metrics:")
print(train_output)

# Save the final fine-tuned LoRA model
trainer.save_model("./my_results/lora_7b")
print("Fine-tuning done. Model saved at ./my_results/lora_7b")


Starting LoRA fine-tuning...



Step,Training Loss
200,30.3881
400,2.573
600,0.0653
800,0.0629
1000,0.0601
1200,0.0603
1400,0.0592
1600,0.0601
1800,0.0594
2000,0.0589



Training finished.
Training metrics:
TrainOutput(global_step=9375, training_loss=0.7584611862182618, metrics={'train_runtime': 37908.6319, 'train_samples_per_second': 7.914, 'train_steps_per_second': 0.247, 'total_flos': 6.0970588176384e+18, 'train_loss': 0.7584611862182618, 'epoch': 3.0})
Fine-tuning done. Model saved at ./my_results/lora_7b


In [7]:
# --------------------------------------------
# Cell 7: Load Evaluation Data
# --------------------------------------------
def load_eval_data(num_examples=500):
    print(f"Loading WMT19 (de-en) validation data with {num_examples} examples...")
    eval_data = load_dataset("wmt19", "de-en", split="validation")
    eval_data = eval_data.select(range(min(num_examples, len(eval_data))))
    return eval_data

eval_dataset = load_eval_data(num_examples=50)  

print("Validation data loaded.")


Loading WMT19 (de-en) validation data with 500 examples...
Validation data loaded.


In [8]:
# --------------------------------------------
# Cell 8: Debugging-Aware Evaluation Function
# --------------------------------------------
def build_prompt_for_translation(german_text: str) -> str:

    prompt = (
        "You are a professional translator specializing in German to English.\n\n"
        "Below is some text in German. Translate it into fluent, idiomatic English.\n\n"
        f"German text:\n{german_text}\n\n"
        "English translation:"
    )
    return prompt

def debug_evaluate_model(
    model,
    tokenizer,
    eval_dataset,
    num_examples=50,      
    debug_print=5,        
    description="Model"
):
    """
    Evaluates the given `model` on `eval_dataset` with a debugging-friendly process:
      - Prints the exact prompt
      - Prints the tokenized input (input_ids)
      - Prints the full model output (with special tokens)
      - Prints the final extracted translation
    Then computes BLEU & COMET metrics as a rough measure of translation quality.
    """
    comet_metric = evaluate.load("comet")

    predictions = []
    references = []
    sources = []

    subset = eval_dataset.select(range(min(num_examples, len(eval_dataset))))
    print(f"\n[DEBUG EVAL] {description} on {num_examples} examples...\n")

    for i, ex in enumerate(tqdm(subset, desc=f"Evaluating {description}")):
        src_de = ex["translation"]["de"]
        ref_en = ex["translation"]["en"]

        prompt_text = build_prompt_for_translation(src_de)

        tokenized_input = tokenizer(
            prompt_text,
            return_tensors="pt",
            add_special_tokens=True
        ).to(model.device)

        with torch.no_grad():
            output_ids = model.generate(
                **tokenized_input,
                max_new_tokens=256,
                num_beams=4,
                do_sample=False,
                early_stopping=True,
                pad_token_id=tokenizer.eos_token_id,
                eos_token_id=tokenizer.eos_token_id
            )

        full_output_text = tokenizer.decode(output_ids[0], skip_special_tokens=False)

        if "English translation:" in full_output_text:
            pred_en = full_output_text.rsplit("English translation:", 1)[-1].strip()
        else:
            pred_en = full_output_text

        predictions.append(pred_en)
        references.append([ref_en])
        sources.append(src_de)

        if i < debug_print:
            print("\n==========================================")
            print(f"Example {i}")
            print("---------------[ PROMPT ]-----------------")
            print(prompt_text)
            print("--------------[ TOKENIZED ]---------------")
            print(f"Input IDs: {tokenized_input['input_ids'][0].tolist()}")
            print("-----------[ FULL MODEL OUTPUT ]----------")
            print(repr(full_output_text))
            print("-------------[ EXTRACTED EN ]-------------")
            print(repr(pred_en))
            print("--------------[ REFERENCE ]---------------")
            print(ref_en)
            print("==========================================\n")

    # 6) Compute BLEU
    bleu = corpus_bleu(predictions, references)
    print(f"[{description}] BLEU = {bleu.score:.2f}")

    comet_results = comet_metric.compute(
        predictions=predictions,
        references=[r[0] for r in references],
        sources=sources
    )
    print(f"[{description}] COMET = {comet_results['mean_score']:.3f}\n")

    return {
        "predictions": predictions,
        "references": references,
        "bleu": bleu.score,
        "comet": comet_results["mean_score"]
    }


In [9]:
# --------------------------------------------
# Cell 9: Compare Baseline vs. LoRA-Fine-Tuned
# --------------------------------------------
print("Evaluating Baseline Model with Debug Info...")
baseline_debug_results = debug_evaluate_model(
    model=base_model,
    tokenizer=tokenizer,
    eval_dataset=eval_dataset,
    num_examples=20,    
    debug_print=3,      
    description="Baseline LLaMA"
)

print("Evaluating LoRA-Fine-Tuned Model with Debug Info...")
lora_debug_results = debug_evaluate_model(
    model=model_for_training,  
    tokenizer=tokenizer,
    eval_dataset=eval_dataset,
    num_examples=20,
    debug_print=3,
    description="LoRA Fine-Tuned"
)

print("Comparison of final metrics:")
print(f"Baseline -> BLEU = {baseline_debug_results['bleu']:.2f}, COMET = {baseline_debug_results['comet']:.3f}")
print(f"LoRA     -> BLEU = {lora_debug_results['bleu']:.2f}, COMET = {lora_debug_results['comet']:.3f}")


Evaluating Baseline Model with Debug Info...


Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Lightning automatically upgraded your loaded checkpoint from v1.8.3.post1 to v2.4.0. To apply the upgrade to your files permanently, run `python -m pytorch_lightning.utilities.upgrade_checkpoint ../../.cache/huggingface/hub/models--Unbabel--wmt22-comet-da/snapshots/f49d328952c3470eff6bb6f545d62bfdb6e66304/checkpoints/model.ckpt`
Encoder model frozen.
/opt/conda/lib/python3.11/site-packages/pytorch_lightning/core/saving.py:195: Found keys that are not in the model state dict but in the checkpoint: ['encoder.model.embeddings.position_ids']



[DEBUG EVAL] Baseline LLaMA on 50 examples...



Evaluating Baseline LLaMA:   2%|█▏                                                       | 1/50 [00:04<03:24,  4.18s/it]


Example 0
---------------[ PROMPT ]-----------------
You are a professional translator specializing in German to English.

Below is some text in German. Translate it into fluent, idiomatic English.

German text:
München 1856: Vier Karten, die Ihren Blick auf die Stadt verändern

English translation:
--------------[ TOKENIZED ]---------------
Input IDs: [1, 887, 526, 263, 10257, 5578, 1061, 4266, 5281, 297, 5332, 304, 4223, 29889, 13, 13, 21140, 340, 338, 777, 1426, 297, 5332, 29889, 4103, 9632, 372, 964, 1652, 8122, 29892, 1178, 14910, 2454, 4223, 29889, 13, 13, 29954, 3504, 1426, 29901, 13, 29924, 3346, 2724, 29871, 29896, 29947, 29945, 29953, 29901, 23650, 476, 8109, 29892, 762, 306, 13608, 350, 1406, 1622, 762, 5587, 1147, 3140, 824, 13, 13, 24636, 13962, 29901]
-----------[ FULL MODEL OUTPUT ]----------
'<s> You are a professional translator specializing in German to English.\n\nBelow is some text in German. Translate it into fluent, idiomatic English.\n\nGerman text:\nMünchen 185

Evaluating Baseline LLaMA:   4%|██▎                                                      | 2/50 [00:04<01:38,  2.06s/it]


Example 1
---------------[ PROMPT ]-----------------
You are a professional translator specializing in German to English.

Below is some text in German. Translate it into fluent, idiomatic English.

German text:
Eine Irren-Anstalt, wo sich heute Jugendliche begegnen sollen.

English translation:
--------------[ TOKENIZED ]---------------
Input IDs: [1, 887, 526, 263, 10257, 5578, 1061, 4266, 5281, 297, 5332, 304, 4223, 29889, 13, 13, 21140, 340, 338, 777, 1426, 297, 5332, 29889, 4103, 9632, 372, 964, 1652, 8122, 29892, 1178, 14910, 2454, 4223, 29889, 13, 13, 29954, 3504, 1426, 29901, 13, 29923, 457, 6600, 1267, 29899, 2744, 303, 1997, 29892, 8879, 2160, 12843, 19472, 4545, 1812, 387, 4566, 899, 2435, 29889, 13, 13, 24636, 13962, 29901]
-----------[ FULL MODEL OUTPUT ]----------
"<s> You are a professional translator specializing in German to English.\n\nBelow is some text in German. Translate it into fluent, idiomatic English.\n\nGerman text:\nEine Irren-Anstalt, wo sich heute Jugendl

Evaluating Baseline LLaMA:   6%|███▍                                                     | 3/50 [00:05<01:06,  1.41s/it]


Example 2
---------------[ PROMPT ]-----------------
You are a professional translator specializing in German to English.

Below is some text in German. Translate it into fluent, idiomatic English.

German text:
Eine Gruftkapelle, wo nun für den S-Bahn-Tunnel gegraben wird.

English translation:
--------------[ TOKENIZED ]---------------
Input IDs: [1, 887, 526, 263, 10257, 5578, 1061, 4266, 5281, 297, 5332, 304, 4223, 29889, 13, 13, 21140, 340, 338, 777, 1426, 297, 5332, 29889, 4103, 9632, 372, 964, 1652, 8122, 29892, 1178, 14910, 2454, 4223, 29889, 13, 13, 29954, 3504, 1426, 29901, 13, 29923, 457, 5430, 615, 21474, 1808, 29892, 8879, 11923, 1865, 972, 317, 29899, 29933, 5422, 29899, 29911, 16163, 21598, 336, 1785, 4296, 29889, 13, 13, 24636, 13962, 29901]
-----------[ FULL MODEL OUTPUT ]----------
'<s> You are a professional translator specializing in German to English.\n\nBelow is some text in German. Translate it into fluent, idiomatic English.\n\nGerman text:\nEine Gruftkapelle, 

Evaluating Baseline LLaMA: 100%|████████████████████████████████████████████████████████| 50/50 [01:12<00:00,  1.45s/it]


[Baseline LLaMA] BLEU = 47.92


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA A100-SXM4-40GB') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if poss

[Baseline LLaMA] COMET = 0.808

Evaluating LoRA-Fine-Tuned Model with Debug Info...


Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Lightning automatically upgraded your loaded checkpoint from v1.8.3.post1 to v2.4.0. To apply the upgrade to your files permanently, run `python -m pytorch_lightning.utilities.upgrade_checkpoint ../../.cache/huggingface/hub/models--Unbabel--wmt22-comet-da/snapshots/f49d328952c3470eff6bb6f545d62bfdb6e66304/checkpoints/model.ckpt`
Encoder model frozen.
/opt/conda/lib/python3.11/site-packages/pytorch_lightning/core/saving.py:195: Found keys that are not in the model state dict but in the checkpoint: ['encoder.model.embeddings.position_ids']



[DEBUG EVAL] LoRA Fine-Tuned on 50 examples...



Evaluating LoRA Fine-Tuned:   2%|█                                                       | 1/50 [00:01<00:59,  1.20s/it]


Example 0
---------------[ PROMPT ]-----------------
You are a professional translator specializing in German to English.

Below is some text in German. Translate it into fluent, idiomatic English.

German text:
München 1856: Vier Karten, die Ihren Blick auf die Stadt verändern

English translation:
--------------[ TOKENIZED ]---------------
Input IDs: [1, 887, 526, 263, 10257, 5578, 1061, 4266, 5281, 297, 5332, 304, 4223, 29889, 13, 13, 21140, 340, 338, 777, 1426, 297, 5332, 29889, 4103, 9632, 372, 964, 1652, 8122, 29892, 1178, 14910, 2454, 4223, 29889, 13, 13, 29954, 3504, 1426, 29901, 13, 29924, 3346, 2724, 29871, 29896, 29947, 29945, 29953, 29901, 23650, 476, 8109, 29892, 762, 306, 13608, 350, 1406, 1622, 762, 5587, 1147, 3140, 824, 13, 13, 24636, 13962, 29901]
-----------[ FULL MODEL OUTPUT ]----------
'<s> You are a professional translator specializing in German to English.\n\nBelow is some text in German. Translate it into fluent, idiomatic English.\n\nGerman text:\nMünchen 185

Evaluating LoRA Fine-Tuned:   4%|██▏                                                     | 2/50 [00:02<00:48,  1.01s/it]


Example 1
---------------[ PROMPT ]-----------------
You are a professional translator specializing in German to English.

Below is some text in German. Translate it into fluent, idiomatic English.

German text:
Eine Irren-Anstalt, wo sich heute Jugendliche begegnen sollen.

English translation:
--------------[ TOKENIZED ]---------------
Input IDs: [1, 887, 526, 263, 10257, 5578, 1061, 4266, 5281, 297, 5332, 304, 4223, 29889, 13, 13, 21140, 340, 338, 777, 1426, 297, 5332, 29889, 4103, 9632, 372, 964, 1652, 8122, 29892, 1178, 14910, 2454, 4223, 29889, 13, 13, 29954, 3504, 1426, 29901, 13, 29923, 457, 6600, 1267, 29899, 2744, 303, 1997, 29892, 8879, 2160, 12843, 19472, 4545, 1812, 387, 4566, 899, 2435, 29889, 13, 13, 24636, 13962, 29901]
-----------[ FULL MODEL OUTPUT ]----------
'<s> You are a professional translator specializing in German to English.\n\nBelow is some text in German. Translate it into fluent, idiomatic English.\n\nGerman text:\nEine Irren-Anstalt, wo sich heute Jugendl

Evaluating LoRA Fine-Tuned:   6%|███▎                                                    | 3/50 [00:03<00:46,  1.01it/s]


Example 2
---------------[ PROMPT ]-----------------
You are a professional translator specializing in German to English.

Below is some text in German. Translate it into fluent, idiomatic English.

German text:
Eine Gruftkapelle, wo nun für den S-Bahn-Tunnel gegraben wird.

English translation:
--------------[ TOKENIZED ]---------------
Input IDs: [1, 887, 526, 263, 10257, 5578, 1061, 4266, 5281, 297, 5332, 304, 4223, 29889, 13, 13, 21140, 340, 338, 777, 1426, 297, 5332, 29889, 4103, 9632, 372, 964, 1652, 8122, 29892, 1178, 14910, 2454, 4223, 29889, 13, 13, 29954, 3504, 1426, 29901, 13, 29923, 457, 5430, 615, 21474, 1808, 29892, 8879, 11923, 1865, 972, 317, 29899, 29933, 5422, 29899, 29911, 16163, 21598, 336, 1785, 4296, 29889, 13, 13, 24636, 13962, 29901]
-----------[ FULL MODEL OUTPUT ]----------
'<s> You are a professional translator specializing in German to English.\n\nBelow is some text in German. Translate it into fluent, idiomatic English.\n\nGerman text:\nEine Gruftkapelle, 

Evaluating LoRA Fine-Tuned: 100%|███████████████████████████████████████████████████████| 50/50 [01:15<00:00,  1.50s/it]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


[LoRA Fine-Tuned] BLEU = 47.92


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


[LoRA Fine-Tuned] COMET = 0.808

Comparison of final metrics:
Baseline -> BLEU = 47.92, COMET = 0.808
LoRA     -> BLEU = 47.92, COMET = 0.808


In [10]:
# --------------------------------------------
# Cell 10: Resume from Saved LoRA Checkpoint
# --------------------------------------------
from peft import PeftModel

def load_lora_model(checkpoint_path="./my_results/lora_7b"):
    """
    Loads the saved LoRA weights from disk, merges them with the base model, 
    and returns a ready-for-inference model.
    """
    print(f"Loading LoRA model from {checkpoint_path}...")
    # Load base again
    base = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map="auto",
        torch_dtype=torch.float16
    )
    # Merge LoRA
    lora_model_loaded = PeftModel.from_pretrained(base, checkpoint_path)
    return lora_model_loaded

# resumed_lora_model = load_lora_model("./my_results/lora_7b")
# debug_evaluate_model(resumed_lora_model, tokenizer, eval_dataset, num_examples=10, description="Resumed LoRA")


In [14]:
print("ID of base_model:", id(base_model))
print("ID of model_for_training:", id(model_for_training))


ID of base_model: 139838127032592
ID of model_for_training: 139838134663760
