In [None]:
!pip install -q -U accelerate transformers peft

In [None]:
# Set git's global `credential.helper` to `store` for the current user.
# NOTE(review): the `store` helper is understood to keep credentials in a
# plaintext file on disk — confirm this is acceptable on shared machines.
!git config --global credential.helper store

In [None]:
# Log in to the Hugging Face Hub from inside the notebook.
# NOTE(review): presumably only required for private/gated models or when
# `push_to_hub` is True — confirm before relying on skipping this cell.
from huggingface_hub import notebook_login
notebook_login()

In [None]:
# Cell 1: Imports
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import torch

In [None]:
# Cell 2: Parameters (replace the placeholder values with your actual paths and preferences)
# Base model name or path; passed to AutoModelForCausalLM.from_pretrained and
# AutoTokenizer.from_pretrained.
base_model_name_or_path = "YOUR_BASE_MODEL_PATH_OR_NAME"
# Adapter location; passed to PeftModel.from_pretrained.
peft_model_path = "YOUR_PEFT_MODEL_PATH"
# Destination of the merged model: the directory given to save_pretrained, or
# the target given to push_to_hub when `push_to_hub` is True.
output_dir = "YOUR_OUTPUT_DIRECTORY"
device = "auto"  # "auto" -> device_map="auto"; any other value (e.g. "cuda:0") -> device_map={"": device}
push_to_hub = False  # True: push model and tokenizer to the hub; False: save both to output_dir

In [None]:
# Cell 3: Load Base Model
print(f"Loading base model: {base_model_name_or_path}")

# Turn the `device` setting into the `device_map` keyword argument: "auto" is
# forwarded unchanged, any other value is wrapped as {"": device}
# (presumably placing the whole model on that one device — confirm).
# `device_arg` is kept as a dict because it is reused when loading the adapter.
device_arg = {"device_map": "auto" if device == "auto" else {"": device}}

# The base weights are requested in float16.
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name_or_path,
    return_dict=True,
    torch_dtype=torch.float16,
    **device_arg,
)

In [None]:
# Cell 4: Load PEFT and Merge
# Attach the adapter stored at `peft_model_path` to the already-loaded base
# model, then fold it into the model with merge_and_unload(). The wrapper gets
# its own name so `model` only ever refers to the merged result.
# NOTE(review): loading/merging presumably modifies `base_model` in place, so
# re-run the "Load Base Model" cell before re-running this one — confirm.
print(f"Loading PEFT: {peft_model_path}")
peft_model = PeftModel.from_pretrained(base_model, peft_model_path, **device_arg)
print("Running merge_and_unload")
model = peft_model.merge_and_unload()

# Cell 5: Tokenizer and Saving
# The tokenizer is taken from the base model location, not from the adapter path.
# NOTE(review): if the adapter was trained with an extended vocabulary, the
# tokenizer saved alongside the adapter may be the right one — verify.
tokenizer = AutoTokenizer.from_pretrained(base_model_name_or_path)

# Coerce once to str (same result as the previous f"{output_dir}" wrapping),
# so a Path-like `output_dir` keeps working in both branches.
save_target = str(output_dir)

if push_to_hub:
    # use_temp_dir=False is forwarded unchanged to both push_to_hub calls.
    print("Saving to hub ...")
    model.push_to_hub(save_target, use_temp_dir=False)
    tokenizer.push_to_hub(save_target, use_temp_dir=False)
else:
    # Local export: model and tokenizer are written to the same directory.
    model.save_pretrained(save_target)
    tokenizer.save_pretrained(save_target)
    print(f"Model saved to {output_dir}")