# Merging LoRA Adapters with Base Models

## Prepare environment

In [None]:
# Install the Hugging Face libraries that are imported later in this notebook.
%pip install datasets transformers peft

In [None]:
from google.colab import drive

# Mount Google Drive at /content/drive; force_remount=True is passed so the
# mount is redone even if the drive is already mounted.
drive.mount('/content/drive', force_remount=True)

# set to where you want to save model checkpoints and logs
# NOTE(review): `peft_model_directory` below is a relative path, so it is
# presumably resolved against this working directory -- confirm the adapter
# checkpoints live under this folder.
%cd /content/drive/MyDrive/AITA_FINETUNED_MODELS

In [None]:
from huggingface_hub import login

# Authenticate this session with the Hugging Face Hub.
# NOTE(review): presumably required both to download the gated Llama-2 base
# weights and to push the merged model with push_to_hub -- confirm.
login()

## Set merging parameters

In [None]:
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
)
from peft import PeftModel

In [None]:
# These four values are read by BOTH the "Llama-2 merging" and the
# "Flan-T5 merging" sections below; edit them to match the section you run.

# Base checkpoint passed to from_pretrained for the model and the tokenizer.
model_name = "meta-llama/Llama-2-13b-chat-hf"
# Location of the trained LoRA adapter handed to PeftModel.from_pretrained
# (a relative path).
peft_model_directory = "binary-classification/top-2k/llama-2-13b-chat-reddit-AITA-binary-top-2k"
# Forwarded as the device_map argument when loading the base model.
device_map = "auto"
# Hub repository that the merged model and tokenizer are pushed to.
hf_repo_name = "MattBoraske/llama-2-13b-chat-reddit-AITA-binary-top-2k"

## Llama-2 merging

In [None]:
# Load the base causal LM in half precision, attach the LoRA adapter found at
# `peft_model_directory`, and fold the adapter weights into the base model so
# that `model` ends up as the merged model with the adapter unloaded.
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    low_cpu_mem_usage=True, return_dict=True,
    torch_dtype=torch.float16, device_map=device_map,
)
model = PeftModel.from_pretrained(base_model, peft_model_directory).merge_and_unload()

# Load the base model's tokenizer so it can be published next to the merged
# weights, configured to pad on the right with a '[PAD]' pad token.
# NOTE(review): '[PAD]' may not exist in the base vocabulary -- confirm this
# matches the tokenizer setup that was used during fine-tuning.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.padding_side = "right"
tokenizer.pad_token = '[PAD]'

In [None]:
# Upload the merged model first, then the tokenizer, to the same Hub
# repository (`hf_repo_name`).
model.push_to_hub(hf_repo_name)
tokenizer.push_to_hub(hf_repo_name)

## Flan-T5 merging

In [None]:
# Load the base seq2seq LM in half precision, attach the LoRA adapter found at
# `peft_model_directory`, and fold the adapter weights into the base model so
# that `model` ends up as the merged model with the adapter unloaded.
# NOTE(review): this reads the same `model_name` / `peft_model_directory`
# variables as the Llama-2 section -- presumably they must be re-set to the
# Flan-T5 checkpoint and adapter before running this cell; confirm.
base_model = AutoModelForSeq2SeqLM.from_pretrained(
    model_name,
    low_cpu_mem_usage=True, return_dict=True,
    torch_dtype=torch.float16, device_map=device_map,
)
model = PeftModel.from_pretrained(base_model, peft_model_directory).merge_and_unload()

# The unmodified base tokenizer is loaded so it can be stored in the
# HuggingFace repo alongside the merged weights.
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [None]:
# Upload the merged model first, then the tokenizer, to the same Hub
# repository (`hf_repo_name`).
model.push_to_hub(hf_repo_name)
tokenizer.push_to_hub(hf_repo_name)