In [None]:
# Google Colab setup
# Toggle for the Colab-only steps below; set to False when running outside Google Colab.
google_colab = True

if google_colab:
    from google.colab import drive
    from google.colab import userdata  # NOTE(review): not referenced in the visible cells — confirm it is still needed

    # Mount Google Drive at /content/drive.
    drive.mount("/content/drive")

    # Change the working directory (IPython magic); relative paths used later
    # in the notebook are resolved from here.
    %cd /content/drive/MyDrive/Python/kaggle_map/src

In [None]:
import os
import json
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

In [None]:
class CFG:
    """Static settings for merging a LoRA adapter into its base model.

    Every value is a class attribute and is read directly from the class
    (for example ``CFG.save_dir``).

    Attributes:
        comp_name: Identifier of the competition/project.
        exp_name: Identifier of the experiment; all paths are derived from it.
        model_name: Model id handed to ``AutoModelForCausalLM.from_pretrained``.
        adapter_path: Directory the tokenizer and the LoRA adapter are loaded from.
        save_dir: Directory the merged model and tokenizer are written to.
        all_completions_path: Directory expected to contain ``all_completions.json``.
        device: ``cuda`` when ``torch.cuda.is_available()`` is true, otherwise ``cpu``.
    """

    # ---- Experiment identity ----
    comp_name = "kaggle_map"
    exp_name = "exp026_qwen2.5-14b-lora-softlabel"
    model_name = "Qwen/Qwen2.5-14B-Instruct"

    # ---- Paths (relative, so they depend on the current working directory) ----
    adapter_path = "/".join((exp_name, "output", "model"))
    save_dir = "/".join(("validation", exp_name, "model"))
    all_completions_path = "/".join((exp_name, "output"))

    # ---- Device selection ----
    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
# Create the output directory for the merged model; exist_ok=True keeps this
# cell safe to re-run when the directory already exists.
os.makedirs(CFG.save_dir, exist_ok=True)

## モデルとLoRAアダプターのマージ

In [None]:
# Load the base model in bfloat16.
# NOTE(review): device_map="auto" delegates device placement to the library —
# confirm no layers end up offloaded before the merge step.
# NOTE(review): trust_remote_code=True permits repository-provided code to run;
# confirm it is actually required for this model.
model = AutoModelForCausalLM.from_pretrained(
    CFG.model_name,
    dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True
)

# Load the tokenizer from the adapter directory rather than from the base model id
# (presumably the fine-tuning run saved its tokenizer there — confirm).
tokenizer = AutoTokenizer.from_pretrained(
    CFG.adapter_path,
    trust_remote_code=True
)

In [None]:
# Match the model's input-embedding table to the tokenizer length and report
# the embedding count before and after the resize.
vocab_size = len(tokenizer)
print(f"Model embeddings size before resizing: {model.get_input_embeddings().num_embeddings}")

model.resize_token_embeddings(vocab_size)
print(f"Resized model embeddings to {vocab_size} tokens")

print(f"Model embeddings size after resizing: {model.get_input_embeddings().num_embeddings}")

In [None]:
# Load the LoRA adapter on top of the base model.
# NOTE: `model` is rebound to the object returned by PeftModel.from_pretrained,
# so re-running this cell alone would feed the already-wrapped model back in.
print(f"Loading LoRA adapter from {CFG.adapter_path}...")
model = PeftModel.from_pretrained(
    model,
    CFG.adapter_path,
    torch_dtype=torch.bfloat16  # NOTE(review): confirm PEFT honors this keyword when loading an adapter
)

In [None]:
# Merge the adapter into the base model; the result is kept under a new name
# (`merged_model`) so the adapter-wrapped `model` is not overwritten.
# NOTE(review): safe_merge=True presumably enables PEFT's extra validity check
# on the merged weights — confirm against the PEFT documentation.
print("Merging LoRA adapter with base model...")
merged_model = model.merge_and_unload(safe_merge=True)
print("Merge completed successfully")

In [None]:
# Persist the merged model first, then the tokenizer, into the same directory.
print(f"Saving merged model to {CFG.save_dir}...")
for artifact in (merged_model, tokenizer):
    artifact.save_pretrained(CFG.save_dir)
print("Model saved successfully!")

In [None]:
import os
import shutil

# Place a copy of all_completions.json from the experiment output next to the
# merged model.
source_file = os.path.join(CFG.all_completions_path, "all_completions.json")
destination_file = os.path.join(CFG.save_dir, "all_completions.json")

# Best-effort copy: any failure is reported on stdout instead of being raised,
# so the notebook keeps running.
try:
    shutil.copyfile(source_file, destination_file)
except FileNotFoundError:
    print(f"Error: Source file not found at {source_file}")
except Exception as exc:
    print(f"An error occurred during copying: {exc}")
else:
    print(f"Successfully copied {source_file} to {destination_file}")

In [None]:
# Verify the save: print every entry of the output directory with its size.
import os

print("\nSaved files:")
for entry_name in os.listdir(CFG.save_dir):
    # Size in bytes divided by 1024**3; the label printed is "GB".
    size_gb = os.path.getsize(os.path.join(CFG.save_dir, entry_name)) / (1024**3)
    print(f"  {entry_name}: {size_gb:.2f} GB")

In [None]:
import time


def disconnect_runtime_after_timeout(timeout=3600):
    """Wait ``timeout`` seconds, then release the Google Colab runtime.

    Args:
        timeout: Delay in seconds before disconnecting. The announcement
            printed up front shows it in whole minutes (``timeout // 60``).

    Raises:
        ImportError: If ``google.colab`` is not importable, i.e. when called
            outside Colab. Raised before any waiting happens.
    """
    # Imported lazily so that defining this function never fails outside Colab,
    # and so a missing module is reported before the sleep rather than after.
    from google.colab import runtime

    print(f"ランタイムが{timeout // 60}分後に自動で切断されます。")
    time.sleep(timeout)
    print("ランタイムを切断します...")
    runtime.unassign()


# Only a Colab runtime can be unassigned; honor the same `google_colab` switch
# that guards the Colab-only setup at the top of the notebook.
if google_colab:
    disconnect_runtime_after_timeout(600)