In [1]:
# Google Colabでの設定
google_colab = True

if google_colab:
    from google.colab import drive
    from google.colab import userdata

    drive.mount("/content/drive")

    # ディレクトリ移動
    %cd /content/drive/MyDrive/Python/kaggle_map/src

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive/Python/kaggle_map/src


In [2]:
import os
import json
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

In [3]:
class CFG:
    """Settings for merging the LoRA adapter into its base model."""

    # --- Experiment identity ---
    comp_name = "kaggle_map"
    exp_name = "exp030_qwen2.5-32b-lora-softlabel-fold3"
    model_name = "Qwen/Qwen2.5-32B-Instruct"

    # --- Paths (relative strings, resolved against the current working directory) ---
    # Experiment output directory; all_completions.json is read from here.
    all_completions_path = f"{exp_name}/output"
    # The adapter directory sits inside the experiment output directory.
    adapter_path = f"{all_completions_path}/model"
    # Destination for the merged model.
    save_dir = f"validation_v2/{exp_name}/model"

    # --- Device: CUDA when available, otherwise CPU ---
    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [4]:
# Create the output directory for the merged model; exist_ok=True makes this a no-op
# when the directory is already there, so the cell can be re-run safely.
os.makedirs(CFG.save_dir, exist_ok=True)

## モデルとLoRAアダプターのマージ

In [5]:
# Load the base model named by CFG.model_name with bfloat16 weights.
# device_map="auto" leaves weight placement across the available devices to the loader.
# NOTE(review): trust_remote_code=True permits repository-supplied Python to run at load
# time — confirm it is actually required for this checkpoint.
model = AutoModelForCausalLM.from_pretrained(
    CFG.model_name,
    dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True
)

# Load the tokenizer from the adapter directory, not from the base model name.
# NOTE(review): presumably the tokenizer saved next to the adapter is the one used during
# fine-tuning (its length drives the embedding resize later) — confirm against training.
tokenizer = AutoTokenizer.from_pretrained(
    CFG.adapter_path,
    trust_remote_code=True
)

Loading checkpoint shards:   0%|          | 0/17 [00:00<?, ?it/s]

In [6]:
# Report the input-embedding row count, resize it to the tokenizer length, report again.
# NOTE(review): presumably this mirrors the resize applied at training time so that the
# adapter weights line up with the base model — confirm before changing.
embedding_rows_before = model.get_input_embeddings().num_embeddings
print(f"Model embeddings size before resizing: {embedding_rows_before}")

tokenizer_length = len(tokenizer)
model.resize_token_embeddings(tokenizer_length)
print(f"Resized model embeddings to {tokenizer_length} tokens")

embedding_rows_after = model.get_input_embeddings().num_embeddings
print(f"Model embeddings size after resizing: {embedding_rows_after}")

Model embeddings size before resizing: 152064
Resized model embeddings to 151729 tokens
Model embeddings size after resizing: 151729


In [7]:
# Attach the LoRA adapter stored at CFG.adapter_path to the already-loaded base model.
# This rebinds `model` to the PEFT-wrapped model, so the cell is not idempotent:
# re-running it without reloading the base model passes the wrapped model back in.
print(f"Loading LoRA adapter from {CFG.adapter_path}...")
model = PeftModel.from_pretrained(
    model,
    CFG.adapter_path,
    torch_dtype=torch.bfloat16  # NOTE(review): forwarded as an extra keyword argument; confirm PEFT actually honors it
)

Loading LoRA adapter from exp030_qwen2.5-32b-lora-softlabel-fold3/output/model...


In [8]:
# Merge the LoRA adapter into the base model and keep the result as `merged_model`.
# NOTE(review): safe_merge=True is expected to make PEFT check the merged weights
# (e.g. for NaNs) before committing them — confirm against the installed PEFT version.
print("Merging LoRA adapter with base model...")
merged_model = model.merge_and_unload(safe_merge=True)
print("Merge completed successfully")

Merging LoRA adapter with base model...
Merge completed successfully


In [9]:
# Write the merged model and the tokenizer into CFG.save_dir so the directory can be
# loaded on its own, without the separate adapter.
print(f"Saving merged model to {CFG.save_dir}...")
merged_model.save_pretrained(CFG.save_dir)
tokenizer.save_pretrained(CFG.save_dir)
print("Model saved successfully!")

Saving merged model to validation_v2/exp030_qwen2.5-32b-lora-softlabel-fold3/model...
Model saved successfully!


In [10]:
import shutil
import os

# Copy all_completions.json from the experiment output directory into the
# merged-model directory so it sits alongside the saved weights.
source_file = os.path.join(CFG.all_completions_path, "all_completions.json")
destination_file = os.path.join(CFG.save_dir, "all_completions.json")

# Check the source explicitly: shutil.copyfile also raises FileNotFoundError when the
# destination directory is missing, so catching that exception alone would blame the
# source file for what is really a destination problem.
if not os.path.exists(source_file):
    print(f"Error: Source file not found at {source_file}")
else:
    try:
        shutil.copyfile(source_file, destination_file)
    except Exception as e:
        # Best-effort copy: report the failure and let the notebook continue.
        print(f"An error occurred during copying: {e}")
    else:
        # Only reached when the copy itself raised nothing.
        print(f"Successfully copied {source_file} to {destination_file}")

Successfully copied exp030_qwen2.5-32b-lora-softlabel-fold3/output/all_completions.json to validation_v2/exp030_qwen2.5-32b-lora-softlabel-fold3/model/all_completions.json


In [11]:
# Verify the save: list every entry in CFG.save_dir with its size in GB
import os

bytes_per_gb = 1024**3  # divisor used to convert a byte count to GB

print("\nSaved files:")
for entry_name in os.listdir(CFG.save_dir):
    size_in_gb = os.path.getsize(os.path.join(CFG.save_dir, entry_name)) / bytes_per_gb
    print(f"  {entry_name}: {size_in_gb:.2f} GB")


Saved files:
  config.json: 0.00 GB
  generation_config.json: 0.00 GB
  model-00001-of-00014.safetensors: 4.55 GB
  model-00002-of-00014.safetensors: 4.54 GB
  model-00003-of-00014.safetensors: 4.54 GB
  model-00004-of-00014.safetensors: 4.54 GB
  model-00005-of-00014.safetensors: 4.54 GB
  model-00006-of-00014.safetensors: 4.54 GB
  model-00007-of-00014.safetensors: 4.54 GB
  model-00008-of-00014.safetensors: 4.54 GB
  model-00009-of-00014.safetensors: 4.54 GB
  model-00010-of-00014.safetensors: 4.54 GB
  model-00011-of-00014.safetensors: 4.54 GB
  model-00012-of-00014.safetensors: 4.54 GB
  model-00013-of-00014.safetensors: 4.54 GB
  model-00014-of-00014.safetensors: 1.97 GB
  model.safetensors.index.json: 0.00 GB
  chat_template.jinja: 0.00 GB
  tokenizer_config.json: 0.00 GB
  special_tokens_map.json: 0.00 GB
  added_tokens.json: 0.00 GB
  vocab.json: 0.00 GB
  merges.txt: 0.00 GB
  tokenizer.json: 0.01 GB
  all_completions.json: 0.00 GB


In [12]:
import time
from google.colab import runtime

def disconnect_runtime_after_timeout(timeout=3600):
    """Wait for ``timeout`` seconds, then release the Colab runtime.

    Prints a notice giving the delay in whole minutes (``timeout // 60``), blocks
    in ``time.sleep`` for the full duration, prints a second notice and finally
    calls ``runtime.unassign()``.

    Args:
        timeout: Delay in seconds before the runtime is released. Defaults to 3600.
    """
    minutes_until_disconnect = timeout // 60
    print(f"ランタイムが{minutes_until_disconnect}分後に自動で切断されます。")

    # Blocks the cell for the whole delay.
    time.sleep(timeout)

    print("ランタイムを切断します...")
    runtime.unassign()

# Release the runtime 600 seconds (10 minutes) after this cell starts.
disconnect_runtime_after_timeout(600)

ランタイムが10分後に自動で切断されます。
ランタイムを切断します...
