In [1]:
%reset -f
import torch
import gc
# Make CUDA device index 1 the current device.
# NOTE(review): the original inline comment said "GPU number 3", which does not
# match the index 1 passed here — confirm which GPU is actually intended.
torch.cuda.set_device(1)
# Run Python garbage collection so unreachable objects are freed first.
gc.collect()
# Ask PyTorch to release cached CUDA memory that it is no longer using.
torch.cuda.empty_cache()
# Clean up CUDA IPC memory that is no longer in use.
torch.cuda.ipc_collect()
!nvidia-smi

Sat Jun 21 14:19:57 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 570.153.02             Driver Version: 570.153.02     CUDA Version: 12.8     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA RTX A4000               On  |   00000000:4F:00.0 Off |                  Off |
| 41%   30C    P8             14W /  140W |      18MiB /  16376MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
|   1  NVIDIA RTX A4000               On  |   00

In [2]:
import os
# Environment configuration, applied in one update:
#   - PYTORCH_CUDA_ALLOC_CONF: option string for PyTorch's CUDA allocator.
#   - HF_HOME / HF_HUB_CACHE / HF_ASSETS_CACHE: Hugging Face cache directories.
# NOTE(review): this cell runs after the first cell has already imported torch
# and called torch.cuda functions, so the allocator option may be read too late
# to take effect — confirm, or move this assignment ahead of the torch import.
os.environ.update(
    {
        "PYTORCH_CUDA_ALLOC_CONF": "expandable_segments:True",
        "HF_HOME": "/mnt/dx2_data/s2530074/huggingface",
        "HF_HUB_CACHE": "/mnt/dx2_data/s2530074/huggingface/hub",
        "HF_ASSETS_CACHE": "/mnt/dx2_data/s2530074/huggingface/assets",
    }
)

# Echo the three cache settings, one per line, to confirm they were applied.
print(
    *(os.environ.get(name) for name in ("HF_HOME", "HF_HUB_CACHE", "HF_ASSETS_CACHE")),
    sep="\n",
)

/mnt/dx2_data/s2530074/huggingface
/mnt/dx2_data/s2530074/huggingface/hub
/mnt/dx2_data/s2530074/huggingface/assets


In [None]:
import gc

import torch
from bert_score import score
from transformers import AutoModelForCausalLM, AutoTokenizer

# Models under evaluation: display name -> model identifier handed to
# `from_pretrained`. Insertion order is the evaluation order.
models = dict(
    [
        ("WizardLM-2-8x22B", "alpindale/WizardLM-2-8x22B"),
        ("LLaMA-2-7b-chat", "meta-llama/Llama-2-7b-chat-hf"),
    ]
)

# Validation prompts and their reference (ideal) answers.
# Each prompt is written next to its reference so the pairing is visible at a
# glance; the pairs are then transposed into the two parallel lists that the
# rest of the notebook consumes.
prompts, references = map(
    list,
    zip(
        (
            "Explain the theory of relativity in simple terms.",
            "The theory of relativity explains how time and space are linked for objects moving at a constant speed, especially for those moving close to the speed of light.",
        ),
        (
            "What's the capital of Canada?",
            "The capital of Canada is Ottawa.",
        ),
        (
            "Write a Python function to compute Fibonacci numbers.",
            "def fibonacci(n):\n    if n <= 1:\n        return n\n    return fibonacci(n-1) + fibonacci(n-2)",
        ),
        (
            "Summarize the main causes of World War II.",
            "World War II was caused by the rise of fascist regimes, unresolved issues from World War I, and expansionist policies of Germany, Italy, and Japan.",
        ),
    ),
)

# Function that generates a response
def generate_response(model_id, tokenizer, model, prompt, max_new_tokens=200):
    """Greedily generate a completion for ``prompt`` and return only the new text.

    Args:
        model_id: Identifier of the model. Unused here; kept so existing
            callers that pass it keep working.
        tokenizer: Callable that encodes ``prompt`` (with ``return_tensors="pt"``)
            into a mapping containing ``input_ids`` and supporting ``.to(device)``;
            must also provide ``decode``.
        model: Object exposing ``device`` and ``generate``.
        prompt: Text to feed to the model.
        max_new_tokens: Upper bound on the number of tokens to generate.

    Returns:
        The decoded generated tokens with special tokens skipped. For
        decoder-only models the echoed prompt tokens are removed first, so the
        result contains the model's answer rather than prompt + answer.
    """
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=max_new_tokens, do_sample=False)

    generated = outputs[0]
    # Decoder-only models return the prompt tokens followed by the completion.
    # Drop the prompt so downstream scoring sees only what the model produced.
    # Encoder-decoder models return just the completion, so leave those alone.
    is_encoder_decoder = getattr(getattr(model, "config", None), "is_encoder_decoder", False)
    if not is_encoder_decoder:
        prompt_length = inputs["input_ids"].shape[-1]
        generated = generated[prompt_length:]

    return tokenizer.decode(generated, skip_special_tokens=True)

# Main processing: for each model, generate one response per prompt and score
# the responses against the references with BERTScore.

# zip() below would silently drop trailing items if the lists ever diverged,
# so fail loudly up front instead.
if len(prompts) != len(references):
    raise ValueError(
        f"prompts ({len(prompts)}) and references ({len(references)}) must have the same length"
    )

for model_name, model_id in models.items():
    print(f"\n=== Evaluating {model_name} ===")
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="auto",
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        # SECURITY NOTE(review): trust_remote_code=True allows Python code shipped
        # in the model repository to run locally. Confirm these repositories
        # actually need it; otherwise drop the flag.
        trust_remote_code=True
    )
    model.eval()

    try:
        outputs = [
            generate_response(model_id, tokenizer, model, prompt)
            for prompt in prompts
        ]
    finally:
        # Release this model before BERTScore loads its own model and before
        # the next iteration loads another one; otherwise memory use
        # accumulates across models.
        del model, tokenizer
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    # BERTScore evaluation
    print(f"\n--- Results for {model_name} ---")
    P, R, F1 = score(outputs, references, lang='en', verbose=False)

    for i, (prompt, output, ref) in enumerate(zip(prompts, outputs, references)):
        print(f"\nPrompt {i+1}: {prompt}")
        print(f"Model Output:\n{output.strip()}")
        print(f"Reference:\n{ref}")
        print(f"BERTScore F1: {F1[i].item():.4f}")

    avg_f1 = F1.mean().item()
    print(f"\n{model_name} Average BERTScore F1: {avg_f1:.4f}")
