### 커스텀 OPT 및 Hugging Face OPT 로드

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers.models.opt.modeling_opt import OPTLearnedPositionalEmbedding
from typing import Optional, Tuple, List

from transformers import AutoModelForCausalLM, AutoTokenizer

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [2]:
from custom_opt.opt import CustomOPTModel
from custom_opt.load import load_hugginface_opt, load_custom_opt

In [3]:
# Tokenizer for the facebook/opt-125m checkpoint.
tokenizer = AutoTokenizer.from_pretrained("facebook/opt-125m")
# Each loader returns a (tokenizer, model) pair placed on `device`:
# `model_a` is the Hugging Face reference model, `model_b` the custom OPT model.
tokenizer_a, model_a = load_hugginface_opt(device=device)
tokenizer_b, model_b = load_custom_opt(device=device)

# Put both models in evaluation (inference) mode.
model_a.eval()
# Trailing expression: the notebook displays the value returned by this call
# (the module tree of the custom model).
model_b.eval()

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


CustomOPTModel(
  (embed_tokens): Embedding(50272, 768, padding_idx=1)
  (embed_positions): OPTLearnedPositionalEmbedding(2050, 768)
  (layers): ModuleList(
    (0-11): 12 x OPTDecoderLayer(
      (self_attn): OPTAttention(
        (q_proj): Linear(in_features=768, out_features=768, bias=True)
        (k_proj): Linear(in_features=768, out_features=768, bias=True)
        (v_proj): Linear(in_features=768, out_features=768, bias=True)
        (out_proj): Linear(in_features=768, out_features=768, bias=True)
      )
      (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (fc1): Linear(in_features=768, out_features=3072, bias=True)
      (fc2): Linear(in_features=3072, out_features=768, bias=True)
      (final_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (activation_fn): ReLU()
    )
  )
  (final_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  (lm_head): Linear(in_features=768, out_features=50272, bias=False)

In [4]:
# Display the Hugging Face model's module tree so it can be compared
# against the custom model's structure.
model_a

OPTForCausalLM(
  (model): OPTModel(
    (decoder): OPTDecoder(
      (embed_tokens): Embedding(50272, 768, padding_idx=1)
      (embed_positions): OPTLearnedPositionalEmbedding(2050, 768)
      (final_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (layers): ModuleList(
        (0-11): 12 x OPTDecoderLayer(
          (self_attn): OPTSdpaAttention(
            (k_proj): Linear(in_features=768, out_features=768, bias=True)
            (v_proj): Linear(in_features=768, out_features=768, bias=True)
            (q_proj): Linear(in_features=768, out_features=768, bias=True)
            (out_proj): Linear(in_features=768, out_features=768, bias=True)
          )
          (activation_fn): ReLU()
          (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (fc1): Linear(in_features=768, out_features=3072, bias=True)
          (fc2): Linear(in_features=3072, out_features=768, bias=True)
          (final_layer_norm): LayerNorm((768,)

In [5]:
import torch
from transformers import AutoTokenizer

# 1. Prepare the tokenizer (OPT-based).
tokenizer = AutoTokenizer.from_pretrained("facebook/opt-125m", use_fast=True)
# Explicitly reuse the EOS token as the padding token (this overrides whatever
# pad token the checkpoint ships with).
tokenizer.pad_token = tokenizer.eos_token

# 2. Input prompt.
input_text = "This is a"

# 3. Tokenize the prompt into a tensor of token ids.
input_ids = tokenizer(input_text, return_tensors="pt").input_ids  # shape: [1, seq_len]

# 4. Make sure both models are in evaluation mode.
model_a.eval()
model_b.eval()

print(model_a.device)
print(model_b.device)

# 5. Forward pass through each model, with the ids moved to that model's device.
#    `logits_a` / `logits_b` hold the raw model outputs.
with torch.no_grad():
    logits_a = model_a(input_ids.to(model_a.device))
    logits_b = model_b(input_ids.to(model_b.device))


def _last_token_logits(model_output):
    """Return the logits at the last sequence position of a model output.

    Accepts an output object exposing ``.logits`` (as the Hugging Face model
    returns), a tuple/list whose first element is the logits, or a bare logits
    tensor. NOTE(review): the custom model's return type is not visible in
    this notebook, hence the tolerant handling -- confirm and simplify.
    """
    logits = getattr(model_output, "logits", model_output)
    if isinstance(logits, (tuple, list)):
        logits = logits[0]
    return logits[:, -1, :]


# 6. Greedy next-token prediction for each model.
#    Fix: model_b's prediction previously read `logits_a`, so the comparison
#    always agreed without ever looking at the custom model's output.
next_token_id_a = torch.argmax(_last_token_logits(logits_a), dim=-1)
next_token_id_b = torch.argmax(_last_token_logits(logits_b), dim=-1)

# 7. Decode the predicted ids back to text for inspection.
next_token_a = tokenizer.decode(next_token_id_a)
next_token_b = tokenizer.decode(next_token_id_b)

print(f"model_a가 예측한 다음 토큰: {next_token_a}")
print(f"model_b가 예측한 다음 토큰: {next_token_b}")

cuda:0
cuda
model_a가 예측한 다음 토큰:  great
model_b가 예측한 다음 토큰:  great


In [6]:
# Prompt fed to the Hugging Face reference model.
input_text = "What happened to me?"

# Encode the prompt as PyTorch tensors, then move the encoded batch to `device`.
inputs = tokenizer(input_text, return_tensors="pt")
inputs = inputs.to(device)

# Greedy decoding: sampling is disabled, so the highest-probability token is
# chosen at every step; `max_length` caps the length of the generation.
output = model_a.generate(max_length=500, do_sample=False, **inputs)

# Turn the first generated sequence back into text, dropping special tokens.
generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
print(generated_text)

What happened to me?
I'm not sure. I'm not sure if it's just me or if it's just me. I'm not sure if it's just me or if it's just me. I'm not sure if it's just me or if it's just me. I'm not sure if it's just me or if it's just me. I'm not sure if it's just me or if it's just me. I'm not sure if it's just me or if it's just me. I'm not sure if it's just me or if it's just me. I'm not sure if it's just me or if it's just me. I'm not sure if it's just me or if it's just me. I'm not sure if it's just me or if it's just me. I'm not sure if it's just me or if it's just me. I'm not sure if it's just me or if it's just me. I'm not sure if it's just me or if it's just me. I'm not sure if it's just me or if it's just me. I'm not sure if it's just me or if it's just me. I'm not sure if it's just me or if it's just me. I'm not sure if it's just me or if it's just me. I'm not sure if it's just me or if it's just me. I'm not sure if it's just me or if it's just me. I'm not sure if it's just me or if

In [7]:
# Same prompt as used for the reference model, now fed to the custom model.
input_text = "What happened to me?"

# Encode the prompt as PyTorch tensors, then move the encoded batch to `device`.
inputs = tokenizer(input_text, return_tensors="pt")
inputs = inputs.to(device)

# Greedy decoding with sampling disabled and a length cap, using the same
# arguments as the reference run so the two generations are comparable.
# NOTE(review): presumably the custom `generate` mirrors the Hugging Face
# semantics for these arguments -- confirm against custom_opt.
output = model_b.generate(max_length=500, do_sample=False, **inputs)

# Turn the first generated sequence back into text, dropping special tokens.
generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
print(generated_text)

What happened to me?
I'm not sure. I'm not sure if it's just me or if it's just me. I'm not sure if it's just me or if it's just me. I'm not sure if it's just me or if it's just me. I'm not sure if it's just me or if it's just me. I'm not sure if it's just me or if it's just me. I'm not sure if it's just me or if it's just me. I'm not sure if it's just me or if it's just me. I'm not sure if it's just me or if it's just me. I'm not sure if it's just me or if it's just me. I'm not sure if it's just me or if it's just me. I'm not sure if it's just me or if it's just me. I'm not sure if it's just me or if it's just me. I'm not sure if it's just me or if it's just me. I'm not sure if it's just me or if it's just me. I'm not sure if it's just me or if it's just me. I'm not sure if it's just me or if it's just me. I'm not sure if it's just me or if it's just me. I'm not sure if it's just me or if it's just me. I'm not sure if it's just me or if it's just me. I'm not sure if it's just me or if