In [1]:
import torch

In [2]:
from models.gpt2 import GPT2Model

In [3]:
from transformers import GPT2Model as OpenAIGPT2Model
from utils import model_size_to_params

In [4]:
def test_gpt2(model_size='gpt2'):
  """Sanity-check the local GPT2Model against the Hugging Face reference model.

  Loads pretrained weights for ``model_size`` into both implementations, runs
  the same fixed batch of two 8-token sequences through each, zeroes every
  position whose attention-mask entry is 0, and asserts that the resulting
  final hidden states agree within tolerance.

  Args:
    model_size: Checkpoint name forwarded to both ``from_pretrained`` calls
      and to ``model_size_to_params``.

  Raises:
    AssertionError: If the masked hidden states are not close under
      ``atol=1e-1`` / ``rtol=1e-2``.
  """
  sent_ids = torch.tensor([[101, 7592, 2088, 102, 0, 0, 0, 0],
                           [101, 7592, 15756, 2897, 2005, 17953, 2361, 102]])
  att_mask = torch.tensor([[1, 1, 1, 1, 0, 0, 0, 0], [1, 1, 1, 1, 1, 1, 1, 1]])

  # Load both the OpenAI model and your own model.
  openai_model = OpenAIGPT2Model.from_pretrained(model_size)
  gpt = GPT2Model.from_pretrained(model=model_size, **model_size_to_params(model_size))

  # Force inference mode on both models so that stochastic train-time layers
  # (e.g. dropout, if present) cannot perturb the comparison, regardless of
  # the mode each from_pretrained happens to leave the model in.
  openai_model.eval()
  gpt.eval()

  # No gradients are needed for a forward-only equivalence check.
  with torch.no_grad():
    outputs = gpt(sent_ids, att_mask)
    openai_hidden = openai_model(input_ids=sent_ids, attention_mask=att_mask, output_hidden_states=True).hidden_states[-1]

  # Broadcast the mask over the hidden dimension and zero out masked positions
  # on both sides; out-of-place so the reference model's output is not mutated.
  keep = att_mask.unsqueeze(-1)
  custom_hidden = outputs['last_hidden_state'] * keep
  openai_hidden = openai_hidden * keep

  # The message expression is only evaluated when the assertion fails.
  assert torch.allclose(custom_hidden, openai_hidden, atol=1e-1, rtol=1e-2), (
    "GPT2 hidden states differ from the reference model "
    f"(max abs diff: {(custom_hidden - openai_hidden).abs().max().item():.6f})")

  print("Your GPT2 implementation is correct!")

In [5]:
# Run the GPT-2 sanity check when this cell/module is executed directly.
if __name__ == "__main__":
  test_gpt2(model_size="gpt2")
# openai_model = OpenAIGPT2Model.from_pretrained('gpt2')
# gpt = GPT2Model.from_pretrained(model='gpt2', **model_size_to_params('gpt2'))
# openai_model.eval()
# gpt.eval()
# sent_ids = torch.tensor([[101, 7592, 2088, 102, 0, 0, 0, 0],
#                            [101, 7592, 15756, 2897, 2005, 17953, 2361, 102]])
# att_mask = torch.tensor([[1, 1, 1, 1, 0, 0, 0, 0], [1, 1, 1, 1, 1, 1, 1, 1]])
# outputs = gpt(sent_ids, att_mask)
# openai_outputs = openai_model(input_ids=sent_ids, attention_mask=att_mask, output_hidden_states=True).last_hidden_state

# att_mask = att_mask.unsqueeze(-1)
# outputs['last_hidden_state'] = outputs['last_hidden_state'] * att_mask
# openai_outputs *= att_mask
# #print("OpenAI GPT2 Structure:")
# #print(openai_model)

# #print("\nCustom GPT2 Structure:")
# #print(gpt)

# # The output shapes are identical
# # print("\n\noutputs.shape:")
# # print(outputs['last_hidden_state'].shape)
# # print(openai_outputs.shape)



# # Check the difference between the final results
# print("OpenAI sample value:", openai_outputs[0, 0, :5])
# print("Custom sample value:", outputs['last_hidden_state'][0, 0, :5])
# print("Diff:", (openai_outputs - outputs['last_hidden_state'])[0, 0, :5])
# # print(gpt.training) 
# # print(openai_model.training) 

# # Weight-comparison code below; it found no problems
# # # Get the state dicts
# # openai_sd = openai_model.state_dict()
# # custom_sd = gpt.state_dict()
# # # First check whether the key sets match
# # openai_keys = set(openai_sd.keys())
# # custom_keys = set(custom_sd.keys())
# # # 1. Key name differences
# # only_in_openai = openai_keys - custom_keys
# # only_in_custom = custom_keys - openai_keys
# # if only_in_openai or only_in_custom:
# #     print("❗ Key mismatch detected!")
# #     if only_in_openai:
# #         print(f" - Missing in custom: {only_in_openai}")
# #     if only_in_custom:
# #         print(f" - Extra in custom: {only_in_custom}")
# # else:
# #     print("✅ All keys match.")
# # # 2. Compare the values
# # for key in openai_keys & custom_keys:
# #     if not torch.allclose(openai_sd[key], custom_sd[key], atol=1e-3, rtol=1e-2):
# #         print(f"❗ Parameter mismatch at key: {key}")
# #         diff = (openai_sd[key] - custom_sd[key]).abs().mean().item()
# #         print(f" - Mean absolute difference: {diff:.5f}")
# #         break
# # else:
# #     print("✅ All parameter values match closely.")

Your GPT2 implementation is correct!
