In [1]:
import torch
import numpy as np
import pandas as pd
from transformers import GPT2LMHeadModel, AutoTokenizer

In [2]:
def remove_module_prefix(state_dict):
    new_state_dict = {}
    for k, v in state_dict.items():
        if k.startswith('module.'):
            new_state_dict[k[7:]] = v
        else:
            new_state_dict[k] = v
    return new_state_dict
    
module_state_dict = torch.load("../saved/weight_3.pt")
original_state_dict = remove_module_prefix(module_state_dict)
torch.save(original_state_dict, "../checkpoint/pytorch_model.bin")

In [5]:
tokenizer = AutoTokenizer.from_pretrained("openai-community/gpt2")
tokenizer.pad_token = tokenizer.eos_token
special_tokens_dict = {'additional_special_tokens': ['[user]','[context]','[response]']}
tokenizer.add_special_tokens(special_tokens_dict)
model = GPT2LMHeadModel.from_pretrained("openai-community/gpt2")
model.resize_token_embeddings(len(tokenizer))

Embedding(50260, 768)

In [7]:
# Load the new weights
model.load_state_dict(torch.load("../checkpoint/pytorch_model.bin"))
model.to("cuda")

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50260, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50260, bias=False)
)

In [17]:
prompt = "[user]\nwhat is physics?\n[response]"

In [18]:
# Prompt
model_inputs = tokenizer(prompt, return_tensors='pt').to("cuda")

# generate 40 new tokens
greedy_output = model.generate(**model_inputs, max_new_tokens=512, do_sample=True, temperature=0.8)

print("Output:\n" + 100 * '-')
print(tokenizer.decode(greedy_output[0]))

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Output:
----------------------------------------------------------------------------------------------------
[user]
what is physics?
[response]
The concept of gravity is based on a theory known as the law of inertia. The origin of gravity was, of course, the result of the development of the mass-energy relationship between the atoms in different orbits. Gravity is the transfer of mass in a direction from one orbit to another from a different orbit.
[context]
It was first demonstrated in the early 1900's by the astronomer Carl Sagan. Gravity was first noticed by the American astronomer George Freund. Gravity is the transfer of mass in a direction from one orbit to another from a different orbit.
[response]
The theory of relativity, referred to as the law of inertia, is based on a theory known as the law of inertia.<|endoftext|>
