In [1]:
import sys
import os
from argparse import ArgumentParser

import transformers
from accelerate import init_empty_weights
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
from tqdm import tqdm
import torch
from sfm.models.scigpt.scigpt import ScigptModel
from sfm.models.scigpt.config import ScigptConfig
from sfm.utils import arg_utils
from sfm.utils.science_tokens import SCIENCE_TAG_TOKENS



  from .autonotebook import tqdm as notebook_tqdm


[2024-04-25 01:10:07,124] [INFO] [real_accelerator.py:191:get_accelerator] Setting ds_accelerator to cuda (auto detect)
[[32m2024-04-25 01:10:07.533[0m][[36mINFO[0m]: apex is installed, using FusedAdam with fp16 optimizer states


In [2]:
IGNORE_INDEX = -100
DEFAULT_PAD_TOKEN = "[PAD]"
DEFAULT_EOS_TOKEN = "</s>"
DEFAULT_BOS_TOKEN = "<s>"
DEFAULT_UNK_TOKEN = "<unk>"

def get_args_and_tokenizer(use_llama=False):
    parser = ArgumentParser()
    cfg_classes = [ScigptConfig]
    parser = arg_utils.add_dataclass_to_parser(cfg_classes, parser)
    args = parser.parse_args(args=[])
    args.load_ckpt = False
    args.strategy = "DDP"
    args.encoder_layers = 33
    args.encoder_embed_dim = 1280
    args.encoder_ffn_embed_dim = 5120
    args.encoder_attention_heads = 20
    args.infer = True
    args.bf16 = True
    
    tokenizer = AutoTokenizer.from_pretrained("/data/peiran/blob/hai1data/sfm/llama/Meta-Llama-3-8B/original")
    args.save_dir = "/data/peiran/blob/hai1data/sfm/llama/Meta-Llama-3-8B/original"
    args.llm_model_name_or_path = "/data/peiran/blob/hai1data/sfm/llama/Meta-Llama-3-8B/original"

    special_tokens_dict = dict()
    if tokenizer.pad_token is None:
        special_tokens_dict["pad_token"] = DEFAULT_PAD_TOKEN
    if tokenizer.eos_token is None:
        special_tokens_dict["eos_token"] = DEFAULT_EOS_TOKEN
    if tokenizer.bos_token is None:
        special_tokens_dict["bos_token"] = DEFAULT_BOS_TOKEN
    if tokenizer.unk_token is None:
        special_tokens_dict["unk_token"] = DEFAULT_UNK_TOKEN

    # special_tokens_dict["additional_special_tokens"] = SCIENCE_TAG_TOKENS
    tokenizer.add_special_tokens(special_tokens_dict)
        

    return args, tokenizer

args, tokenizer = get_args_and_tokenizer()
print(type(tokenizer))

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


<class 'transformers.tokenization_utils_fast.PreTrainedTokenizerFast'>


In [3]:
ckpt_dict = {}

model = ScigptModel(args)
# model = AutoModelForCausalLM.from_pretrained(args.save_dir)

model_dict = model.state_dict()
print(f"model_dict: {model_dict.keys()}")

layer0 = torch.load(os.path.join(args.save_dir, "layer_00-model_states.pt"), map_location=torch.device("cpu"))
for k, v in layer0.items():
    new_k = "decoder.model." + k
    ckpt_dict[new_k] = v

# layer1 = torch.load(os.path.join(args.save_dir, "layer_01-model_states.pt"), map_location=torch.device("cpu"))
# ckpt_dict['embed_tokens.weight'] = layer1['embed_tokens.weight']

# layer2 = torch.load(os.path.join(args.save_dir, "layer_02-model_states.pt"), map_location=torch.device("cpu"))
# for k, v in layer2.items():
#     new_k = "adaptor." + k
#     ckpt_dict[new_k] = v

for l in range(0, 32):
    l_index = str(l + 1).zfill(2)
    layer = torch.load(os.path.join(args.save_dir, f"layer_{l_index}-model_states.pt"), map_location=torch.device("cpu"))
    for k in layer:
        if "dummy" in k or 'rotary_emb' in k:
            continue
        ckpt_dict[f"decoder.model.layers.{l}.{k}"] = layer[k]
    del layer

layer = torch.load(os.path.join(args.save_dir, "layer_33-model_states.pt"), map_location=torch.device("cpu"))
ckpt_dict["decoder.model.norm.weight"] = layer["norm.weight"]

layer = torch.load(os.path.join(args.save_dir, "layer_34-model_states.pt"), map_location=torch.device("cpu"))
ckpt_dict["decoder.lm_head.weight"] = layer["lm_head.weight"]

print(f"ckpt_dict: {ckpt_dict.keys()}")
model_dict.update(ckpt_dict)
model.load_state_dict(model_dict)



model_dict: odict_keys(['decoder.model.embed_tokens.weight', 'decoder.model.layers.0.self_attn.q_proj.weight', 'decoder.model.layers.0.self_attn.k_proj.weight', 'decoder.model.layers.0.self_attn.v_proj.weight', 'decoder.model.layers.0.self_attn.o_proj.weight', 'decoder.model.layers.0.mlp.gate_proj.weight', 'decoder.model.layers.0.mlp.up_proj.weight', 'decoder.model.layers.0.mlp.down_proj.weight', 'decoder.model.layers.0.input_layernorm.weight', 'decoder.model.layers.0.post_attention_layernorm.weight', 'decoder.model.layers.1.self_attn.q_proj.weight', 'decoder.model.layers.1.self_attn.k_proj.weight', 'decoder.model.layers.1.self_attn.v_proj.weight', 'decoder.model.layers.1.self_attn.o_proj.weight', 'decoder.model.layers.1.mlp.gate_proj.weight', 'decoder.model.layers.1.mlp.up_proj.weight', 'decoder.model.layers.1.mlp.down_proj.weight', 'decoder.model.layers.1.input_layernorm.weight', 'decoder.model.layers.1.post_attention_layernorm.weight', 'decoder.model.layers.2.self_attn.q_proj.weight

<All keys matched successfully>

In [6]:
device = torch.device("cuda")
model.decoder.resize_token_embeddings(len(tokenizer))
model = model.to(torch.bfloat16).to(device)

model.eval()

# print(f"input: {text},\n output: {res}")

# # output = model.generate(
# #     input_ids=batched_data['input_ids'],
# #     num_return_sequences=10,
# #     num_beams=20,
# # )
# for i in range(10):
#     print(tokenizer.decode(output[i]))


  input_ids=torch.tensor(tokenizer.encode("what to eat", return_tensors="pt")).to(device),
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


what to eat, what to drink, where to go, how to get there, what to see, what to do, where to stay, where to shop, what to buy, where to eat, what to drink, where to go, how to get there, what to see, what to do, where to stay, where to shop, what to buy, where to eat, what to drink, where to go, how to get there, what to see, what to do, where to stay, where to shop, what to buy, where to eat, what to drink, where to go, how to get there, what to see, what to do, where to stay, where to shop, what to buy, where to eat, what to drink, where to go, how to get there, what to see, what to do, where to stay, where to shop, what to buy, where to eat, what to drink, where to go, how to get there, what to see, what to do, where to stay, where to shop, what to buy, where to eat, what to drink, where to go, how to get there, what to see, what to do, where to stay, where to shop, what to buy, where to eat, what to drink, where to go, how to get there, what to see, what to do, where to stay, where

In [7]:
output = model.decoder.generate(
    input_ids=torch.tensor(tokenizer.encode("Football is a ", return_tensors="pt")).to(device),
    num_beams=5,
    max_new_tokens=512,
    num_return_sequences=1,
    return_dict_in_generate=True,
    output_scores=True,
    do_sample=True,
    top_p=0.95,
    repetition_penalty=1.5,
)
res = tokenizer.decode(output.sequences[0], skip_special_tokens=False)
print(res)

  input_ids=torch.tensor(tokenizer.encode("Football is a ", return_tensors="pt")).to(device),
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Football is a 90-minute game. Ninety minutes is all you get. If you don't win in 90 minutes, you deserve to lose."
"Winning isn't everything, but wanting to win is."
"The only way to be truly satisfied is to do what you believe is great work. And the only way to do great work is to love what you do. If you haven't found it yet, keep looking. Don't settle. As with all matters of the heart, you'll know when you find it. And, like any great relationship, it just gets better and better as the years roll on. So keep looking until you find it. Don't settle."
"I've missed more than 9000 shots in my career. I've lost almost 300 games. 26 times, I've been trusted to take the game winning shot and missed. I've failed over and over and over again in my life. And that is why I succeed."
"I can accept failure, everyone fails at something. But I can't accept not trying."
"Whatever you do, you need courage. Whatever course you decide upon, there is always someone to tell you that you are wrong. There

In [1]:
import torch
ckpt = torch.load("/data/peiran/blob/hai1data/sfm/pfmexp/output/stageB/global_step12386/layer_01-model_states.pt")