In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("matsuo-lab/weblab-10b")

In [13]:
model = AutoModelForCausalLM.from_pretrained("matsuo-lab/weblab-10b", torch_dtype=torch.float16, output_hidden_states = True)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [14]:
if torch.cuda.is_available():
    model = model.to("cuda")

text = "大阪大学はどういう大学ですか"
token_ids = tokenizer.encode(text, add_special_tokens=False, return_tensors="pt")

In [15]:
with torch.no_grad():
    output_ids = model.generate(
        token_ids.to(model.device),
        max_new_tokens=100,
        do_sample=True,
        temperature=0.7,
        top_p=0.95
    )

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


In [16]:
output = tokenizer.decode(output_ids.tolist()[0])
print(output)

大阪大学はどういう大学ですか？
1 大阪大学と神戸大学どちらがいいですか? 大阪大学と神戸大学ではどちらがいいですか? また、神戸大学には
2 大阪大学の工学部と神戸大学の工学部ではどちらがいいですか?
3 大阪大学の


In [18]:
output_encoded = model(token_ids.to(model.device))

In [19]:
output_encoded.keys()

odict_keys(['logits', 'past_key_values', 'hidden_states'])

In [20]:
hidden_states = output_encoded.hidden_states

In [21]:
len(hidden_states)

37

In [22]:
model.config

GPTNeoXConfig {
  "_name_or_path": "matsuo-lab/weblab-10b",
  "architectures": [
    "GPTNeoXForCausalLM"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 0,
  "classifier_dropout": 0.1,
  "eos_token_id": 0,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_size": 4864,
  "initializer_range": 0.02,
  "intermediate_size": 19456,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 2048,
  "model_type": "gpt_neox",
  "num_attention_heads": 38,
  "num_hidden_layers": 36,
  "output_hidden_states": true,
  "rope_scaling": null,
  "rotary_emb_base": 10000,
  "rotary_pct": 0.25,
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.32.1",
  "use_cache": true,
  "use_parallel_residual": true,
  "vocab_size": 50304
}

In [23]:
hidden_states[-1].shape

torch.Size([1, 15, 4864])

In [24]:
token_ids.shape

torch.Size([1, 15])