In [3]:
import transformers
import torch

# Prefer the first CUDA device, but fall back to the CPU so the notebook still
# runs on machines without a GPU instead of failing while loading the model.
# NOTE(review): torch_dtype="auto" may select a half-precision dtype; confirm
# that this is acceptable when running on the CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Hub id of the checkpoint; tokenizer and model are both loaded from it.
model_name: str = "EleutherAI/pythia-70m"
tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)
# device_map places the loaded weights directly on `device`.
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_name, torch_dtype="auto", device_map=device
)


In [4]:
# Short example sentence that is run through the model once.
text = "Dies ist ein Beispieltext."
tokenized = tokenizer(text, return_tensors="pt")

# This is a pure inference pass: disable autograd so that the returned hidden
# states do not keep a computation graph (and its memory) alive.
with torch.no_grad():
    out = model(
        input_ids=tokenized["input_ids"].to(torch.device(device)),
        output_hidden_states=True,  # also return the per-layer representations
    )

In [5]:
# Layout of out.hidden_states:
#   0:   input embedding after dropout
#   1-5: transformer block outputs
#   6:   final representations after the layer norm, but before the task head
n_hidden_states = len(out.hidden_states)
print(n_hidden_states)

# One line per entry: the shape of that entry's tensor.
for reps in out.hidden_states:
    print(reps.shape)

7
torch.Size([1, 9, 512])
torch.Size([1, 9, 512])
torch.Size([1, 9, 512])
torch.Size([1, 9, 512])
torch.Size([1, 9, 512])
torch.Size([1, 9, 512])
torch.Size([1, 9, 512])


In [16]:
# Stack the tuple of hidden states along a new leading dimension; iterating
# over the stacked tensor then walks that leading dimension again.
reps = out.hidden_states
stacked_reps = torch.stack(reps, dim=0)

for r in stacked_reps:
    print(r.shape)

torch.Size([1, 9, 512])
torch.Size([1, 9, 512])
torch.Size([1, 9, 512])
torch.Size([1, 9, 512])
torch.Size([1, 9, 512])
torch.Size([1, 9, 512])
torch.Size([1, 9, 512])


In [46]:
import torch
import torch.fx
from torch.fx import symbolic_trace

# https://huggingface.co/docs/optimum/v1.16.1/torch_fx/usage_guides/optimization
# from transformers.utils.fx import symbolic_trace
# Problem: e.g. GPT-NeoX is not implemented there

# Try plain torch.fx tracing instead. For this model the attempt fails with a
# TraceError; the error is caught and printed so that the finding stays visible
# without aborting a full "Restart & Run All".
try:
    symbolic_traced: torch.fx.GraphModule = symbolic_trace(
        model,
        # Arguments pinned to None so they are not traced as symbolic inputs.
        concrete_args={"inputs_embeds": None, "attention_mask": None, "head_mask": None},
    )
except torch.fx.proxy.TraceError as trace_err:
    print(f"{type(trace_err).__name__}: {trace_err}")

TraceError: symbolically traced variables cannot be used as inputs to control flow

In [37]:
from torchvision.models.feature_extraction import create_feature_extractor

# Attempt to build a feature extractor for the model. For this model the call
# fails with a ValueError; the error is caught and printed so that the finding
# stays visible without aborting a full "Restart & Run All".
# NOTE(review): 'layer1' / 'layer3' do not match the submodule names shown in
# the repr of model.gpt_neox (embed_in, layers, ...) - confirm the intended nodes.
try:
    fex = create_feature_extractor(model, return_nodes={'layer1': 'feat1', 'layer3': 'feat2'})
except ValueError as extractor_err:
    print(f"{type(extractor_err).__name__}: {extractor_err}")

ValueError: You cannot specify both input_ids and inputs_embeds at the same time

In [38]:
# Display the model's `gpt_neox` submodule to inspect its layer structure and
# submodule names.
model.gpt_neox

GPTNeoXModel(
  (embed_in): Embedding(50304, 512)
  (emb_dropout): Dropout(p=0.0, inplace=False)
  (layers): ModuleList(
    (0-5): 6 x GPTNeoXLayer(
      (input_layernorm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
      (post_attention_layernorm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
      (post_attention_dropout): Dropout(p=0.0, inplace=False)
      (post_mlp_dropout): Dropout(p=0.0, inplace=False)
      (attention): GPTNeoXAttention(
        (rotary_emb): GPTNeoXRotaryEmbedding()
        (query_key_value): Linear(in_features=512, out_features=1536, bias=True)
        (dense): Linear(in_features=512, out_features=512, bias=True)
        (attention_dropout): Dropout(p=0.0, inplace=False)
      )
      (mlp): GPTNeoXMLP(
        (dense_h_to_4h): Linear(in_features=512, out_features=2048, bias=True)
        (dense_4h_to_h): Linear(in_features=2048, out_features=512, bias=True)
        (act): GELUActivation()
      )
    )
  )
  (final_layer_norm): LayerNor