## Reference
### [LLAMA](https://huggingface.co/meta-llama)
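* https://huggingface.co/meta-llama/Llama-2-7b
* https://huggingface.co/meta-llama/Llama-2-13b
* https://huggingface.co/meta-llama/Llama-2-70b

Note: the code below loads the corresponding `-hf` repositories (e.g. `meta-llama/Llama-2-7b-hf`), which contain the weights converted to the Hugging Face Transformers format.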

### OPT
* https://huggingface.co/facebook/opt-125m
* https://huggingface.co/facebook/opt-350m
* https://huggingface.co/facebook/opt-1.3b
* https://huggingface.co/facebook/opt-2.7b
* https://huggingface.co/facebook/opt-6.7b
* https://huggingface.co/facebook/opt-13b
* https://huggingface.co/facebook/opt-30b
* https://huggingface.co/facebook/opt-66b

### BLOOM
* https://huggingface.co/bigscience/bloom-560m
* https://huggingface.co/bigscience/bloom-1b1
* https://huggingface.co/bigscience/bloom-1b7
* https://huggingface.co/bigscience/bloom-3b
* https://huggingface.co/bigscience/bloom-7b1
* https://huggingface.co/bigscience/bloom

In [22]:
import os

from transformers import AutoModelForCausalLM, AutoConfig

# Hugging Face user access token (name: llama_access, permission: read).
# It is read from the HF_TOKEN environment variable so the secret is never
# stored in the notebook. When the variable is unset this is None.
# SECURITY: a literal token used to be committed on this line; treat it as
# leaked and revoke it in the Hugging Face account settings.
access_token = os.environ.get("HF_TOKEN")

def load_and_print_llama_model(model_name="meta-llama/Llama-2-70b-hf"):
    """Load a Llama-2 causal LM and print its layer count and first layer.

    Prints the number of decoder layers followed by the ``__dict__`` of the
    first layer. Nothing is returned.

    Args:
        model_name: Hugging Face Hub id of the checkpoint to load. It must
            contain the substring "Llama-2-".

    Raises:
        ValueError: If ``model_name`` does not contain "Llama-2-".
    """
    # Validate before touching the Hub so an unsupported name fails
    # immediately instead of after the checkpoint has been loaded.
    if "Llama-2-" not in model_name:
        raise ValueError("Invalid model name")

    # The config is fetched from the same repository as the weights, so it is
    # given the same access token as the model load below.
    config = AutoConfig.from_pretrained(
        model_name, output_hidden_states=True, token=access_token
    )
    model = AutoModelForCausalLM.from_pretrained(
        model_name, config=config, token=access_token
    )

    layers = model.base_model.layers
    print("# of layers:", len(layers), "The following is the first layer")
    # Index the first layer directly rather than enumerating every layer;
    # the guard keeps the original "print nothing when empty" behaviour.
    if len(layers) > 0:
        print(layers[0].__dict__)

def load_and_print_model(model_name="facebook/opt-1.3b"):
    """Load an OPT or BLOOM causal LM and print its layer count and first layer.

    Prints the number of decoder layers followed by the ``__dict__`` of the
    first layer. Nothing is returned.

    Args:
        model_name: Hugging Face Hub id of the checkpoint to load. Names
            containing "opt-" are treated as OPT; names containing "bloom"
            (e.g. "bigscience/bloom-3b" or "bigscience/bloom") as BLOOM.

    Raises:
        ValueError: If ``model_name`` matches neither family.
    """
    # Decide the model family from the name first, so an unsupported name
    # fails before any config or weights are loaded.
    if "opt-" in model_name:
        family = "opt"
    elif "bloom" in model_name:
        # Matching "bloom" rather than "bloom-" also accepts the un-suffixed
        # "bigscience/bloom" repository.
        family = "bloom"
    else:
        raise ValueError("Invalid model name")

    config = AutoConfig.from_pretrained(model_name, output_hidden_states=True)
    model = AutoModelForCausalLM.from_pretrained(model_name, config=config)

    # Each family exposes its stack of decoder layers under a different path.
    if family == "opt":
        layers = model.model.decoder.base_model.layers
    else:
        layers = model.transformer.h

    print("# of layers:", len(layers), "The following is the first layer")
    # Index the first layer directly rather than enumerating every layer;
    # the guard keeps the original "print nothing when empty" behaviour.
    if len(layers) > 0:
        print(layers[0].__dict__)

In [21]:
# Inspect the first decoder layer of the OPT 1.3B checkpoint.
load_and_print_model(model_name="facebook/opt-1.3b")

# of layers: 24 The following is the first layer
{'training': False, '_parameters': OrderedDict(), '_buffers': OrderedDict(), '_non_persistent_buffers_set': set(), '_backward_pre_hooks': OrderedDict(), '_backward_hooks': OrderedDict(), '_is_full_backward_hook': None, '_forward_hooks': OrderedDict(), '_forward_hooks_with_kwargs': OrderedDict(), '_forward_hooks_always_called': OrderedDict(), '_forward_pre_hooks': OrderedDict(), '_forward_pre_hooks_with_kwargs': OrderedDict(), '_state_dict_hooks': OrderedDict(), '_state_dict_pre_hooks': OrderedDict(), '_load_state_dict_pre_hooks': OrderedDict(), '_load_state_dict_post_hooks': OrderedDict(), '_modules': OrderedDict([('self_attn', OPTAttention(
  (k_proj): Linear(in_features=2048, out_features=2048, bias=True)
  (v_proj): Linear(in_features=2048, out_features=2048, bias=True)
  (q_proj): Linear(in_features=2048, out_features=2048, bias=True)
  (out_proj): Linear(in_features=2048, out_features=2048, bias=True)
)), ('activation_fn', ReLU()), 

In [23]:
# Inspect Llama-2 7B via its "-hf" repository, i.e. the checkpoint that has
# been converted to the Hugging Face format.
load_and_print_llama_model(model_name="meta-llama/Llama-2-7b-hf")

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

# of layers: 32 The following is the first layer
{'training': False, '_parameters': OrderedDict(), '_buffers': OrderedDict(), '_non_persistent_buffers_set': set(), '_backward_pre_hooks': OrderedDict(), '_backward_hooks': OrderedDict(), '_is_full_backward_hook': None, '_forward_hooks': OrderedDict(), '_forward_hooks_with_kwargs': OrderedDict(), '_forward_hooks_always_called': OrderedDict(), '_forward_pre_hooks': OrderedDict(), '_forward_pre_hooks_with_kwargs': OrderedDict(), '_state_dict_hooks': OrderedDict(), '_state_dict_pre_hooks': OrderedDict(), '_load_state_dict_pre_hooks': OrderedDict(), '_load_state_dict_post_hooks': OrderedDict(), '_modules': OrderedDict([('self_attn', LlamaSdpaAttention(
  (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
  (k_proj): Linear(in_features=4096, out_features=4096, bias=False)
  (v_proj): Linear(in_features=4096, out_features=4096, bias=False)
  (o_proj): Linear(in_features=4096, out_features=4096, bias=False)
  (rotary_emb): LlamaRo

In [24]:
# Inspect the first decoder layer of the BLOOM 3B checkpoint.
load_and_print_model(model_name="bigscience/bloom-3b")

# of layers: 30 The following is the first layer
{'training': False, '_parameters': OrderedDict(), '_buffers': OrderedDict(), '_non_persistent_buffers_set': set(), '_backward_pre_hooks': OrderedDict(), '_backward_hooks': OrderedDict(), '_is_full_backward_hook': None, '_forward_hooks': OrderedDict(), '_forward_hooks_with_kwargs': OrderedDict(), '_forward_hooks_always_called': OrderedDict(), '_forward_pre_hooks': OrderedDict(), '_forward_pre_hooks_with_kwargs': OrderedDict(), '_state_dict_hooks': OrderedDict(), '_state_dict_pre_hooks': OrderedDict(), '_load_state_dict_pre_hooks': OrderedDict(), '_load_state_dict_post_hooks': OrderedDict(), '_modules': OrderedDict([('input_layernorm', LayerNorm((2560,), eps=1e-05, elementwise_affine=True)), ('self_attention', BloomAttention(
  (query_key_value): Linear(in_features=2560, out_features=7680, bias=True)
  (dense): Linear(in_features=2560, out_features=2560, bias=True)
  (attention_dropout): Dropout(p=0.0, inplace=False)
)), ('post_attention_l