In [1]:
from typing import List, Tuple, Optional
from transformers import AutoTokenizer

In [2]:
# Prefix prepended to every model identifier to build the path handed to the
# loader. With the empty string the path is identical to the identifier.
MODEL_PATH = ""

# Compact catalogue: (model identifier, third tuple field or None).
# NOTE(review): the third field is presumably a quantization method tag
# (consumers unpack it as `quant`) -- confirm against the code that uses it.
_MODEL_SPECS: List[Tuple[str, Optional[str]]] = [
    # Meta
    ("meta-llama/Llama-3.1-8B-Instruct", None),
    ("hugging-quants/Meta-Llama-3.1-70B-Instruct-AWQ-INT4", "awq"),
    ("meta-llama/Llama-3.2-3B-Instruct", None),
    ("ibnzterrell/Meta-Llama-3.3-70B-Instruct-AWQ-INT4", "awq"),

    # Microsoft
    ("microsoft/Phi-3-mini-128k-instruct", None),
    ("microsoft/Phi-3-small-128k-instruct", None),
    ("microsoft/Phi-3-medium-128k-instruct", None),
    ("microsoft/Phi-3.5-mini-instruct", None),
    ("microsoft/Phi-4-mini-instruct", None),
    ("microsoft/phi-4", None),

    # Mistral
    ("mistralai/Mistral-Nemo-Instruct-2407", None),
    ("mistralai/Ministral-8B-Instruct-2410", None),

    # Qwen
    ("Qwen/Qwen2-72B-Instruct-AWQ", "awq"),
    ("Qwen/Qwen2-7B-Instruct", None),
    ("Qwen/Qwen2.5-72B-Instruct-AWQ", "awq"),
    ("Qwen/Qwen2.5-14B-Instruct", None),
    ("Qwen/Qwen2.5-7B-Instruct", None),
    ("Qwen/Qwen2.5-3B-Instruct", None),

    # Falcon
    ("tiiuae/Falcon3-7B-Instruct", None),
    ("tiiuae/Falcon3-3B-Instruct", None),
]

# Expanded catalogue of (identifier, MODEL_PATH + identifier, third field),
# in the same order as _MODEL_SPECS.
LLMS: List[Tuple[str, str, Optional[str]]] = [
    (model_id, MODEL_PATH + model_id, quant_tag)
    for model_id, quant_tag in _MODEL_SPECS
]

In [5]:
# Pad-token overrides keyed by the model name without its organisation prefix
# (the part after the last "/"). Models absent from this mapping keep whatever
# pad token their tokenizer already defines.
pad_tokens = {
    **dict.fromkeys(
        (
            "Llama-3.1-8B-Instruct",
            "Meta-Llama-3.1-70B-Instruct-AWQ-INT4",
            "Llama-3.2-3B-Instruct",
        ),
        "<|finetune_right_pad_id|>",
    ),
    **dict.fromkeys(
        (
            "Mistral-Nemo-Instruct-2407",
            "Ministral-8B-Instruct-2410",
        ),
        "<pad>",
    ),
}

In [6]:
# Report the effective pad token (string and id) for every model in LLMS,
# applying the override from pad_tokens when one exists for that model.
# `quant` is unpacked only to match the tuple shape; it is not used here.
for model, path, quant in LLMS:
    print(model)
    # Overrides are keyed by the bare model name, i.e. the text after the last "/".
    model_name = model.rsplit("/", 1)[-1]
    tokenizer = AutoTokenizer.from_pretrained(path)
    if model_name in pad_tokens:
        tokenizer.pad_token = pad_tokens[model_name]
    print(tokenizer.pad_token, tokenizer.pad_token_id)

meta-llama/Llama-3.1-8B-Instruct
<|finetune_right_pad_id|> 128004
hugging-quants/Meta-Llama-3.1-70B-Instruct-AWQ-INT4
<|finetune_right_pad_id|> 128004
meta-llama/Llama-3.2-3B-Instruct
<|finetune_right_pad_id|> 128004
ibnzterrell/Meta-Llama-3.3-70B-Instruct-AWQ-INT4
<|finetune_right_pad_id|> 128004
microsoft/Phi-3-mini-128k-instruct
<|endoftext|> 32000
microsoft/Phi-3-small-128k-instruct
<|endoftext|> 100257
microsoft/Phi-3-medium-128k-instruct
<|endoftext|> 32000
microsoft/Phi-3.5-mini-instruct
<|endoftext|> 32000
microsoft/Phi-4-mini-instruct
<|endoftext|> 199999
microsoft/phi-4
<|dummy_85|> 100349
mistralai/Mistral-Nemo-Instruct-2407
<pad> 10
mistralai/Ministral-8B-Instruct-2410
<pad> 11
Qwen/Qwen2-72B-Instruct-AWQ
<|endoftext|> 151643
Qwen/Qwen2-7B-Instruct
<|endoftext|> 151643
Qwen/Qwen2.5-72B-Instruct-AWQ
<|endoftext|> 151643
Qwen/Qwen2.5-14B-Instruct
<|endoftext|> 151643
Qwen/Qwen2.5-7B-Instruct
<|endoftext|> 151643
Qwen/Qwen2.5-3B-Instruct
<|endoftext|> 151643
tiiuae/Falcon3-7B-