## Login to Hugging Face

In [1]:
from dotenv import load_dotenv
import os
from huggingface_hub import login

# Load variables from a local .env file (if one exists) into the environment so
# the access token does not have to be written into the notebook itself.
load_dotenv()

# The Hugging Face access token is read from the HUGGINGFACE_TOKEN variable.
token = os.environ.get("HUGGINGFACE_TOKEN")

# Authenticate with the Hub and also store the token in the git credential helper.
login(token=token, add_to_git_credential=True)

Token is valid (permission: write).
Your token has been saved in your configured git credential helpers (osxkeychain).
Your token has been saved to /Users/pathfinder/.cache/huggingface/token
Login successful


## Imports

In [2]:
# standard library
import importlib.util

# notebook display
from IPython.display import display, Markdown

# pytorch
import torch

# huggingface
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig
)

## Device

In [3]:
# Choose the compute device: CUDA first, then Apple MPS, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = "cuda:0"  # Nvidia GPU
elif torch.backends.mps.is_available():
    device = "mps"  # Apple Silicon GPU
else:
    device = "cpu"
print(f"Device = {device}")

Device = mps


In [4]:
# Attention backend and compute dtype selection.
# - CUDA with compute capability >= 8 (Ampere, Ada, or Hopper): bfloat16, plus
#   FlashAttention-2 when the optional `flash_attn` package is importable.
#   Requesting "flash_attention_2" without that package makes `from_pretrained`
#   fail, so fall back to "eager" in that case.
# - Older CUDA GPUs: eager attention in float16.
# - Any other device (MPS / CPU): eager attention in float32.
if device == "cuda:0":
    if torch.cuda.get_device_capability()[0] >= 8: # Ampere, Ada, or Hopper GPUs
        flash_attn_installed = importlib.util.find_spec("flash_attn") is not None
        attn_implementation = "flash_attention_2" if flash_attn_installed else "eager"
        torch_dtype = torch.bfloat16
    else:
        attn_implementation = "eager"
        torch_dtype = torch.float16
else:
    attn_implementation = "eager"
    torch_dtype = torch.float32
print(f"Attention Implementation = {attn_implementation}")

Attention Implementation = eager


## Hyperparameters

In [5]:
################################################################################
# Tokenizer parameters
################################################################################
max_length = 1024
padding = "do_not_pad"  # one of: "max_length", "longest", "do_not_pad"
truncation = True

################################################################################
# Generation parameters
################################################################################
num_return_sequences = 1
max_new_tokens = 1024
do_sample = True  # True -> sampling, False -> greedy decoding
temperature = 0.9
top_k = 40
top_p = 0.9
repetition_penalty = 1.1

################################################################################
# bitsandbytes parameters
################################################################################
load_in_4bit = True
bnb_4bit_compute_dtype = torch_dtype  # same dtype that was selected for the device
bnb_4bit_quant_type = "nf4"  # one of: "nf4", "fp4"
bnb_4bit_use_double_quant = True

## Model

In [6]:
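# Model List
# Candidate Hugging Face Hub IDs for `model_id` (assigned in the next cell).
# A trailing "// downloaded" presumably marks checkpoints already cached locally.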

# gemma variants
# "google/gemma-1.1-7b-it"
# "google/codegemma-7b-it"

# llama variants
# "meta-llama/Meta-Llama-3-8B" // downloaded
# "meta-llama/Meta-Llama-3-8B-Instruct" // downloaded
# "codellama/CodeLlama-7b-Instruct-hf"
# "PathFinderKR/Waktaverse-Llama-3-KO-8B-Instruct"

# mistral variants
# "mistralai/Mistral-7B-Instruct-v0.2"

# openELM variants
# "apple/OpenELM-3B-Instruct" // downloaded

# solar variants
# "upstage/SOLAR-10.7B-Instruct-v1.0" // downloaded
# "PathFinderKR/Waktaverse-SOLAR-KO-10.7B-Instruct"

In [7]:
# Hub ID of the checkpoint that AutoModelForCausalLM.from_pretrained loads below.
model_id = "apple/OpenELM-3B-Instruct"

In [8]:
# The Llama-2 tokenizer is used with the OpenELM checkpoint: the printed tokenizer
# vocab size (32000) matches the model's 32000-row token embedding.
# NOTE(review): this ID is hard-coded and does not follow `model_id`; update it
# when switching to a model from a different family.
# `trust_remote_code` is deliberately not passed: this repo resolves to the
# built-in LlamaTokenizerFast, so no Hub-hosted Python needs to be executed.
tokenizer = AutoTokenizer.from_pretrained('meta-llama/Llama-2-7b-hf')
tokenizer.padding_side = "right"

In [9]:
# Display tokenizer information.
# The fences sit on their own lines: with the repr on the same line as the opening
# fence, Markdown treats the first line of a multi-line repr as the fence's info
# string and omits it from the rendered block, and the closing fence is not
# recognized because it does not start a line.
display(Markdown(f'```\n{tokenizer}\n```'))

```LlamaTokenizerFast(name_or_path='meta-llama/Llama-2-7b-hf', vocab_size=32000, model_max_length=1000000000000000019884624838656, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'bos_token': '<s>', 'eos_token': '</s>', 'unk_token': '<unk>'}, clean_up_tokenization_spaces=False),  added_tokens_decoder={
	0: AddedToken("<unk>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	1: AddedToken("<s>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	2: AddedToken("</s>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
}```

In [10]:
# 4-bit bitsandbytes quantization settings, assembled from the values defined in
# the Hyperparameters section.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=load_in_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_use_double_quant=bnb_4bit_use_double_quant,
    bnb_4bit_compute_dtype=bnb_4bit_compute_dtype,
)

In [11]:
# Load the causal-LM checkpoint named by `model_id` onto the selected device,
# using the attention backend and dtype chosen in the Device section.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map=device,
    attn_implementation=attn_implementation,
    torch_dtype=torch_dtype,
    # Quantization is currently switched off; uncomment the next line to apply
    # the `quantization_config` built in the previous cell.
    #quantization_config=quantization_config,
    trust_remote_code=True  # NOTE(review): presumably needed because the OpenELM modeling code ships with the Hub repo — confirm
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [12]:
# Display the model architecture.
# The fences sit on their own lines: with the repr on the same line as the opening
# fence, Markdown treats the first line of the multi-line repr as the fence's info
# string and omits it from the rendered block, and the closing fence is not
# recognized because it does not start a line.
display(Markdown(f'```\n{model}\n```'))

```OpenELMForCausalLM(
  (transformer): OpenELMModel(
    (token_embeddings): Embedding(32000, 3072)
    (layers): ModuleList(
      (0): OpenELMDecoderLayer(
        (attn): OpenELMMultiHeadCausalAttention(
          query_heads=12, key_heads=3, value_heads=3
          (qkv_proj): Linear(in_features=3072, out_features=2304, bias=False)
          (pos_embedding): OpenELMRotaryEmbedding(	model_dim=128, max_seq_length=4096, freq_constant=10000)
          (q_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (k_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (out_proj): Linear(in_features=1536, out_features=3072, bias=False)
        )
        (ffn): OpenELMFeedForwardNetwork(
          (ffn_with_glu) : True
          (proj_1): Linear(in_features=3072, out_features=3072, bias=False)
          (proj_2): Linear(in_features=1536, out_features=3072, bias=False)
          (act): SiLU()
        )
        (ffn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
        (attn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
      )
      (1): OpenELMDecoderLayer(
        (attn): OpenELMMultiHeadCausalAttention(
          query_heads=12, key_heads=3, value_heads=3
          (qkv_proj): Linear(in_features=3072, out_features=2304, bias=False)
          (pos_embedding): OpenELMRotaryEmbedding(	model_dim=128, max_seq_length=4096, freq_constant=10000)
          (q_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (k_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (out_proj): Linear(in_features=1536, out_features=3072, bias=False)
        )
        (ffn): OpenELMFeedForwardNetwork(
          (ffn_with_glu) : True
          (proj_1): Linear(in_features=3072, out_features=3584, bias=False)
          (proj_2): Linear(in_features=1792, out_features=3072, bias=False)
          (act): SiLU()
        )
        (ffn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
        (attn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
      )
      (2): OpenELMDecoderLayer(
        (attn): OpenELMMultiHeadCausalAttention(
          query_heads=12, key_heads=3, value_heads=3
          (qkv_proj): Linear(in_features=3072, out_features=2304, bias=False)
          (pos_embedding): OpenELMRotaryEmbedding(	model_dim=128, max_seq_length=4096, freq_constant=10000)
          (q_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (k_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (out_proj): Linear(in_features=1536, out_features=3072, bias=False)
        )
        (ffn): OpenELMFeedForwardNetwork(
          (ffn_with_glu) : True
          (proj_1): Linear(in_features=3072, out_features=4096, bias=False)
          (proj_2): Linear(in_features=2048, out_features=3072, bias=False)
          (act): SiLU()
        )
        (ffn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
        (attn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
      )
      (3): OpenELMDecoderLayer(
        (attn): OpenELMMultiHeadCausalAttention(
          query_heads=12, key_heads=3, value_heads=3
          (qkv_proj): Linear(in_features=3072, out_features=2304, bias=False)
          (pos_embedding): OpenELMRotaryEmbedding(	model_dim=128, max_seq_length=4096, freq_constant=10000)
          (q_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (k_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (out_proj): Linear(in_features=1536, out_features=3072, bias=False)
        )
        (ffn): OpenELMFeedForwardNetwork(
          (ffn_with_glu) : True
          (proj_1): Linear(in_features=3072, out_features=5120, bias=False)
          (proj_2): Linear(in_features=2560, out_features=3072, bias=False)
          (act): SiLU()
        )
        (ffn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
        (attn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
      )
      (4): OpenELMDecoderLayer(
        (attn): OpenELMMultiHeadCausalAttention(
          query_heads=16, key_heads=4, value_heads=4
          (qkv_proj): Linear(in_features=3072, out_features=3072, bias=False)
          (pos_embedding): OpenELMRotaryEmbedding(	model_dim=128, max_seq_length=4096, freq_constant=10000)
          (q_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (k_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (out_proj): Linear(in_features=2048, out_features=3072, bias=False)
        )
        (ffn): OpenELMFeedForwardNetwork(
          (ffn_with_glu) : True
          (proj_1): Linear(in_features=3072, out_features=5632, bias=False)
          (proj_2): Linear(in_features=2816, out_features=3072, bias=False)
          (act): SiLU()
        )
        (ffn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
        (attn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
      )
      (5): OpenELMDecoderLayer(
        (attn): OpenELMMultiHeadCausalAttention(
          query_heads=16, key_heads=4, value_heads=4
          (qkv_proj): Linear(in_features=3072, out_features=3072, bias=False)
          (pos_embedding): OpenELMRotaryEmbedding(	model_dim=128, max_seq_length=4096, freq_constant=10000)
          (q_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (k_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (out_proj): Linear(in_features=2048, out_features=3072, bias=False)
        )
        (ffn): OpenELMFeedForwardNetwork(
          (ffn_with_glu) : True
          (proj_1): Linear(in_features=3072, out_features=6144, bias=False)
          (proj_2): Linear(in_features=3072, out_features=3072, bias=False)
          (act): SiLU()
        )
        (ffn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
        (attn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
      )
      (6): OpenELMDecoderLayer(
        (attn): OpenELMMultiHeadCausalAttention(
          query_heads=16, key_heads=4, value_heads=4
          (qkv_proj): Linear(in_features=3072, out_features=3072, bias=False)
          (pos_embedding): OpenELMRotaryEmbedding(	model_dim=128, max_seq_length=4096, freq_constant=10000)
          (q_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (k_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (out_proj): Linear(in_features=2048, out_features=3072, bias=False)
        )
        (ffn): OpenELMFeedForwardNetwork(
          (ffn_with_glu) : True
          (proj_1): Linear(in_features=3072, out_features=6656, bias=False)
          (proj_2): Linear(in_features=3328, out_features=3072, bias=False)
          (act): SiLU()
        )
        (ffn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
        (attn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
      )
      (7): OpenELMDecoderLayer(
        (attn): OpenELMMultiHeadCausalAttention(
          query_heads=16, key_heads=4, value_heads=4
          (qkv_proj): Linear(in_features=3072, out_features=3072, bias=False)
          (pos_embedding): OpenELMRotaryEmbedding(	model_dim=128, max_seq_length=4096, freq_constant=10000)
          (q_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (k_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (out_proj): Linear(in_features=2048, out_features=3072, bias=False)
        )
        (ffn): OpenELMFeedForwardNetwork(
          (ffn_with_glu) : True
          (proj_1): Linear(in_features=3072, out_features=7168, bias=False)
          (proj_2): Linear(in_features=3584, out_features=3072, bias=False)
          (act): SiLU()
        )
        (ffn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
        (attn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
      )
      (8): OpenELMDecoderLayer(
        (attn): OpenELMMultiHeadCausalAttention(
          query_heads=16, key_heads=4, value_heads=4
          (qkv_proj): Linear(in_features=3072, out_features=3072, bias=False)
          (pos_embedding): OpenELMRotaryEmbedding(	model_dim=128, max_seq_length=4096, freq_constant=10000)
          (q_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (k_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (out_proj): Linear(in_features=2048, out_features=3072, bias=False)
        )
        (ffn): OpenELMFeedForwardNetwork(
          (ffn_with_glu) : True
          (proj_1): Linear(in_features=3072, out_features=8192, bias=False)
          (proj_2): Linear(in_features=4096, out_features=3072, bias=False)
          (act): SiLU()
        )
        (ffn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
        (attn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
      )
      (9): OpenELMDecoderLayer(
        (attn): OpenELMMultiHeadCausalAttention(
          query_heads=16, key_heads=4, value_heads=4
          (qkv_proj): Linear(in_features=3072, out_features=3072, bias=False)
          (pos_embedding): OpenELMRotaryEmbedding(	model_dim=128, max_seq_length=4096, freq_constant=10000)
          (q_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (k_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (out_proj): Linear(in_features=2048, out_features=3072, bias=False)
        )
        (ffn): OpenELMFeedForwardNetwork(
          (ffn_with_glu) : True
          (proj_1): Linear(in_features=3072, out_features=8704, bias=False)
          (proj_2): Linear(in_features=4352, out_features=3072, bias=False)
          (act): SiLU()
        )
        (ffn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
        (attn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
      )
      (10): OpenELMDecoderLayer(
        (attn): OpenELMMultiHeadCausalAttention(
          query_heads=16, key_heads=4, value_heads=4
          (qkv_proj): Linear(in_features=3072, out_features=3072, bias=False)
          (pos_embedding): OpenELMRotaryEmbedding(	model_dim=128, max_seq_length=4096, freq_constant=10000)
          (q_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (k_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (out_proj): Linear(in_features=2048, out_features=3072, bias=False)
        )
        (ffn): OpenELMFeedForwardNetwork(
          (ffn_with_glu) : True
          (proj_1): Linear(in_features=3072, out_features=9216, bias=False)
          (proj_2): Linear(in_features=4608, out_features=3072, bias=False)
          (act): SiLU()
        )
        (ffn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
        (attn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
      )
      (11): OpenELMDecoderLayer(
        (attn): OpenELMMultiHeadCausalAttention(
          query_heads=16, key_heads=4, value_heads=4
          (qkv_proj): Linear(in_features=3072, out_features=3072, bias=False)
          (pos_embedding): OpenELMRotaryEmbedding(	model_dim=128, max_seq_length=4096, freq_constant=10000)
          (q_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (k_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (out_proj): Linear(in_features=2048, out_features=3072, bias=False)
        )
        (ffn): OpenELMFeedForwardNetwork(
          (ffn_with_glu) : True
          (proj_1): Linear(in_features=3072, out_features=9728, bias=False)
          (proj_2): Linear(in_features=4864, out_features=3072, bias=False)
          (act): SiLU()
        )
        (ffn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
        (attn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
      )
      (12): OpenELMDecoderLayer(
        (attn): OpenELMMultiHeadCausalAttention(
          query_heads=16, key_heads=4, value_heads=4
          (qkv_proj): Linear(in_features=3072, out_features=3072, bias=False)
          (pos_embedding): OpenELMRotaryEmbedding(	model_dim=128, max_seq_length=4096, freq_constant=10000)
          (q_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (k_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (out_proj): Linear(in_features=2048, out_features=3072, bias=False)
        )
        (ffn): OpenELMFeedForwardNetwork(
          (ffn_with_glu) : True
          (proj_1): Linear(in_features=3072, out_features=10240, bias=False)
          (proj_2): Linear(in_features=5120, out_features=3072, bias=False)
          (act): SiLU()
        )
        (ffn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
        (attn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
      )
      (13): OpenELMDecoderLayer(
        (attn): OpenELMMultiHeadCausalAttention(
          query_heads=16, key_heads=4, value_heads=4
          (qkv_proj): Linear(in_features=3072, out_features=3072, bias=False)
          (pos_embedding): OpenELMRotaryEmbedding(	model_dim=128, max_seq_length=4096, freq_constant=10000)
          (q_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (k_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (out_proj): Linear(in_features=2048, out_features=3072, bias=False)
        )
        (ffn): OpenELMFeedForwardNetwork(
          (ffn_with_glu) : True
          (proj_1): Linear(in_features=3072, out_features=11264, bias=False)
          (proj_2): Linear(in_features=5632, out_features=3072, bias=False)
          (act): SiLU()
        )
        (ffn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
        (attn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
      )
      (14): OpenELMDecoderLayer(
        (attn): OpenELMMultiHeadCausalAttention(
          query_heads=16, key_heads=4, value_heads=4
          (qkv_proj): Linear(in_features=3072, out_features=3072, bias=False)
          (pos_embedding): OpenELMRotaryEmbedding(	model_dim=128, max_seq_length=4096, freq_constant=10000)
          (q_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (k_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (out_proj): Linear(in_features=2048, out_features=3072, bias=False)
        )
        (ffn): OpenELMFeedForwardNetwork(
          (ffn_with_glu) : True
          (proj_1): Linear(in_features=3072, out_features=11776, bias=False)
          (proj_2): Linear(in_features=5888, out_features=3072, bias=False)
          (act): SiLU()
        )
        (ffn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
        (attn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
      )
      (15): OpenELMDecoderLayer(
        (attn): OpenELMMultiHeadCausalAttention(
          query_heads=16, key_heads=4, value_heads=4
          (qkv_proj): Linear(in_features=3072, out_features=3072, bias=False)
          (pos_embedding): OpenELMRotaryEmbedding(	model_dim=128, max_seq_length=4096, freq_constant=10000)
          (q_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (k_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (out_proj): Linear(in_features=2048, out_features=3072, bias=False)
        )
        (ffn): OpenELMFeedForwardNetwork(
          (ffn_with_glu) : True
          (proj_1): Linear(in_features=3072, out_features=12288, bias=False)
          (proj_2): Linear(in_features=6144, out_features=3072, bias=False)
          (act): SiLU()
        )
        (ffn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
        (attn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
      )
      (16): OpenELMDecoderLayer(
        (attn): OpenELMMultiHeadCausalAttention(
          query_heads=16, key_heads=4, value_heads=4
          (qkv_proj): Linear(in_features=3072, out_features=3072, bias=False)
          (pos_embedding): OpenELMRotaryEmbedding(	model_dim=128, max_seq_length=4096, freq_constant=10000)
          (q_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (k_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (out_proj): Linear(in_features=2048, out_features=3072, bias=False)
        )
        (ffn): OpenELMFeedForwardNetwork(
          (ffn_with_glu) : True
          (proj_1): Linear(in_features=3072, out_features=12800, bias=False)
          (proj_2): Linear(in_features=6400, out_features=3072, bias=False)
          (act): SiLU()
        )
        (ffn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
        (attn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
      )
      (17): OpenELMDecoderLayer(
        (attn): OpenELMMultiHeadCausalAttention(
          query_heads=16, key_heads=4, value_heads=4
          (qkv_proj): Linear(in_features=3072, out_features=3072, bias=False)
          (pos_embedding): OpenELMRotaryEmbedding(	model_dim=128, max_seq_length=4096, freq_constant=10000)
          (q_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (k_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (out_proj): Linear(in_features=2048, out_features=3072, bias=False)
        )
        (ffn): OpenELMFeedForwardNetwork(
          (ffn_with_glu) : True
          (proj_1): Linear(in_features=3072, out_features=13312, bias=False)
          (proj_2): Linear(in_features=6656, out_features=3072, bias=False)
          (act): SiLU()
        )
        (ffn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
        (attn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
      )
      (18): OpenELMDecoderLayer(
        (attn): OpenELMMultiHeadCausalAttention(
          query_heads=20, key_heads=5, value_heads=5
          (qkv_proj): Linear(in_features=3072, out_features=3840, bias=False)
          (pos_embedding): OpenELMRotaryEmbedding(	model_dim=128, max_seq_length=4096, freq_constant=10000)
          (q_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (k_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (out_proj): Linear(in_features=2560, out_features=3072, bias=False)
        )
        (ffn): OpenELMFeedForwardNetwork(
          (ffn_with_glu) : True
          (proj_1): Linear(in_features=3072, out_features=14336, bias=False)
          (proj_2): Linear(in_features=7168, out_features=3072, bias=False)
          (act): SiLU()
        )
        (ffn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
        (attn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
      )
      (19): OpenELMDecoderLayer(
        (attn): OpenELMMultiHeadCausalAttention(
          query_heads=20, key_heads=5, value_heads=5
          (qkv_proj): Linear(in_features=3072, out_features=3840, bias=False)
          (pos_embedding): OpenELMRotaryEmbedding(	model_dim=128, max_seq_length=4096, freq_constant=10000)
          (q_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (k_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (out_proj): Linear(in_features=2560, out_features=3072, bias=False)
        )
        (ffn): OpenELMFeedForwardNetwork(
          (ffn_with_glu) : True
          (proj_1): Linear(in_features=3072, out_features=14848, bias=False)
          (proj_2): Linear(in_features=7424, out_features=3072, bias=False)
          (act): SiLU()
        )
        (ffn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
        (attn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
      )
      (20): OpenELMDecoderLayer(
        (attn): OpenELMMultiHeadCausalAttention(
          query_heads=20, key_heads=5, value_heads=5
          (qkv_proj): Linear(in_features=3072, out_features=3840, bias=False)
          (pos_embedding): OpenELMRotaryEmbedding(	model_dim=128, max_seq_length=4096, freq_constant=10000)
          (q_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (k_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (out_proj): Linear(in_features=2560, out_features=3072, bias=False)
        )
        (ffn): OpenELMFeedForwardNetwork(
          (ffn_with_glu) : True
          (proj_1): Linear(in_features=3072, out_features=15360, bias=False)
          (proj_2): Linear(in_features=7680, out_features=3072, bias=False)
          (act): SiLU()
        )
        (ffn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
        (attn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
      )
      (21): OpenELMDecoderLayer(
        (attn): OpenELMMultiHeadCausalAttention(
          query_heads=20, key_heads=5, value_heads=5
          (qkv_proj): Linear(in_features=3072, out_features=3840, bias=False)
          (pos_embedding): OpenELMRotaryEmbedding(	model_dim=128, max_seq_length=4096, freq_constant=10000)
          (q_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (k_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (out_proj): Linear(in_features=2560, out_features=3072, bias=False)
        )
        (ffn): OpenELMFeedForwardNetwork(
          (ffn_with_glu) : True
          (proj_1): Linear(in_features=3072, out_features=15872, bias=False)
          (proj_2): Linear(in_features=7936, out_features=3072, bias=False)
          (act): SiLU()
        )
        (ffn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
        (attn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
      )
      (22): OpenELMDecoderLayer(
        (attn): OpenELMMultiHeadCausalAttention(
          query_heads=20, key_heads=5, value_heads=5
          (qkv_proj): Linear(in_features=3072, out_features=3840, bias=False)
          (pos_embedding): OpenELMRotaryEmbedding(	model_dim=128, max_seq_length=4096, freq_constant=10000)
          (q_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (k_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (out_proj): Linear(in_features=2560, out_features=3072, bias=False)
        )
        (ffn): OpenELMFeedForwardNetwork(
          (ffn_with_glu) : True
          (proj_1): Linear(in_features=3072, out_features=16384, bias=False)
          (proj_2): Linear(in_features=8192, out_features=3072, bias=False)
          (act): SiLU()
        )
        (ffn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
        (attn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
      )
      (23): OpenELMDecoderLayer(
        (attn): OpenELMMultiHeadCausalAttention(
          query_heads=20, key_heads=5, value_heads=5
          (qkv_proj): Linear(in_features=3072, out_features=3840, bias=False)
          (pos_embedding): OpenELMRotaryEmbedding(	model_dim=128, max_seq_length=4096, freq_constant=10000)
          (q_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (k_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (out_proj): Linear(in_features=2560, out_features=3072, bias=False)
        )
        (ffn): OpenELMFeedForwardNetwork(
          (ffn_with_glu) : True
          (proj_1): Linear(in_features=3072, out_features=17408, bias=False)
          (proj_2): Linear(in_features=8704, out_features=3072, bias=False)
          (act): SiLU()
        )
        (ffn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
        (attn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
      )
      (24): OpenELMDecoderLayer(
        (attn): OpenELMMultiHeadCausalAttention(
          query_heads=20, key_heads=5, value_heads=5
          (qkv_proj): Linear(in_features=3072, out_features=3840, bias=False)
          (pos_embedding): OpenELMRotaryEmbedding(	model_dim=128, max_seq_length=4096, freq_constant=10000)
          (q_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (k_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (out_proj): Linear(in_features=2560, out_features=3072, bias=False)
        )
        (ffn): OpenELMFeedForwardNetwork(
          (ffn_with_glu) : True
          (proj_1): Linear(in_features=3072, out_features=17920, bias=False)
          (proj_2): Linear(in_features=8960, out_features=3072, bias=False)
          (act): SiLU()
        )
        (ffn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
        (attn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
      )
      (25): OpenELMDecoderLayer(
        (attn): OpenELMMultiHeadCausalAttention(
          query_heads=20, key_heads=5, value_heads=5
          (qkv_proj): Linear(in_features=3072, out_features=3840, bias=False)
          (pos_embedding): OpenELMRotaryEmbedding(	model_dim=128, max_seq_length=4096, freq_constant=10000)
          (q_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (k_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (out_proj): Linear(in_features=2560, out_features=3072, bias=False)
        )
        (ffn): OpenELMFeedForwardNetwork(
          (ffn_with_glu) : True
          (proj_1): Linear(in_features=3072, out_features=18432, bias=False)
          (proj_2): Linear(in_features=9216, out_features=3072, bias=False)
          (act): SiLU()
        )
        (ffn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
        (attn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
      )
      (26): OpenELMDecoderLayer(
        (attn): OpenELMMultiHeadCausalAttention(
          query_heads=20, key_heads=5, value_heads=5
          (qkv_proj): Linear(in_features=3072, out_features=3840, bias=False)
          (pos_embedding): OpenELMRotaryEmbedding(	model_dim=128, max_seq_length=4096, freq_constant=10000)
          (q_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (k_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (out_proj): Linear(in_features=2560, out_features=3072, bias=False)
        )
        (ffn): OpenELMFeedForwardNetwork(
          (ffn_with_glu) : True
          (proj_1): Linear(in_features=3072, out_features=18944, bias=False)
          (proj_2): Linear(in_features=9472, out_features=3072, bias=False)
          (act): SiLU()
        )
        (ffn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
        (attn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
      )
      (27): OpenELMDecoderLayer(
        (attn): OpenELMMultiHeadCausalAttention(
          query_heads=20, key_heads=5, value_heads=5
          (qkv_proj): Linear(in_features=3072, out_features=3840, bias=False)
          (pos_embedding): OpenELMRotaryEmbedding(	model_dim=128, max_seq_length=4096, freq_constant=10000)
          (q_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (k_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (out_proj): Linear(in_features=2560, out_features=3072, bias=False)
        )
        (ffn): OpenELMFeedForwardNetwork(
          (ffn_with_glu) : True
          (proj_1): Linear(in_features=3072, out_features=19456, bias=False)
          (proj_2): Linear(in_features=9728, out_features=3072, bias=False)
          (act): SiLU()
        )
        (ffn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
        (attn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
      )
      (28): OpenELMDecoderLayer(
        (attn): OpenELMMultiHeadCausalAttention(
          query_heads=20, key_heads=5, value_heads=5
          (qkv_proj): Linear(in_features=3072, out_features=3840, bias=False)
          (pos_embedding): OpenELMRotaryEmbedding(	model_dim=128, max_seq_length=4096, freq_constant=10000)
          (q_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (k_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (out_proj): Linear(in_features=2560, out_features=3072, bias=False)
        )
        (ffn): OpenELMFeedForwardNetwork(
          (ffn_with_glu) : True
          (proj_1): Linear(in_features=3072, out_features=20480, bias=False)
          (proj_2): Linear(in_features=10240, out_features=3072, bias=False)
          (act): SiLU()
        )
        (ffn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
        (attn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
      )
      (29): OpenELMDecoderLayer(
        (attn): OpenELMMultiHeadCausalAttention(
          query_heads=20, key_heads=5, value_heads=5
          (qkv_proj): Linear(in_features=3072, out_features=3840, bias=False)
          (pos_embedding): OpenELMRotaryEmbedding(	model_dim=128, max_seq_length=4096, freq_constant=10000)
          (q_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (k_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (out_proj): Linear(in_features=2560, out_features=3072, bias=False)
        )
        (ffn): OpenELMFeedForwardNetwork(
          (ffn_with_glu) : True
          (proj_1): Linear(in_features=3072, out_features=20992, bias=False)
          (proj_2): Linear(in_features=10496, out_features=3072, bias=False)
          (act): SiLU()
        )
        (ffn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
        (attn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
      )
      (30): OpenELMDecoderLayer(
        (attn): OpenELMMultiHeadCausalAttention(
          query_heads=24, key_heads=6, value_heads=6
          (qkv_proj): Linear(in_features=3072, out_features=4608, bias=False)
          (pos_embedding): OpenELMRotaryEmbedding(	model_dim=128, max_seq_length=4096, freq_constant=10000)
          (q_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (k_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (out_proj): Linear(in_features=3072, out_features=3072, bias=False)
        )
        (ffn): OpenELMFeedForwardNetwork(
          (ffn_with_glu) : True
          (proj_1): Linear(in_features=3072, out_features=21504, bias=False)
          (proj_2): Linear(in_features=10752, out_features=3072, bias=False)
          (act): SiLU()
        )
        (ffn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
        (attn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
      )
      (31): OpenELMDecoderLayer(
        (attn): OpenELMMultiHeadCausalAttention(
          query_heads=24, key_heads=6, value_heads=6
          (qkv_proj): Linear(in_features=3072, out_features=4608, bias=False)
          (pos_embedding): OpenELMRotaryEmbedding(	model_dim=128, max_seq_length=4096, freq_constant=10000)
          (q_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (k_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (out_proj): Linear(in_features=3072, out_features=3072, bias=False)
        )
        (ffn): OpenELMFeedForwardNetwork(
          (ffn_with_glu) : True
          (proj_1): Linear(in_features=3072, out_features=22016, bias=False)
          (proj_2): Linear(in_features=11008, out_features=3072, bias=False)
          (act): SiLU()
        )
        (ffn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
        (attn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
      )
      (32): OpenELMDecoderLayer(
        (attn): OpenELMMultiHeadCausalAttention(
          query_heads=24, key_heads=6, value_heads=6
          (qkv_proj): Linear(in_features=3072, out_features=4608, bias=False)
          (pos_embedding): OpenELMRotaryEmbedding(	model_dim=128, max_seq_length=4096, freq_constant=10000)
          (q_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (k_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (out_proj): Linear(in_features=3072, out_features=3072, bias=False)
        )
        (ffn): OpenELMFeedForwardNetwork(
          (ffn_with_glu) : True
          (proj_1): Linear(in_features=3072, out_features=22528, bias=False)
          (proj_2): Linear(in_features=11264, out_features=3072, bias=False)
          (act): SiLU()
        )
        (ffn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
        (attn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
      )
      (33): OpenELMDecoderLayer(
        (attn): OpenELMMultiHeadCausalAttention(
          query_heads=24, key_heads=6, value_heads=6
          (qkv_proj): Linear(in_features=3072, out_features=4608, bias=False)
          (pos_embedding): OpenELMRotaryEmbedding(	model_dim=128, max_seq_length=4096, freq_constant=10000)
          (q_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (k_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (out_proj): Linear(in_features=3072, out_features=3072, bias=False)
        )
        (ffn): OpenELMFeedForwardNetwork(
          (ffn_with_glu) : True
          (proj_1): Linear(in_features=3072, out_features=23552, bias=False)
          (proj_2): Linear(in_features=11776, out_features=3072, bias=False)
          (act): SiLU()
        )
        (ffn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
        (attn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
      )
      (34): OpenELMDecoderLayer(
        (attn): OpenELMMultiHeadCausalAttention(
          query_heads=24, key_heads=6, value_heads=6
          (qkv_proj): Linear(in_features=3072, out_features=4608, bias=False)
          (pos_embedding): OpenELMRotaryEmbedding(	model_dim=128, max_seq_length=4096, freq_constant=10000)
          (q_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (k_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (out_proj): Linear(in_features=3072, out_features=3072, bias=False)
        )
        (ffn): OpenELMFeedForwardNetwork(
          (ffn_with_glu) : True
          (proj_1): Linear(in_features=3072, out_features=24064, bias=False)
          (proj_2): Linear(in_features=12032, out_features=3072, bias=False)
          (act): SiLU()
        )
        (ffn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
        (attn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
      )
      (35): OpenELMDecoderLayer(
        (attn): OpenELMMultiHeadCausalAttention(
          query_heads=24, key_heads=6, value_heads=6
          (qkv_proj): Linear(in_features=3072, out_features=4608, bias=False)
          (pos_embedding): OpenELMRotaryEmbedding(	model_dim=128, max_seq_length=4096, freq_constant=10000)
          (q_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (k_norm): OpenELMRMSNorm(num_features=128, eps=1e-06)
          (out_proj): Linear(in_features=3072, out_features=3072, bias=False)
        )
        (ffn): OpenELMFeedForwardNetwork(
          (ffn_with_glu) : True
          (proj_1): Linear(in_features=3072, out_features=24576, bias=False)
          (proj_2): Linear(in_features=12288, out_features=3072, bias=False)
          (act): SiLU()
        )
        (ffn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
        (attn_norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
      )
    )
    (norm): OpenELMRMSNorm(num_features=3072, eps=1e-06)
  )
)```

## Inference

In [13]:
def generate_response(system, user):
    """Generate text from the model for one system/user message pair.

    The two messages are rendered into a single prompt string with the
    tokenizer's chat template, tokenized, and passed to ``model.generate``
    together with the notebook-level generation hyperparameters.

    Relies on names defined in earlier cells: ``tokenizer``, ``model``,
    ``device``, the tokenizer settings (``max_length``, ``padding``,
    ``truncation``) and the generation settings (``num_return_sequences``,
    ``max_new_tokens``, ``do_sample``, ``temperature``, ``top_k``, ``top_p``,
    ``repetition_penalty``).

    Args:
        system: Content of the "system" message.
        user: Content of the "user" message.

    Returns:
        The decoded text of the first returned sequence. Special tokens are
        kept (``skip_special_tokens=False``); for a decoder-only model this
        sequence is the prompt followed by the generated continuation.

    Note:
        On Apple Silicon (``device == "mps"``) ``generate`` can raise
        ``NotImplementedError`` for ``aten::isin.Tensor_Tensor_out``. The
        error message suggests setting the environment variable
        ``PYTORCH_ENABLE_MPS_FALLBACK=1`` so the op falls back to the CPU;
        presumably this has to be set before ``torch`` is imported.
    """
    messages = [
        {"role": "system", "content": system},
        {"role": "user", "content": user},
    ]

    # Ask the template for the assistant-turn marker (when it defines one) so
    # the model produces a reply instead of continuing the user message.
    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    # Calling the tokenizer (rather than .encode) also yields the attention
    # mask, which stays correct if `padding` is switched away from
    # "do_not_pad". The template already inserted the special tokens, hence
    # add_special_tokens=False.
    encoded = tokenizer(
        prompt,
        max_length=max_length,
        padding=padding,
        truncation=truncation,
        add_special_tokens=False,
        return_tensors="pt",
    ).to(device)

    outputs = model.generate(
        input_ids=encoded["input_ids"],
        # pad_token_id is aliased to EOS below, so the mask cannot be inferred
        # from the ids and must be passed explicitly.
        attention_mask=encoded["attention_mask"],
        pad_token_id=tokenizer.eos_token_id,
        num_return_sequences=num_return_sequences,
        max_new_tokens=max_new_tokens,
        do_sample=do_sample,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
    )

    # Only the first sequence is returned even when num_return_sequences > 1.
    return tokenizer.decode(outputs[0], skip_special_tokens=False)

In [14]:
# System message (Korean): "Please write a response to the following instruction."
system_prompt = "다음 지시사항에 대한 응답을 작성해주세요."

In [15]:
# User message (Korean): "Please explain the Fibonacci sequence."
user_prompt = "피보나치 수열에 대해 설명해주세요."

In [16]:
# Run a single generation for the system/user prompts and show the decoded
# text (generate_response keeps special tokens in the returned string).
response = generate_response(system_prompt, user_prompt)
print(response)


No chat template is defined for this tokenizer - using the default template for the LlamaTokenizerFast class. If the default is not appropriate for your model, please set `tokenizer.chat_template` to an appropriate template. See https://huggingface.co/docs/transformers/main/chat_templating for more information.



NotImplementedError: The operator 'aten::isin.Tensor_Tensor_out' is not currently implemented for the MPS device. If you want this op to be added in priority during the prototype phase of this feature, please comment on https://github.com/pytorch/pytorch/issues/77764. As a temporary fix, you can set the environment variable `PYTORCH_ENABLE_MPS_FALLBACK=1` to use the CPU as a fallback for this op. WARNING: this will be slower than running natively on MPS.