In [1]:
import os, pickle
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, LogitsProcessorList, LogitsProcessor
from transformers import BitsAndBytesConfig

quantization_config = BitsAndBytesConfig(
    load_in_8bit=True  # 8비트 양자화 설정
)

# 모델과 토크나이저 로드
model_name = 'meta-llama/Meta-Llama-3-8B-Instruct'

model = AutoModelForCausalLM.from_pretrained(model_name, 
                                             quantization_config=quantization_config)
tokenizer = AutoTokenizer.from_pretrained(model_name)



device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# 사용할 어휘 집합 정의


with open('final_target.pkl', 'rb') as f:
    final_target = pickle.load(f)

allowed_token_ids = list(final_target)
allowed_token_ids

  from .autonotebook import tqdm as notebook_tqdm
`low_cpu_mem_usage` was None, now set to True since model is quantized.
Loading checkpoint shards: 100%|██████████| 4/4 [00:03<00:00,  1.05it/s]
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


[0,
 1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29,
 30,
 31,
 32,
 33,
 34,
 35,
 36,
 37,
 38,
 39,
 40,
 41,
 42,
 43,
 44,
 45,
 46,
 47,
 48,
 49,
 50,
 51,
 52,
 53,
 54,
 55,
 56,
 57,
 58,
 59,
 60,
 61,
 62,
 63,
 64,
 65,
 66,
 67,
 68,
 69,
 70,
 71,
 72,
 73,
 74,
 75,
 76,
 77,
 78,
 79,
 80,
 81,
 82,
 83,
 84,
 85,
 86,
 87,
 88,
 89,
 90,
 91,
 92,
 93,
 94,
 95,
 96,
 97,
 98,
 99,
 100,
 101,
 102,
 103,
 104,
 105,
 106,
 107,
 108,
 109,
 110,
 111,
 112,
 113,
 114,
 115,
 116,
 117,
 118,
 119,
 120,
 121,
 122,
 123,
 124,
 125,
 126,
 127,
 128,
 129,
 130,
 131,
 132,
 133,
 134,
 135,
 136,
 137,
 138,
 139,
 140,
 141,
 142,
 143,
 144,
 145,
 146,
 147,
 148,
 149,
 150,
 151,
 152,
 153,
 154,
 155,
 156,
 157,
 158,
 159,
 160,
 161,
 162,
 163,
 164,
 165,
 166,
 167,
 168,
 169,
 170,
 171,
 172,
 173,
 174,
 175,
 176,
 177,
 178,
 179,
 180,
 181,
 182,
 183,
 184,


In [2]:

class CustomLogitsProcessor(LogitsProcessor):
    def __call__(self, input_ids, scores):
        mask = torch.full_like(scores, 1)
        mask[:, allowed_token_ids] = 1.0
        return scores * mask 

# 텍스트 생성 함수
def generate_text(input_text, max_length=50):
    input_ids = tokenizer(input_text, return_tensors='pt').input_ids.to(device)
    logits_processor = LogitsProcessorList([CustomLogitsProcessor()])
    output = model.generate(input_ids, max_length=max_length, logits_processor=logits_processor, temperature = 0.3,)
    return tokenizer.decode(output[0], skip_special_tokens=True)


In [3]:
generate_text("Explanation of theory of general relativity in 2 sentences: ", max_length=100)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
2024-05-24 14:55:14.822558: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-05-24 14:55:14.844272: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-05-24 14:55:14.844292: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-05-24 14:55:14.845178: E ext

'Explanation of theory of general relativity in 2 sentences: 1. According to the theory of general relativity, gravity is not a force that acts between objects, but rather a curvature of spacetime caused by the presence of mass and energy. 2. The curvature of spacetime around a massive object such as a star or a black hole is what we experience as gravity, and it is this curvature that determines the motion of objects in the vicinity of the object. Explanation of theory of'

In [4]:

class CustomLogitsProcessor(LogitsProcessor):
    def __call__(self, input_ids, scores):
        mask = torch.full_like(scores, -float('inf'))
        mask[:, allowed_token_ids] = 0
        return scores + mask 

# 텍스트 생성 함수
def generate_text(input_text, max_length=50):
    input_ids = tokenizer(input_text, return_tensors='pt').input_ids.to(device)
    logits_processor = LogitsProcessorList([CustomLogitsProcessor()])
    output = model.generate(input_ids, max_length=max_length, logits_processor=logits_processor, temperature = 0.3,)
    return tokenizer.decode(output[0], skip_special_tokens=True)


In [5]:
generate_text("Explanation of theory of general relativity in 2 sentences: ", max_length=100)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


'Explanation of theory of general relativity in 2 sentences: 1. The general\nrelatvity is a part of the modern\nphysics that was developed by\nAlberth Ei\nnstei\nn in the early 20th century. It is a\ntheo\nry that\ndes\nc\nr\nib\nes\nthe\nbeh\nav\ni\nor\nof\ngr\nav\nit\ny\nand\nthe\nw'

In [6]:

class CustomLogitsProcessor(LogitsProcessor):
    def __call__(self, input_ids, scores):
        mask = torch.full_like(scores, 1)
        mask[:, allowed_token_ids] = 2
        return scores * mask 

# 텍스트 생성 함수
def generate_text(input_text, max_length=50):
    input_ids = tokenizer(input_text, return_tensors='pt').input_ids.to(device)
    logits_processor = LogitsProcessorList([CustomLogitsProcessor()])
    output = model.generate(input_ids, max_length=max_length, logits_processor=logits_processor, temperature = 0.3,)
    return tokenizer.decode(output[0], skip_special_tokens=True)


In [7]:
generate_text("Explanation of theory of general relativity in 2 sentences: ", max_length=100)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


'Explanation of theory of general relativity in 2 sentences: 1. The general\nrela tive\ntheo\nry\nof\nAl\nber\nt\nEi\nn\nst\ne\ni\nn\n(1915) is a\ntheo\nry\nof\ng\nra\nvi\nty\nthat\ndes\ncr\ni\nbes\nthe\nin\nter\nac\nti\non\nbe\ntw\nee\n'

In [14]:

class CustomLogitsProcessor(LogitsProcessor):
    def __call__(self, input_ids, scores):
        mask = torch.full_like(scores, 1)
        mask[:, allowed_token_ids] = 1.1
        return scores * mask 

# 텍스트 생성 함수
def generate_text(input_text, max_length=50):
    input_ids = tokenizer(input_text, return_tensors='pt').input_ids.to(device)
    logits_processor = LogitsProcessorList([CustomLogitsProcessor()])
    output = model.generate(input_ids, max_length=max_length, logits_processor=logits_processor, temperature = 0.3,)
    return tokenizer.decode(output[0], skip_special_tokens=True)


In [15]:
generate_text("Explanation of theory of general relativity in 2 sentences: ", max_length=100)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


'Explanation of theory of general relativity in 2 sentences: 1. The theory of general relativity, developed by Albert Einstein, states that the curvature of spacetime is caused by the presence of mass and energy. 2. According to this theory, the more massive the object, the more it warps the fabric of spacetime around it, and the more it affects the motion of other objects in its vicinity.\nWhat is the main idea of general relativity? The main idea of'