In [None]:
import torch
from transformers import AutoTokenizer
from transformers import BitsAndBytesConfig, AutoModelForCausalLM

In [None]:
# Run configuration. Primary settings come first, derived names afterwards.
base_model_name = "mistral"
project = "alpaca-finetune"
base_model_id = "mistralai/Mistral-7B-v0.1"  # checkpoint id handed to from_pretrained()
model_max_length = 512  # passed as max_new_tokens by the eval cell

# Derived identifiers: "<base_model_name>-<project>" and a same-named relative directory.
# NOTE(review): neither is read by the cells visible here -- presumably used by
# training/saving code elsewhere; confirm.
run_name = f"{base_model_name}-{project}"
output_dir = f"./{run_name}"

In [None]:
# Tokenizer for the base checkpoint; add_bos_token=True is requested explicitly
# so the setting does not depend on the checkpoint's default.
tokenizer = AutoTokenizer.from_pretrained(
    base_model_id,
    add_bos_token=True,
)

In [None]:
# Quantization settings for loading the base model: 4-bit weights using the
# "nf4" quant type, double quantization enabled, bfloat16 as the compute dtype.
bnb = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Load the causal-LM checkpoint with the quantization config above.
model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    quantization_config=bnb,
)

In [None]:
# Single evaluation example and the instruction-style prompt built from it.
eval_input = {"instruction": "Construct a metaphor that compares life to a road."}

# The template has one placeholder, {instruction}, filled from eval_input.
# The prompt ends right after "### Response:\n".
eval_prompt = (
    "Below is an instruction that describes a task. "
    "Write a response that appropriately completes the request.\n\n"
    "### Instruction:\n"
    "{instruction}\n\n"
    "### Response:\n"
).format(**eval_input)

In [None]:
# eval
# Tokenize the prompt and move the tensors to the device the model is on,
# rather than assuming a literal 'cuda'. The batch is deliberately not named
# `input`: that would shadow the Python builtin for every later cell in this
# kernel.
eval_tokens = tokenizer(eval_prompt, return_tensors="pt").to(model.device)

model.eval()
with torch.no_grad():
    # NOTE(review): model_max_length is used here as the budget of *new*
    # tokens, so the full sequence is prompt length + model_max_length --
    # confirm that is the intended limit.
    generated = model.generate(**eval_tokens, max_new_tokens=model_max_length)

# Only one prompt was tokenized, so decode the first (only) returned sequence.
# print() is used instead of a bare expression so newlines in the completion
# are rendered rather than shown escaped.
print(tokenizer.decode(generated[0]))