# Fast Inference

## Install Requirements


In [None]:
%pip install bitsandbytes==0.41.0
%pip install transformers @ git+https://github.com/huggingface/transformers.git
%pip install peft @ git+https://github.com/huggingface/peft.git@v0.3.0
%pip install accelerate @ git+https://github.com/huggingface/accelerate.git@v0.20.3
%pip install einops==0.6.1
%pip install evaluate==0.4.0
%pip install scikit-learn==1.2.2
%pip install sentencepiece==0.1.99
%pip install wandb==0.15.3

## Generate Text


In [None]:
from peft import PeftModel
from transformers import GenerationConfig, LlamaForCausalLM, LlamaTokenizer
import torch


def generate_text(prompt: str) -> str:
    """Generate a short continuation of ``prompt`` with the LoRA-adapted model.

    Loads the tokenizer and the fp16 base model, wraps the base model with
    the PEFT adapter weights, and generates up to 30 new tokens.

    Note: the tokenizer, base model and adapter are (re)loaded on every call.

    Args:
        prompt: Text to feed to the model.

    Returns:
        The decoded first sequence returned by ``model.generate`` with
        special tokens stripped (for causal LMs this normally includes the
        prompt itself followed by the newly generated text).
    """
    # Create tokenizer and model
    base_model = "timdettmers/guanaco-33b-merged"
    tokenizer = LlamaTokenizer.from_pretrained(base_model)
    # Base model in half precision; device_map="auto" lets accelerate place
    # the weights on the available device(s).
    model = LlamaForCausalLM.from_pretrained(
        base_model,
        torch_dtype=torch.float16,
        device_map="auto",
    )
    # LoRA PEFT adapters. The adapter repository is passed as ``model_id``;
    # ``adapter_name`` is only a local label for the loaded adapter and
    # passing the repo there leaves the required ``model_id`` missing.
    adapter_model = "lyogavin/Anima33B"
    model = PeftModel.from_pretrained(
        model=model,
        model_id=adapter_model,
    )
    model.eval()
    # The tokenizer returns CPU tensors; move them to the model's device so
    # generate() does not hit a CPU/GPU device mismatch.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    # Generate
    generate_ids = model.generate(**inputs, max_new_tokens=30)
    text = tokenizer.batch_decode(
        generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
    )[0]
    return text


# Smoke-test the pipeline on a sample prompt and display the decoded output.
text = generate_text(prompt="The quick brown fox jumps over the lazy dog")
print(text)