In [1]:
!pip install flash-attn transformers accelerate

import time
from datetime import timedelta

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.utils import is_flash_attn_2_available

# Seed torch's global RNG up front so any sampling done later starts from a fixed state.
torch.random.manual_seed(0)

# Single source of truth for the checkpoint so the model and tokenizer cannot drift apart.
MODEL_ID = "microsoft/Phi-3-mini-4k-instruct"

model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    # device_map already places the weights on the GPU while loading, so no
    # follow-up .to("cuda") call is needed.
    device_map="cuda",
    torch_dtype="auto",
    # NOTE(review): this allows modeling code shipped with the checkpoint repo to run
    # locally — consider pinning a revision if that matters for your setup.
    trust_remote_code=True,
    attn_implementation="flash_attention_2",
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Report whether the flash-attn backend requested above is usable in this environment.
print("flash_attn_2 available:", is_flash_attn_2_available())



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


flash_attn_2 available: True


In [2]:
def gen(text, max_new_tokens=1024):
    """Generate a single-turn reply to ``text`` and print how long it took.

    Args:
        text: The user message. It is substituted into the
            ``<|user|> ... <|end|> <|assistant|>`` prompt template below.
        max_new_tokens: Upper bound on newly generated tokens, forwarded to
            ``model.generate``. Defaults to 1024, the value that used to be
            hard-coded, so existing ``gen(text)`` calls behave as before.

    Returns:
        The string produced by ``tokenizer.decode`` for the first sequence
        returned by ``model.generate``. In the recorded runs this contains the
        prompt followed by the reply, with special tokens left in.

    Side effects:
        Prints ``took <timedelta>`` covering tokenisation, generation and decoding.
    """
    start = time.perf_counter()
    prompt = "<|user|>\n{} <|end|>\n<|assistant|>".format(text)
    # Send the input ids to whatever device the model lives on instead of
    # assuming "cuda", so the helper keeps working if the model is moved.
    tokens = tokenizer.encode(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(tokens, max_new_tokens=max_new_tokens)
    output_string = tokenizer.decode(outputs[0])
    print("took", timedelta(seconds=time.perf_counter() - start))
    return output_string


# Try the helper on a short factual question; as the last expression in the cell,
# the returned string is shown as the cell output.
gen("What is the closest star to the Sun?")

took 0:00:07.003518


'<s><|user|> What is the closest star to the Sun? <|end|><|assistant|> The closest star to the Sun is Proxima Centauri. It is part of the Alpha Centauri star system, which also includes Alpha Centauri A and Alpha Centauri B. Proxima Centauri is approximately 4.24 light-years away from the Sun. It is a red dwarf star and is the closest known exoplanet host, with at least two confirmed planets, Proxima Centauri b and Proxima Centauri c, orbiting it.<|end|>'

In [3]:
# Second trial: an open-ended request, to see a longer free-form reply from the model.
gen("What is an example prompt I can send you?")

took 0:00:06.067000


'<s><|user|> What is an example prompt I can send you? <|end|><|assistant|> Certainly! Here\'s an example prompt you can send to generate a response:\n\n"Imagine you are a travel guide writer specializing in European destinations. Write a brief, engaging description of a day trip to the historic city of Prague, Czech Republic, highlighting its key attractions, local cuisine, and unique cultural experiences. Aim for a tone that is informative, yet captivating for potential travelers."\n\nThis prompt encourages a detailed and interesting response that showcases your ability to create engaging content, while also providing useful information for potential travelers.<|end|>'