In [1]:
from transformers import AutoTokenizer
from optimum.onnxruntime import ORTModelForQuestionAnswering, ORTModelForCausalLM
import torch
import time

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# onnx_path = 'temp/llama13b-onnx/'
# onnx_path = 'temp/llamaII7b-onnx/'
onnx_path = 'temp/llamaII13b-onnx/'

tokenizer = AutoTokenizer.from_pretrained(onnx_path)
model = ORTModelForCausalLM.from_pretrained(onnx_path, use_cache=False, use_io_binding=False).to(device)


2023-08-06 15:03:38.855484593 [W:onnxruntime:, session_state.cc:1169 VerifyEachNodeIsAssignedToAnEp] Some nodes were not assigned to the preferred execution providers which may or may not have an negative impact on performance. e.g. ORT explicitly assigns shape related ops to CPU to improve perf.
2023-08-06 15:03:38.855550935 [W:onnxruntime:, session_state.cc:1171 VerifyEachNodeIsAssignedToAnEp] Rerunning with verbose output on a non-minimal build will show node assignments.


In [4]:
ts = time.time()
inputs = tokenizer("The following is a detailed recipe for a cake named 'Death-by-Chocolate': \nFirst ", return_tensors="pt").to(device)
# print(inputs)
# outputs = model.generate(**inputs, do_sample=True, max_length=50, early_stopping=True, no_repeat_ngram_size=3, num_beams=1)
outputs = model.generate(**inputs, max_length=1110, do_sample=True, top_k=50, top_p=0.95, num_return_sequences=4, temperature=0.7, num_beams=1, no_repeat_ngram_size=2, early_stopping=True)
print("Time taken: ", time.time() - ts)
for seq in tokenizer.batch_decode(outputs):
  print(seq)
  print('---')

Time taken:  2849.8228986263275
<s> The following is a detailed recipe for a cake named 'Death-by-Chocolate': 
First  you take a cup of flour and mix it with a tablespoon of baking powder.  Then,
Second  add a half cup each of sugar and cocoa powders, mix them well.
Third  take one stick of butter and one cup  of milk, melt them together in a saucepan. Then
Fourth  pour the buttermilk mixture into the other ingredients and beat the batter until smooth. Pour
the batter into a greased casserole and bake at 375 degrees for about an hour. Serve with whipped cream.</s><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk>

In [3]:

is_stoptoken = False
recipe = "Title: Death-by-Chocolate \
\nCategory: Desserts \
\nTags: Chocolate, Cake \
\nYield: 9 servings \
\nIngredients: \
\n  "

# input_tokens = tokenizer(recipe, return_tensors="pt").to(device)
# token_outputs = model.generate(**input_tokens, max_new_tokens=3300, do_sample=True, top_k=50, top_p=0.95, num_return_sequences=5, temperature=0.7, num_beams=5, no_repeat_ngram_size=2, early_stopping=True)
# text_outputs = tokenizer.batch_decode(token_outputs, skip_special_tokens=True)
# for i in range(len(text_outputs)):
#     print(f"Recipe {i+1}: {text_outputs[i]}")
#     print(f"Recipe {i+1}: {text_outputs[i]}", file=open("lama_recipe.txt", "a"))
    

t_total = 0
previous_recipe = recipe
while not is_stoptoken:
    t_start = time.time()
    initial_input = tokenizer(previous_recipe, return_tensors="pt").to(device)
    outputs = model.generate(**initial_input, max_new_tokens=36, do_sample=True, top_k=50, top_p=0.95, num_return_sequences=1, temperature=0.7, num_beams=5, no_repeat_ngram_size=2, early_stopping=True)
    outputs_text = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
    previous_recipe = outputs_text.strip(previous_recipe)
    recipe += previous_recipe
    print(f"Generated text: {previous_recipe}")
    print(f"{previous_recipe}", file=open("lama_recipe.txt", "a"))
    print(f"Elapsed time: {time.time() - t_start:.3f} seconds\nRecipe length: {len(recipe.split())} words")
    # Check for stop token
    outputs_text = tokenizer.batch_decode(outputs, skip_special_tokens=False)[0]
    t_total += time.time() - t_start
    if '</s>' in outputs_text:
        is_stoptoken = True

print(f"Total time: {t_total:.3f} seconds")
print(recipe)


Generated text: 1. 1 cup unsweetened cocoa powder, plus more for dusting

    * 2 cups all-purpose flour, sif
Elapsed time: 362.578 seconds
Recipe length: 27 words
Generated text: Preheat oven to 350 degrees F (175 degrees C). Grease a 9x13 inch b
Elapsed time: 306.079 seconds
Recipe length: 40 words
Generated text: king dish.
In a large bowl, cream together the butter and sugar until light and fluffy. Beat in the eggs, one at a time,
Elapsed time: 275.290 seconds
Recipe length: 63 words
Generated text: Combine the flour, baking powder and salt; stir into the creamed mixture. Stir in chocolate chip
Elapsed time: 303.377 seconds
Recipe length: 78 words
Generated text: 
Drop by rounded teaspoonfuls 2 inches apart onto ungreased cookie sheets. Bake at 375° for 8-10
Elapsed time: 262.739 seconds
Recipe length: 94 words
Generated text: minutes or until lightly browned. Cool on w
Elapsed time: 303.964 seconds
Recipe length: 101 words
Total time: 1814.030 seconds
Title: Death-by-Chocolate 
Ca

In [4]:
print(recipe, file=open("lama_recipe_full.txt", "a"))

In [4]:
print("""Time taken:  138.5658130645752
<s> What is the meaning of life? Is it possible to answer this question? The most important and well-known philosophers and thinkers of the world have been trying to give an answer to this. One of them was the Russian philosopher Vladimir Nabokov. He said that the question is impossible to be answered. But we can find some hints to the answer. And the main hint is that we must search for it.
The main idea of Nietzsche is a will to power. This means that each of us has a certain will
---
<s> What is the meaning of life? How is it possible to live a meaningful life, to find purpose in life and meaning in suffering? Are there answers to these fundamental questions?
This course will look at the question of meaning and the possibility of finding meaning through suffering, and through the way of the Buddha. It will investigate how we can find meaning as we go through life.
The course is based on an in-depth study of Buddhist texts, mainly the Pali Canon and Theravada texts. These are texts which have been studied
---
<s> What is the meaning of life? What does it mean to be happy? Why is it so hard to live a moral life and have a good conscience?
The answers to these questions are not as simple as we might like. But they are important, and are the subject of this book.
In 1977, I went to Rome for the first time and met with Pope John Paul II. Afterwards I gave him a copy of my book The Cross and the Switchblade, a book he had read. He asked
---
<s> What is the meaning of life? We all ask this question but how many of us can answer it? There are many answers to this questions, and these answers are usually given by theologians and philosophers. But what does the Bible say about it.
A question like this is not easy to answer, but it is possible to come up with a good answer. The Bible tells us that God created all things, including life. He also tells that he created the world for us to enjoy. So, what is life about?
The
---""", file=open("lama.txt", "w")

SyntaxError: incomplete input (2561082293.py, line 22)