In [1]:
import torch
from transformers import AutoTokenizer, AutoModel
import transformers
from transformers import BloomForCausalLM
from transformers import BloomTokenizerFast
import re


In [2]:
# Single source of truth for the checkpoint so the model and its tokenizer can
# never drift apart. A smaller checkpoint such as "bigscience/bloom-560m" can be
# substituted here for quicker experiments.
MODEL_NAME = "bigscience/bloom-1b7"

# BloomForCausalLM is used because the cells below call model.generate().
model = BloomForCausalLM.from_pretrained(MODEL_NAME)
tokenizer = BloomTokenizerFast.from_pretrained(MODEL_NAME)

In [3]:
text = "The tree has yellow flowers and green leaves"
question = "What parts does the tree have?"

# result_length is passed to generate() as max_length in the cells below, so it
# caps the TOTAL sequence length (prompt tokens + generated tokens), not only
# the length of the answer.
result_length = 50

# Build the prompt from `text` and `question` so that editing either variable
# actually changes what is sent to the model. The leading newline and the
# newline after "Answer:" are part of the prompt and are kept on purpose.
prompt = f"""
Text: {text}
Question: {question}
Answer:
"""

# Tokenize the prompt; return_tensors="pt" requests PyTorch tensors.
# The generation cells below read inputs["input_ids"] from this result.
inputs = tokenizer(prompt, return_tensors="pt")

In [4]:
# Greedy Search
# The tokenizer's attention mask is passed explicitly so generate() does not
# have to infer it. max_length bounds prompt + continuation together.
generated_ids = model.generate(
    inputs["input_ids"],
    attention_mask=inputs["attention_mask"],
    max_length=result_length,
)
print(tokenizer.decode(generated_ids[0]))


Text: The tree has yellow flowers and green leaves
Question: What parts does the tree have?
Answer:
The tree has yellow flowers and green leaves
The tree has yellow flowers and green leaves
The tree has yellow flowers and green leaves
The


In [5]:
# Beam Search
# Two beams, with any repeated 2-gram forbidden (no_repeat_ngram_size=2).
# The tokenizer's attention mask is passed explicitly so generate() does not
# have to infer it.
generated_ids = model.generate(
    inputs["input_ids"],
    attention_mask=inputs["attention_mask"],
    max_length=result_length,
    num_beams=2,
    no_repeat_ngram_size=2,
    early_stopping=True,
)
print(tokenizer.decode(generated_ids[0]))


Text: The tree has yellow flowers and green leaves
Question: What parts does the tree have?
Answer:
1. Yellow flowers
2. Green leaves

A:

The answer to the question is

 The leaves are green and the flowers are yellow


In [39]:
# Sampling Top-k + Top-p
# Sampling is stochastic: seed PyTorch's RNG first so re-running this cell
# reproduces the same completion.
torch.manual_seed(0)

# The tokenizer's attention mask is passed explicitly so generate() does not
# have to infer it.
generated_ids = model.generate(
    inputs["input_ids"],
    attention_mask=inputs["attention_mask"],
    max_length=result_length,
    do_sample=True,
    top_k=50,
    top_p=0.9,
)
print(tokenizer.decode(generated_ids[0]))


Text: The tree has yellow flowers and green leaves
Question: What parts does the tree have?
Answer:
The tree has yellow flowers and green leaves
Answer:
The tree has yellow flowers and green leaves
Answer:
The tree has yellow


### Beam Search

In [12]:
# Same question/answer scaffold as before, but the text now lists three parts.
prompt = """
Text: The tree has yellow flowers, green leaves and brown bark.
Question: What parts does the tree have?
Answer:
"""

inputs = tokenizer(prompt, return_tensors="pt")

# Beam Search
# max_length=70 bounds prompt + continuation together. The tokenizer's
# attention mask is passed explicitly so generate() does not have to infer it.
generated_ids = model.generate(
    inputs["input_ids"],
    attention_mask=inputs["attention_mask"],
    max_length=70,
    num_beams=2,
    no_repeat_ngram_size=2,
    early_stopping=True,
)
output = tokenizer.decode(generated_ids[0])

In [13]:
# Show the decoded beam-search text stored in `output` by the previous cell.
print(output)


Text: The tree has yellow flowers, green leaves and brown bark.
Question: What parts does the tree have?
Answer:
1. Yellow flowers
2. Green leaves
3. Brown bark

A:

The answer is

 The leaves are green and the bark is brown.

The leaves have yellow petals and green stamens. The bark


In [54]:
# Long-context variant of the prompt: two paragraphs describing Fagus sylvatica,
# followed by the same "Question: ... / Answer:" scaffold used in earlier cells.
prompt = """
Text: Fagus sylvatica is a large tree, capable of reaching heights of up to 50 metres (160 feet) tall[3] and 3 m (10 ft) trunk diameter, though more typically 25–35 m (82–115 ft) tall and up to 1.5 m (5 ft) trunk diameter. A 10-year-old sapling will stand about 4 m (13 ft) tall. It has a typical lifespan of 150–200 years, though sometimes up to 300 years. In cultivated forest stands trees are normally harvested at 80–120 years of age.[4] 30 years are needed to attain full maturity (as compared to 40 for American beech). Like most trees, its form depends on the location: in forest areas, F. sylvatica grows to over 30 m (100 ft), with branches being high up on the trunk. In open locations, it will become much shorter (typically 15–24 m or 50–80 ft) and more massive.
The leaves are alternate, simple, and entire or with a slightly crenate margin, 5–10 centimetres (2–4 inches) long and 3–7 cm broad, with 6–7 veins on each side of the leaf (as opposed to 7–10 veins in F. orientalis). When crenate, there is one point at each vein tip, never any points between the veins. The buds are long and slender, 15–30 millimetres (5⁄8–1+1⁄8 in) long and 2–3 mm (3⁄32–1⁄8 in) thick, but thicker (to 4–5 mm) where the buds include flower buds.
Question: What parts does the tree have?
Answer:
"""

# NOTE(review): result_length is reassigned here but is not read in this cell;
# the generation below is capped by the literal max_length=500 instead. The
# assignment is kept in case later cells rely on it -- confirm and remove if not.
result_length = 353

inputs = tokenizer(prompt, return_tensors="pt")

# Beam Search
# Four beams, with any repeated 2-gram forbidden. max_length=500 bounds prompt +
# continuation together. The tokenizer's attention mask is passed explicitly so
# generate() does not have to infer it.
generated_ids = model.generate(
    inputs["input_ids"],
    attention_mask=inputs["attention_mask"],
    max_length=500,
    num_beams=4,
    no_repeat_ngram_size=2,
    early_stopping=True,
)
output = tokenizer.decode(generated_ids[0])

print(output)


Text: Fagus sylvatica is a large tree, capable of reaching heights of up to 50 metres (160 feet) tall[3] and 3 m (10 ft) trunk diameter, though more typically 25–35 m (82–115 ft) tall and up to 1.5 m (5 ft) trunk diameter. A 10-year-old sapling will stand about 4 m (13 ft) tall. It has a typical lifespan of 150–200 years, though sometimes up to 300 years. In cultivated forest stands trees are normally harvested at 80–120 years of age.[4] 30 years are needed to attain full maturity (as compared to 40 for American beech). Like most trees, its form depends on the location: in forest areas, F. sylvatica grows to over 30 m (100 ft), with branches being high up on the trunk. In open locations, it will become much shorter (typically 15–24 m or 50–80 ft) and more massive.
The leaves are alternate, simple, and entire or with a slightly crenate margin, 5–10 centimetres (2–4 inches) long and 3–7 cm broad, with 6–7 veins on each side of the leaf (as opposed to 7–10 veins in F. orientalis). When c