# Natural Language Generation

For standard language generation:
 - https://huggingface.co/blog/how-to-generate
  - https://huggingface.co/blog/introducing-csearch

For constraint language generation:
 - https://huggingface.co/blog/constrained-beam-search


## Auto-regressive Models

In [1]:
import os
import sys

import transformers
from transformers import GenerationConfig, AutoTokenizer, AutoModel, utils, BartForConditionalGeneration 
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

utils.logging.set_verbosity_error()  # Remove line to see warnings

def cuda_info():
    print()
    print("cuda.is_available: \t", torch.cuda.is_available())
    if torch.cuda.is_available():
        print("cuda.device_count: \t", torch.cuda.device_count())
        print("cuda.current_device: \t", torch.cuda.current_device())
        print("cuda.device: \t\t", torch.cuda.device(torch.cuda.current_device()))
        print()
        print("cuda.get_device_name: \t", torch.cuda.get_device_name(torch.cuda.current_device()))
        print("total memory: \t\t", torch.cuda.get_device_properties(0).total_memory)
        print("reserved memory:\t", torch.cuda.memory_reserved(0))
        print("allocated memory:\t", torch.cuda.memory_allocated(0))


    device = "cuda:" + str(torch.cuda.current_device()) if torch.cuda.is_available() else "cpu"
    print()
    print("device name: \t\t", device)
    print("transformers: \t\t", transformers.__version__)
    print("pytorch: \t\t", torch.__version__)
    
def decode_and_print(model, config, sentence):

    encoded_input_ids_1 = tokenizer(sentence, return_tensors="pt", add_special_tokens=False).input_ids.to(device)
    
    with torch.no_grad():
        generation_output = model.generate(
            input_ids = encoded_input_ids_1,
            generation_config = generation_config,
            return_dict_in_generate = True,
            output_scores = True
        )

    for s in generation_output.sequences:
        output = tokenizer.decode(s, skip_special_tokens=True)
        print(output)
        

if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

cuda_info()


cuda.is_available: 	 False

device name: 		 cpu
transformers: 		 4.38.2
pytorch: 		 2.2.1


# Decoder models

## DialogGPT

https://huggingface.co/microsoft/DialoGPT-large


In [None]:
# PLANLLM MODEL
moedl_plan = "NOVA-vision-language/PlanLLM"

In [2]:
model_name = "microsoft/DialoGPT-large"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
cuda_info()

tokenizer_config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/642 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.75G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]


cuda.is_available: 	 False

device name: 		 cpu
transformers: 		 4.38.2
pytorch: 		 2.2.1


In [3]:
# Let's chat for 5 lines
for step in range(5):
    # encode the new user input, add the eos_token and return a tensor in Pytorch
    new_user_input_ids = tokenizer.encode(input(">> User:") + tokenizer.eos_token, return_tensors='pt')

    # append the new user input tokens to the chat history
    bot_input_ids = torch.cat([chat_history_ids, new_user_input_ids], dim=-1) if step > 0 else new_user_input_ids

    # generated a response while limiting the total chat history to 1000 tokens, 
    chat_history_ids = model.generate(bot_input_ids, max_length=1000, pad_token_id=tokenizer.eos_token_id)

    # pretty print last ouput tokens from bot
    print("DialoGPT: {}".format(tokenizer.decode(chat_history_ids[:, bot_input_ids.shape[-1]:][0], skip_special_tokens=True)))


DialoGPT: I do!
DialoGPT: I'll be in the kitchen.
DialoGPT: I know a few, but I don't know any good ones.
DialoGPT: I will try to find one.
DialoGPT: I will try to find one.


## BART

In [4]:
# Initialize tokenizer and model. Be sure to set output_attentions=True.
# Load BART fine-tuned for summarization on CNN/Daily Mail dataset
model_name = "facebook/bart-large-cnn"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = BartForConditionalGeneration.from_pretrained(model_name, output_attentions=True).to(device)
cuda_info()

config.json:   0%|          | 0.00/1.58k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]


cuda.is_available: 	 False

device name: 		 cpu
transformers: 		 4.38.2
pytorch: 		 2.2.1


# Decoding Strategies


## Decoding parameters and example

In [5]:
generation_config = model.generation_config

generation_config.temperature = 0.4
generation_config.top_p = 0.8
generation_config.top_k = 10
generation_config.num_beams = 4
generation_config.max_new_tokens = 150

print(generation_config)


GenerationConfig {
  "bos_token_id": 0,
  "decoder_start_token_id": 2,
  "early_stopping": true,
  "eos_token_id": 2,
  "forced_bos_token_id": 0,
  "forced_eos_token_id": 2,
  "length_penalty": 2.0,
  "max_length": 142,
  "max_new_tokens": 150,
  "min_length": 56,
  "no_repeat_ngram_size": 3,
  "num_beams": 4,
  "output_attentions": true,
  "pad_token_id": 1,
  "temperature": 0.4,
  "top_k": 10,
  "top_p": 0.8
}



In [8]:

# create ids of encoded input vectors
sentence = 'Yesterday the London police wrote me a speeding ticket because I was driving too fast.'

decode_and_print(model, generation_config, sentence)




The London police wrote me a speeding ticket because I was driving too fast. the London police said I was going too fast for the speed limit. the police wrote a ticket because they thought I was speeding. I'm not speeding, I'm driving too slowly, the police said. I was given a ticket for speeding.


## Greedy Decoding

In [9]:
generation_config = model.generation_config
generation_config.do_sample = False
generation_config.num_beams = 1
generation_config.max_new_tokens = 150


In [11]:
sentence = 'Yesterday the London police wrote me a speeding ticket because I was driving too fast.'

decode_and_print(model, generation_config, sentence)


The London police wrote me a speeding ticket because I was driving too fast. the London police said I was speeding because I had a high speed limit. the police wrote a speedingticket because I drove too fast, not because I'm a speeding driver. the officer wrote me the ticket because he thought I was going too fast and I was in a hurry.


## Sampling

### Multinomial Sampling

### Top-k Sampling

In [13]:
sentence = 'Yesterday the London police wrote me a speeding ticket because I was driving too fast.'

generation_config = model.generation_config
generation_config.do_sample = True
generation_config.num_beams = 1
generation_config.temperature = 1

print(generation_config)


GenerationConfig {
  "bos_token_id": 0,
  "decoder_start_token_id": 2,
  "do_sample": true,
  "early_stopping": true,
  "eos_token_id": 2,
  "forced_bos_token_id": 0,
  "forced_eos_token_id": 2,
  "length_penalty": 2.0,
  "max_length": 142,
  "max_new_tokens": 150,
  "min_length": 56,
  "no_repeat_ngram_size": 3,
  "output_attentions": true,
  "pad_token_id": 1,
  "top_k": 10,
  "top_p": 0.8
}



In [14]:
for n in range(1,6):
    
    print("## Top k ", n*10)
    generation_config.top_k = n*10
    decode_and_print(model, generation_config, sentence)
    print()


## Top k  10
London police wrote me a speeding ticket because I was driving too fast. the London police wrote. me a ticket because of the speed I was going. the police wrote it because I drove too fast, not because I'm speeding. the officers wrote it as a result of my speed.

## Top k  20
London police wrote a speeding ticket to the author for driving too fast. The author's mother was also given a speeding citation for driving while pregnant. The driver was fined because she was driving too slowly. The ticket was issued because she had been driving too quickly. She was driving a car that was over 50 mph and had a speed limit of 30mph.

## Top k  30
London police wrote me a speeding ticket because I was driving too fast. the London police wrote us a speedingticket because I am driving tooFast. the police wrote an accident ticket because we are driving too Fast. we received a speeding summons because we were driving too slow.

## Top k  40
The London police wrote me a speeding ticket bec

### Top-p sampling

In [15]:
sentence = 'Yesterday the London police wrote me a speeding ticket because I was driving too fast.'

generation_config = model.generation_config
generation_config.do_sample = True
generation_config.num_beams = 1
generation_config.temperature = 1

print(generation_config)


GenerationConfig {
  "bos_token_id": 0,
  "decoder_start_token_id": 2,
  "do_sample": true,
  "early_stopping": true,
  "eos_token_id": 2,
  "forced_bos_token_id": 0,
  "forced_eos_token_id": 2,
  "length_penalty": 2.0,
  "max_length": 142,
  "max_new_tokens": 150,
  "min_length": 56,
  "no_repeat_ngram_size": 3,
  "output_attentions": true,
  "pad_token_id": 1,
  "top_p": 0.8
}



In [16]:
for n in range(1,6):
    generation_config.top_p = 0.2*n-0.05
    print("## Top p ", generation_config.top_p)
    decode_and_print(model, generation_config, sentence)
    print()


## Top p  0.15000000000000002
The London police wrote me a speeding ticket because I was driving too fast. the London police said I was speeding because I had a high speed limit. the police wrote a speeding violation because I drove too fast and was driving to fast. I was not speeding. I had no idea I was going too fast, I was just driving too slow.

## Top p  0.35000000000000003
The London police wrote me a speeding ticket because I was driving too fast. the London police write me a ticket becauseI was drivingTooFast. the police wrote the ticket because they thought I was speeding. the officer wrote the speeding ticket for driving too Fast. the policeman wrote the tickets because he thought I drove too fast and was driving TooFast.

## Top p  0.55

## Top p  0.75
The London police wrote me a speeding ticket because I was driving too fast. the London police write me a speed ticket because i was driving Too Fast. the police wrote the ticket because of my driving tooFast. I was going to 

### Contrastive Search
https://huggingface.co/blog/introducing-csearch

### Return sequences

In [17]:
sentence = 'Yesterday the London police wrote me a speeding ticket because I was driving too fast.'

encoded_input_ids_1 = tokenizer(sentence, return_tensors="pt", add_special_tokens=False).input_ids.to(device)

with torch.no_grad():
    generation_output = model.generate(
        input_ids = encoded_input_ids_1,
        num_return_sequences=5, 
        generation_config = generation_config,
        return_dict_in_generate = True,
        output_scores = True
    )

for s in generation_output.sequences:
    print("Output: ")
    output = tokenizer.decode(s, skip_special_tokens=True)
    print(output)
    print()


Output: 
The London police wrote me a speeding ticket because I was driving too fast. The London police handed me a speed ticket becauseI was driving far too fast for the car's limit. the speeding ticket said I was speeding by driving too far. My passenger didn't know I was doing over 100mph.

Output: 

Output: 
The London police wrote me a speeding ticket because I was driving too fast. the London police write me a freeway ticket because it was a speeders car. the car I was speed was going too fast because I was driving too far. I don't know where the ticket was written, but I don’t really care. I didn't think I was being excessive. I believe I was just going too soon. It's not a speeding fine, just a fine for going too quickly.

Output: 
The London police wrote a speeding ticket because they said he was driving too fast. The London police were worried he was travelling too fast, they said. But the woman told her he had not done anything wrong. So she wrote the ticket because he was t

## Beam Search

In [18]:
sentence = 'Yesterday the London police wrote me a speeding ticket because I was driving too fast.'

generation_config = model.generation_config
generation_config.do_sample = False
generation_config.num_beams = 1



In [19]:
for n in range(1,6):

    print("## Beam size of ", n)
    generation_config.num_beams = n
    decode_and_print(model, generation_config, sentence)
    print()


## Beam size of  1




The London police wrote me a speeding ticket because I was driving too fast. the London police said I was speeding because I had a high speed limit. the police wrote a speedingticket because I drove too fast, not because I'm a speeding driver. the officer wrote me the ticket because he thought I was going too fast and I was in a hurry.

## Beam size of  2
The London police wrote me a speeding ticket because I was driving too fast. the London police said I was going too fast and wrote me the ticket. the police said that I was speeding and that I should slow down. I was not speeding. The police were writing me the speeding ticket for driving too quickly.

## Beam size of  3
The London police wrote me a speeding ticket because I was driving too fast. the London police said I was going too fast for the speed limit. the police said that I was doing too much speed. I was not speeding. The London police were writing me a ticket for driving too quickly.

## Beam size of  4
The London police wr

# Decoding with Constraints



In [31]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import transformers
import torch 

if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

device = "cpu"

def cuda_info():
    print()
    print("cuda.is_available: \t", torch.cuda.is_available())
    if torch.cuda.is_available():
        print("cuda.device_count: \t", torch.cuda.device_count())
        print("cuda.current_device: \t", torch.cuda.current_device())
        print("cuda.device: \t\t", torch.cuda.device(torch.cuda.current_device()))
        print()
        print("cuda.get_device_name: \t", torch.cuda.get_device_name(torch.cuda.current_device()))
        print("total memory: \t\t", torch.cuda.get_device_properties(0).total_memory)
        print("reserved memory:\t", torch.cuda.memory_reserved(0))
        print("allocated memory:\t", torch.cuda.memory_allocated(0))


    device = "cuda:" + str(torch.cuda.current_device()) if torch.cuda.is_available() else "cpu"
    print()
    print("device name: \t\t", device)
    print("transformers: \t\t", transformers.__version__)
    print("pytorch: \t\t", torch.__version__)

tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2").to(device)

cuda_info()


cuda.is_available: 	 False

device name: 		 cpu
transformers: 		 4.38.2
pytorch: 		 2.2.1


## Repetitions and word lists
### n-gram Repetitions

In [32]:
sentence = 'Yesterday the London police wrote me a speeding ticket because I was driving too fast'

encoded_input_ids_1 = tokenizer(sentence, return_tensors="pt", add_special_tokens=False).input_ids.to(device)

with torch.no_grad():
    generation_output = model.generate(
        input_ids = encoded_input_ids_1,
        no_repeat_ngram_size=1,
        return_dict_in_generate = True,
        output_scores = True
    )

for s in generation_output.sequences:
    print("Output: ")
    output = tokenizer.decode(s, skip_special_tokens=True)
    print(output)
    print()


Output: 
Yesterday the London police wrote me a speeding ticket because I was driving too fast.
'I am



### Force words and bad words


In [33]:
sentence = 'The soldiers'
input_ids = tokenizer(sentence, return_tensors="pt", add_special_tokens=False).input_ids.to(device)

## Forced words
force_disjunctive = ["day two", "day one"]
force_phrasal = "leave now or die"

force_words_ids = [ tokenizer(force_disjunctive, add_special_tokens=False).input_ids,
                    tokenizer(force_phrasal, add_special_tokens=False).input_ids
                  ]

print("## Force word ids:")
for word_ids in force_words_ids:
    if isinstance(word_ids[0], list):
        print("  DisjunctiveConstraint: ", word_ids)
    else:
        print("  PhrasalConstraint: ", word_ids)


## Force word ids:
  DisjunctiveConstraint:  [[820, 734], [820, 530]]
  PhrasalConstraint:  [47408, 783, 393, 4656]


In [34]:
## Bad words
bad_words_set = ["whom", "year"]
bad_words_ids = tokenizer(bad_words_set, add_special_tokens=False).input_ids

print("## Bad word ids:")
for word_ids in bad_words_ids:
    if isinstance(word_ids[0], list):
        print("DisjunctiveConstraint: ", word_ids)
    else:
        print("PhrasalConstraint: ", word_ids)


## Bad word ids:
PhrasalConstraint:  [1929, 296]
PhrasalConstraint:  [1941]


In [35]:

generation_output = model.generate(
    input_ids = input_ids,
    force_words_ids=force_words_ids,
    bad_words_ids=bad_words_ids,
    num_beams = 10,
    num_return_sequences=1,
    no_repeat_ngram_size=6,
    remove_invalid_values=True,
    output_scores = True
)

for s in generation_output:
    print("## Output: ")
    output = tokenizer.decode(s, skip_special_tokens=True)
    print(output)
    print()


## Output: 
The soldiers in the field were not the only ones who were injured.day twoleave now or die



## Constraints



### Phrasal Constraint

In [36]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, PhrasalConstraint

#tokenizer = AutoTokenizer.from_pretrained("t5-base")
#model = AutoModelForSeq2SeqLM.from_pretrained("t5-base").to(device)

encoder_input_str = "The soldiers"
input_ids = tokenizer(encoder_input_str, return_tensors="pt").input_ids.to(device)


force_flexible_set = 'at the base'
tk_list = tokenizer(force_flexible_set, add_special_tokens=False).input_ids

constraints = [
    PhrasalConstraint(tk_list)
]

outputs = model.generate(
    input_ids,
    constraints=constraints,
    num_beams=10,
    num_return_sequences=1,
    no_repeat_ngram_size=5,
    max_length = 30,
    remove_invalid_values=True
)

print("Output:\n" + 100 * '-')
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Output:
----------------------------------------------------------------------------------------------------
The soldiers, who had been stationed at the base, had been ordered to leave the area.

The soldiers, who were stationedat the base


### Disjunctive Constraints

In [37]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, PhrasalConstraint, DisjunctiveConstraint

encoder_input_str = "The soldiers"
input_ids = tokenizer(encoder_input_str, return_tensors="pt").input_ids.to(device)

force_words_set1 = [" stationed", "night"]
words_ids_set1 = tokenizer(force_words_set1, add_special_tokens=False).input_ids
print(words_ids_set1)

constraints = [
    DisjunctiveConstraint(words_ids_set1)
]


[[25967], [3847]]


In [38]:
tokenizer.convert_ids_to_tokens(25967)

'Ġstationed'

In [39]:
outputs = model.generate(
    input_ids,
    constraints=constraints,
    num_beams=10,
    num_return_sequences=1,
    max_length = 30,
    no_repeat_ngram_size=6,
    remove_invalid_values=True
)

print("Output:\n" + 100 * '-')
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Output:
----------------------------------------------------------------------------------------------------
The soldiers, who had been stationed at the base, were taken to a nearby hospital, where they were treated for minor injuries and released.




### List of Constraints

In [40]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, PhrasalConstraint, DisjunctiveConstraint

# The prompt
encoder_input_str = "The soldiers"
input_ids = tokenizer(encoder_input_str, return_tensors="pt").input_ids.to(device)

# First constraint
force_words_set1 = [" stationed", "in the field"]
words_ids_set1 = tokenizer(force_words_set1, add_special_tokens=False).input_ids
constraint_1 = DisjunctiveConstraint(words_ids_set1)

print()
print(force_words_set1)
print(constraint_1.trie.trie)

# Second constraint
force_words_set2 = [" hospital"]
words_ids_set2 = tokenizer(force_words_set2, add_special_tokens=False).input_ids
constraint_2 = DisjunctiveConstraint(words_ids_set2)

print()
print(force_words_set2)
print(constraint_2.trie.trie)

# Third constraint
force_flexible_set = " at the battle"
phrasal_constraints = tokenizer(force_flexible_set, add_special_tokens=False).input_ids
constraint_3 = PhrasalConstraint(phrasal_constraints)

print()
print(force_flexible_set)
print(constraint_3.token_ids)

# The list of constraints
constraints = [ constraint_1, constraint_2,constraint_3 ]



[' stationed', 'in the field']
{25967: {}, 259: {262: {2214: {}}}}

[' hospital']
{4436: {}}

 at the battle
[379, 262, 3344]


In [41]:
outputs = model.generate(
    input_ids,
    constraints=constraints,
    num_beams=10,
    num_return_sequences=1,
    max_length = 30,
    no_repeat_ngram_size=5,
    remove_invalid_values=True
)

print("Output:\n" + 100 * '-')
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Output:
----------------------------------------------------------------------------------------------------
The soldiers stationed at the base were not allowed to leave the base until the end of the war.

"We were told at the battle hospital


## Low-level API

In [42]:
from transformers import (
    AutoTokenizer,
    AutoModelForSeq2SeqLM,
    ConstrainedBeamSearchScorer,
    PhrasalConstraint, MaxLengthCriteria,
    LogitsProcessorList, StoppingCriteriaList,
    MinLengthLogitsProcessor
)

# lets run beam search using 3 beams
num_beams = 3

encoder_input_str = "The soldier"
input_ids = tokenizer(encoder_input_str, return_tensors="pt").input_ids.to(device)

input_ids = input_ids.repeat_interleave(num_beams, dim=0)

constraint_str = ["black", "country"]
constraint_token_ids = tokenizer.encode(constraint_str)[:-1]  # slice to remove eos token
constraints = [PhrasalConstraint(token_ids=constraint_token_ids)]

# instantiate beam scorer
beam_scorer = ConstrainedBeamSearchScorer(
    batch_size=1, num_beams=num_beams, device=model.device, max_length = 50, constraints=constraints
)

# instantiate logits processors
logits_processor = LogitsProcessorList(
    [
        MinLengthLogitsProcessor(5, eos_token_id=model.config.eos_token_id),
    ]
)

outputs = model.constrained_beam_search(
    input_ids, beam_scorer, constraints=constraints, stopping_criteria=StoppingCriteriaList([MaxLengthCriteria(max_length=50)]), logits_processor=logits_processor
)

tokenizer.batch_decode(outputs, skip_special_tokens=True)

['The soldier, who was wearing a black T-shirt and jeans, said he had been in the country for two years.\n\n"I was in the country for two years. I was in the country for two years," he said.black']

# Summary