In [72]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Checkpoint identifier shared by the tokenizer and the model so the two stay in sync.
model_name = "gpt2"
# Tokenizer: used further down to encode prompts into token ids and decode samples back to text.
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
# Language model with LM head: used further down for sampling (`generate`) and for reading logits.
model = GPT2LMHeadModel.from_pretrained(model_name)

In [75]:
def _candidate_words(outputs, prompt_length, letter):
    """
    Collect, from each sampled continuation, the first new word starting with ``letter``.

    Args:
        outputs: Token-id sequences returned by ``model.generate``; each begins with the prompt tokens.
        prompt_length (int): Number of prompt tokens at the start of every sequence.
        letter (str): Lower-case prefix the word has to start with.

    Returns:
        list of str: Distinct matching words in first-seen order (deterministic, unlike a set).
    """
    matches = {}
    for output in outputs:
        # Decode only the newly generated tokens. Slicing the decoded string by the prompt's
        # character length is fragile when decoding does not reproduce the prompt exactly.
        continuation = tokenizer.decode(output[prompt_length:], skip_special_tokens=True)
        words = continuation.split()

        # A continuation that does not begin with whitespace starts with a fragment glued onto
        # the last prompt word (a suffix or punctuation), which is not a new word.
        if words and not continuation[0].isspace():
            words = words[1:]

        for word in words:
            if word.lower().startswith(letter):
                matches.setdefault(word, None)
                break
    return list(matches)


def _lookahead_log_prob(sentence, token_id):
    """
    Return the log-probability the model assigns to ``token_id`` as the token following ``sentence``.

    Log-probabilities (not raw logits) are used because logits are unnormalised and therefore
    not comparable between different candidate sentences.
    """
    import torch  # local import: keeps this cell runnable without a notebook-level torch import

    input_ids = tokenizer.encode(sentence, return_tensors='pt')
    with torch.no_grad():  # scoring only; no autograd graph needed
        last_logits = model(input_ids).logits[0, -1]
    return torch.log_softmax(last_logits, dim=-1)[token_id].item()


def generate_sentence_with_sequence(start_word, sequence, max_attempts=100):
    """
    Generate a sentence starting with a given word and following a sequence of starting letters.

    For every letter, several continuations of the current sentence are sampled from the
    module-level ``model``. The first new word starting with that letter is taken from each
    sample, and the candidate after which the model finds the *next* required letter (or a
    full stop, for the last letter) most likely is appended to the sentence.

    Args:
        start_word (str): The word to start the sentence.
        sequence (list of str): Letters that subsequent words should start with, in order.
            Matching is case-insensitive.
        max_attempts (int): How many sampling rounds to try per letter before giving up.

    Returns:
        str: The sentence built so far. If no word can be found for some letter within
             ``max_attempts`` rounds, a message is printed and the partial sentence is returned.

    Notes:
        Relies on the module-level ``model`` and ``tokenizer``. Sampling is enabled, so results
        vary between calls. Words are whitespace-separated pieces of the generated text and may
        carry attached punctuation.
    """
    generated = start_word  # Sentence built so far

    for index, letter in enumerate(sequence):
        wanted = letter.lower()

        # Lookahead target used to rank candidates: the next required letter as a word-initial
        # piece (hence the leading space), or a full stop after the final word.
        if index + 1 < len(sequence):
            lookahead_text = ' ' + sequence[index + 1]
        else:
            lookahead_text = '.'
        lookahead_id = tokenizer.encode(lookahead_text, return_tensors='pt')[0][0]

        # The prompt does not change between attempts for the same letter, so encode it once.
        input_ids = tokenizer.encode(generated, return_tensors='pt')
        prompt_length = input_ids.shape[1]

        for _ in range(max_attempts):
            # Sample several continuations to choose from
            outputs = model.generate(
                input_ids,
                # Single unpadded prompt: every position is a real token.
                attention_mask=input_ids.new_ones(input_ids.shape),
                max_length=prompt_length + 10,
                temperature=0.8,
                num_return_sequences=5,
                do_sample=True,
                top_k=50,
                top_p=0.95,
                repetition_penalty=1.2,
                pad_token_id=tokenizer.eos_token_id
            )

            best_word = None
            best_score = float('-inf')
            for word in _candidate_words(outputs, prompt_length, wanted):
                score = _lookahead_log_prob(generated + ' ' + word, lookahead_id)
                if score > best_score:
                    best_score = score
                    best_word = word

            if best_word is not None:
                generated += ' ' + best_word
                break
        else:
            # Every attempt failed to produce a usable word: stop and return what we have.
            print(f"Could not find a suitable word starting with '{letter}'.")
            break

    return generated

In [77]:
# Example: begin with "Important" and request eight more words whose initials spell "nception".
# The generator samples from the model, so the printed sentence differs from run to run.
start_word = "Important"
sequence = list("nception")  # ["n", "c", "e", "p", "t", "i", "o", "n"]
generated_sentence = generate_sentence_with_sequence(start_word, sequence)
print(generated_sentence)

Important not comment email points to It or note
