In [49]:
from langchain.llms import OpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.chains import SequentialChain
from tqdm import trange
import json
from pprint import pprint

# import python-dotenv and use it to set OPENAI_API_KEY as an environment variable
from dotenv import load_dotenv
load_dotenv()

True

In [83]:
# Prompt for the sentence generator. It asks the model for the next sentence
# only, given the original text, the target acrostic phrase, the rewritten
# text accumulated so far, and the letter the new sentence must start with.
# The four names in `input_variables` match the {placeholders} in the template.
acrostic_generator_prompt = PromptTemplate(
    input_variables=["original_text", "acrostic_phrase", "rewritten_text", "current_starting_letter"],
    template="""
You are an acrostic generating AI. I will give you some text and an acrostic phrase. Your job is to rewrite the text so that the first letter in each sentence spells out the acrostic phrase, while still preserving the meaning of the original text.

We'll do one sentence at a time. I'll give you the first letter of the new sentence, and your job will be to write a new sentence that starts with that letter. Then I'll check your work. If it looks good, I'll add it to the rewritten text and we'll move on to the next sentence. If not, I'll ask you to try again. 

Original text: "{original_text}"
Acrostic phrase: "{acrostic_phrase}"
Rewritten text so far: "{rewritten_text}"
Starting letter for the next sentence: {current_starting_letter}

Now provide your answer. Remember that the first word must begin with the letter {current_starting_letter}.
"""
)

In [84]:
# Prompt for the evaluator. It receives the same context as the generator
# plus `options`, a numbered multiple-choice list of candidate sentences, and
# is asked to reply with a JSON object of the form {"selected_option": <n>}.
# NOTE(review): the doubled braces in the format example are presumably the
# template escape for literal braces -- confirm against the PromptTemplate docs.
acrostic_evaluator_prompt = PromptTemplate(
    input_variables=[
        "original_text", 
        "acrostic_phrase", 
        "rewritten_text", 
        "current_starting_letter",
        "options",
        ],
    template="""
You are a helper to an acrostic generating AI named AGA. 

I gave AGA instructions to rewrite some text so that the first letter in each sentence spells out an acrostic phrase, while still preserving the meaning of the original text.
AGA writes one sentence at a time and makes several guesses. Your job is to look at AGA's guesses and pick which one is the best.
A good sentence should start with the letter {current_starting_letter} and should also put us on track to end up with a rewritten text that captures the meaning of the original text while also sounding natural.

These are the inputs I gave AGA:
Original text: "{original_text}"
Acrostic phrase: "{acrostic_phrase}"
Rewritten text so far: "{rewritten_text}"
Starting letter for the next sentence: {current_starting_letter}

Now I'll give you AGA's attempts in multiple choice format. Your job is to pick the best one. If none of them look good, you can also indicate that and AGA will try again.

Provide your answer in the following format:
{{"selected_option": 1}}

Here are your options:
{options}
"""
)

In [85]:
def clean_output(s):
    """Trim model output down to its first sentence.

    The generator is asked for a single sentence but does not always comply,
    so everything after the first sentence-ending mark ('.', '?' or '!') is
    dropped. The mark itself is kept.

    Args:
        s: Raw text returned by the generator.

    Returns:
        The stripped text, cut just after the earliest sentence-ending mark.
        If no such mark is present the stripped text is returned unchanged.
    """
    s = s.strip()
    # Look for whichever terminator occurs FIRST. Checking '.' before '?' and
    # '!' would let "Is it hot? Yes." through as two sentences.
    # Known limitation: a '.' inside an abbreviation or decimal number still
    # counts as a sentence end.
    positions = [s.find(mark) for mark in '.?!']
    positions = [pos for pos in positions if pos != -1]
    if positions:
        s = s[:min(positions) + 1]
    return s
    
# Reformat the outputs into a single string containing a numbered list of sentences, one per line.
def outputs_to_multiple_choice(outputs):
    """Render candidate sentences as a numbered multiple-choice menu.

    Args:
        outputs: Sequence of dicts, each holding a 'cleaned_new_sentence' entry.

    Returns:
        A string with one "<k>. <sentence>" line per candidate (numbered from
        1), followed by a final "none of these" choice numbered
        len(outputs) + 1. The final line has no trailing newline.
    """
    menu_lines = [
        f"{number}. {candidate['cleaned_new_sentence']}\n"
        for number, candidate in enumerate(outputs, start=1)
    ]
    # The escape hatch is always the last choice on the menu.
    menu_lines.append(f'{len(outputs)+1}. None of these look good. AGA, try again.')
    return "".join(menu_lines)

def get_selection_from_evaluator_output(output):
    """Extract the evaluator's chosen option number from its raw reply.

    Args:
        output: Mapping whose 'evaluator_output' entry is expected to be a
            JSON string of the form {"selected_option": <n>}.

    Returns:
        The selected option as an int when the reply parses. If the reply is
        missing, is not valid JSON, or lacks a usable 'selected_option', the
        original `output` object is returned unchanged so the caller can
        detect the failure with a type check.
    """
    try:
        return int(json.loads(output['evaluator_output'])['selected_option'])
    # Only swallow parsing and lookup failures. A bare `except:` would also
    # hide KeyboardInterrupt and SystemExit. json.JSONDecodeError is a
    # subclass of ValueError, so it is covered here.
    except (KeyError, TypeError, ValueError):
        return output

In [100]:
def generate_options(
    next_sentence_chain, 
    original_text, 
    acrostic_phrase, 
    rewritten_text, 
    current_starting_letter,
    n_attempts:int=5,
    ):
    """Ask the generator chain for several candidate next sentences.

    Args:
        next_sentence_chain: Callable chain invoked as chain(inputs=...); its
            result must be a dict-like object with a 'new_sentence' entry.
        original_text: Text being rewritten.
        acrostic_phrase: Phrase the sentence initials should spell.
        rewritten_text: Sentences accepted so far.
        current_starting_letter: Required first letter of the next sentence.
        n_attempts: Number of candidates to request.

    Returns:
        A tuple (outputs, options, n_options): the list of chain results (each
        extended with a 'cleaned_new_sentence' entry), the multiple-choice
        menu string built from them, and the number of candidates.
    """
    chain_inputs = {
        'original_text': original_text,
        'acrostic_phrase': acrostic_phrase,
        'rewritten_text': rewritten_text,
        'current_starting_letter': current_starting_letter,
    }
    outputs = []
    # trange is used only for the progress bar; the counter itself is unused.
    for _ in trange(n_attempts):
        candidate = next_sentence_chain(inputs=chain_inputs)
        candidate['cleaned_new_sentence'] = clean_output(candidate['new_sentence'])
        outputs.append(candidate)

    return outputs, outputs_to_multiple_choice(outputs), len(outputs)


def evaluate_options(
    acrostic_evaluator_chain,
    original_text,
    acrostic_phrase,
    rewritten_text,
    current_starting_letter,
    options,
    n_options=None,
    ):
    """Ask the evaluator chain to pick the best candidate sentence.

    Args:
        acrostic_evaluator_chain: Callable chain invoked as chain(inputs=...);
            its result must be a dict-like object.
        original_text: Text being rewritten.
        acrostic_phrase: Phrase the sentence initials should spell.
        rewritten_text: Sentences accepted so far.
        current_starting_letter: Required first letter of the next sentence.
        options: Multiple-choice menu string as produced by
            outputs_to_multiple_choice.
        n_options: Number of real candidates on the menu. When omitted it is
            derived from `options`.

    Returns:
        The chain's result, extended with 'n_options' and
        'evaluator_selection' (the parsed choice, or the raw result when the
        reply could not be parsed).

    Raises:
        ValueError: If `n_options` is omitted and cannot be derived from
            `options`.
    """
    if n_options is None:
        # Previously this function read a global `n_options`, which does not
        # exist on a fresh kernel. Derive it from the menu instead: the final
        # line is "<k>. None of these look good..." with k == candidates + 1.
        last_line = options.strip().rsplit('\n', 1)[-1]
        try:
            n_options = int(last_line.split('.', 1)[0]) - 1
        except ValueError as exc:
            raise ValueError(
                "Could not determine n_options from the options menu; "
                "pass n_options explicitly."
            ) from exc

    inputs = {
            'original_text': original_text,
            'acrostic_phrase': acrostic_phrase,
            'rewritten_text': rewritten_text,
            'current_starting_letter': current_starting_letter,
            'options': options
        }
    evaluator_output = acrostic_evaluator_chain(inputs=inputs)
    evaluator_output['n_options'] = n_options
    evaluator_output['evaluator_selection'] = get_selection_from_evaluator_output(evaluator_output)
    return evaluator_output

In [113]:
def gen_and_evaluate(
    next_sentence_chain, 
    acrostic_evaluator_chain,
    original_text,
    acrostic_phrase,
    rewritten_text,
    current_starting_letter,
    n_attempts:int=5,
    ):
    """Generate candidate sentences, then have the evaluator choose among them.

    Args:
        next_sentence_chain: Generator chain passed to generate_options.
        acrostic_evaluator_chain: Evaluator chain passed to evaluate_options.
        original_text: Text being rewritten.
        acrostic_phrase: Phrase the sentence initials should spell.
        rewritten_text: Sentences accepted so far.
        current_starting_letter: Required first letter of the next sentence.
        n_attempts: Number of candidates to generate.

    Returns:
        A tuple (outputs, options, n_options, evaluator_output): the first
        three items come from generate_options, the last from
        evaluate_options.
    """
    # Both stages receive exactly the same description of the task.
    shared_context = dict(
        original_text=original_text,
        acrostic_phrase=acrostic_phrase,
        rewritten_text=rewritten_text,
        current_starting_letter=current_starting_letter,
    )
    outputs, options, n_options = generate_options(
        next_sentence_chain=next_sentence_chain,
        n_attempts=n_attempts,
        **shared_context,
    )
    evaluator_output = evaluate_options(
        acrostic_evaluator_chain=acrostic_evaluator_chain,
        options=options,
        **shared_context,
    )
    return outputs, options, n_options, evaluator_output

In [101]:
# Generator side: an LLM configured with temperature=1 (the evaluator chain
# elsewhere in this notebook uses 0.5), wrapped in a chain that fills
# acrostic_generator_prompt and exposes the reply under the 'new_sentence' key.
chatgpt = OpenAI(
    model_name='gpt-3.5-turbo',
    temperature=1,
    )
next_sentence_chain = LLMChain(
    llm=chatgpt, 
    prompt=acrostic_generator_prompt,
    output_key='new_sentence',
    )

# Inputs: the passage to rewrite and the phrase its sentence initials must spell.
original_text = "On Earth, volcanoes are most often found where tectonic plates are diverging or converging, and most are found underwater. For example, a mid-ocean ridge, such as the Mid-Atlantic Ridge, has volcanoes caused by divergent tectonic plates whereas the Pacific Ring of Fire has volcanoes caused by convergent tectonic plates. Volcanoes can also form where there is stretching and thinning of the crust's plates, such as in the East African Rift and the Wells Gray-Clearwater volcanic field and Rio Grande rift in North America. Volcanism away from plate boundaries has been postulated to arise from upwelling diapirs from the core–mantle boundary, 3,000 kilometers (1,900 mi) deep in the Earth. This results in hotspot volcanism, of which the Hawaiian hotspot is an example. Volcanoes are usually not created where two tectonic plates slide past one another."
acrostic_phrase = "FOOBAR"

# Rewriting state: one letter per sentence, starting at the first letter with
# no sentences accepted yet.
acrostic_letters = list(acrostic_phrase)
acrostic_letter_index = 0
current_starting_letter = acrostic_letters[acrostic_letter_index]
rewritten_text = ""


100%|██████████| 3/3 [00:05<00:00,  1.69s/it]


In [87]:

# Evaluator side: a separate LLM instance configured with temperature=0.5,
# wrapped in a chain that fills acrostic_evaluator_prompt and exposes the
# reply under the 'evaluator_output' key (the key that
# get_selection_from_evaluator_output reads).
acrostic_evaluator_chain = LLMChain(
    llm=OpenAI(
        model_name='gpt-3.5-turbo',
        temperature=0.5,
        ), 
    prompt=acrostic_evaluator_prompt,
    output_key='evaluator_output',
    )



In [114]:
# Generate three candidate sentences for the current starting letter and have
# the evaluator choose among them. The results are stored in module-level
# names that later cells read.
outputs, options, n_options, evaluator_output = gen_and_evaluate(
    next_sentence_chain, 
    acrostic_evaluator_chain,
    original_text,
    acrostic_phrase,
    rewritten_text,
    current_starting_letter,
    n_attempts=3,
    )

100%|██████████| 3/3 [00:03<00:00,  1.25s/it]


In [125]:
def accept_sentence(evaluator_output):
    """Decide whether the evaluator picked one of the real candidates.

    Args:
        evaluator_output: Mapping with 'evaluator_selection' (the parsed
            choice, or a non-int object when parsing failed) and 'n_options'
            (the number of real candidates on the menu).

    Returns:
        True when the selection is an int in 1..n_options. False otherwise,
        which covers an unparsed reply, the "none of these" choice
        (n_options + 1), and any other out-of-range number.
    """
    selection = evaluator_output['evaluator_selection']
    # Use an explicit type check rather than `assert`: assertions are removed
    # when Python runs with -O, which would let a non-int reach the comparison.
    if not isinstance(selection, int):
        return False
    # The lower bound matters: 0 or a negative number would otherwise be
    # accepted and then index the candidate list from the end.
    return 1 <= selection <= evaluator_output['n_options']

In [136]:
# Same call as the earlier generate-and-evaluate cell: it produces a fresh set
# of three candidates and an evaluator verdict from the current values of
# rewritten_text and current_starting_letter.
outputs, options, n_options, evaluator_output = gen_and_evaluate(
    next_sentence_chain, 
    acrostic_evaluator_chain,
    original_text,
    acrostic_phrase,
    rewritten_text,
    current_starting_letter,
    n_attempts=3,
    )

100%|██████████| 3/3 [00:04<00:00,  1.62s/it]


In [137]:
# Number of regeneration rounds triggered by this run of the cell (reset to 0
# every time the cell runs).
n_regenerations = 0

if acrostic_letter_index >= len(acrostic_letters):
    # Every letter already has a sentence. Do not append the stale selection
    # again if this cell is re-run.
    print('Acrostic already complete; nothing to accept.')
elif accept_sentence(evaluator_output):
    # evaluator_selection is 1-based, while `outputs` is a 0-based list.
    rewritten_text += outputs[evaluator_output['evaluator_selection']-1]['cleaned_new_sentence'] + ' '
    acrostic_letter_index += 1
    print('Accepted sentence!')
    if acrostic_letter_index < len(acrostic_letters):
        current_starting_letter = acrostic_letters[acrostic_letter_index]
    else:
        # Indexing past the final letter would raise IndexError, so report
        # completion instead of advancing.
        print('Acrostic complete!')
else:
    print('Regenerating options...')
    n_regenerations += 1
    outputs, options, n_options, evaluator_output = gen_and_evaluate(
        next_sentence_chain,
        acrostic_evaluator_chain,
        original_text,
        acrostic_phrase,
        rewritten_text,
        current_starting_letter,
        n_attempts=3,
    )


Accepted sentence!


In [135]:
# Pretty-print the rewritten text accumulated so far.
pprint(rewritten_text)

('For the most part, volcanoes are located in areas where tectonic plates are '
 'either moving apart or coming together. Often these areas are found '
 'underwater, but there are also land-based examples such as the East African '
 'Rift. On the other hand, some of the most dramatic examples of volcanism '
 'occur at plate boundaries. But volcanoes can also form in areas where the '
 "crust's plates are stretched and thinned, like in the Wells Gray-Clearwater "
 'volcanic field and the Rio Grande rift in North America. ')
