In [148]:
from langchain.llms import OpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain, TransformChain, SequentialChain
from tqdm import trange
import json
from pprint import pprint
import re
# import python-dotenv and use it to set OPENAI_API_KEY as an environment variable
from dotenv import load_dotenv
load_dotenv()

from stegosaurus.acrostic import acrostic_generator_prompt, acrostic_evaluator_prompt, multi_acrostic_generator_prompt

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [85]:
def clean_output(s):
    """Trim a generated string down to its first sentence.

    The generator prompt is not always followed, so the model may return
    several sentences. Everything after the earliest sentence terminator
    ('.', '?' or '!') is dropped; the terminator itself is kept.

    Args:
        s: Raw text returned by the generator.

    Returns:
        The stripped text, cut just after the first terminator. If no
        terminator is present, the stripped text is returned unchanged.
    """
    s = s.strip()
    # Take the earliest terminator of any kind. Checking '.' first would let a
    # later '.' win over an earlier '?' or '!', keeping more than one sentence.
    terminator_positions = [s.index(mark) for mark in '.?!' if mark in s]
    if terminator_positions:
        s = s[:min(terminator_positions) + 1]
    return s
    
# Reformat the outputs into a single string containing a numbered list of sentences, one per line.
def outputs_to_multiple_choice(outputs):
    """Render generator outputs as a numbered multiple-choice string.

    Args:
        outputs: Sequence of mappings, each holding a 'cleaned_new_sentence' entry.

    Returns:
        One "<n>. <sentence>" line per output (numbered from 1), followed by
        a final unterminated line offering the "try again" choice.
    """
    numbered_lines = [
        f"{position}. {candidate['cleaned_new_sentence']}\n"
        for position, candidate in enumerate(outputs, start=1)
    ]
    # The extra last choice lets the evaluator reject every candidate.
    numbered_lines.append(f'{len(outputs)+1}. None of these look good. AGA, try again.')
    return "".join(numbered_lines)

def get_selection_from_evaluator_output(output):
    """Extract the evaluator's chosen option number.

    Args:
        output: Mapping whose 'evaluator_output' entry is a JSON string
            containing a 'selected_option' field.

    Returns:
        The selected option as an int. If the entry is missing, is not valid
        JSON, or does not hold an integer-like value, the original ``output``
        object is returned unchanged, so callers should check the type of the
        result before using it as an index.
    """
    try:
        return int(json.loads(output['evaluator_output'])['selected_option'])
    # Catch only parse/lookup failures. A bare ``except`` would also swallow
    # KeyboardInterrupt and SystemExit, making the notebook hard to interrupt.
    except (KeyError, TypeError, ValueError):
        return output

In [100]:
def generate_options(
    next_sentence_chain, 
    original_text, 
    acrostic_phrase, 
    rewritten_text, 
    current_starting_letter,
    n_attempts:int=5,
    ):
    """Ask the generator chain for several candidate next sentences.

    Args:
        next_sentence_chain: Callable invoked as ``chain(inputs=...)`` that
            returns a mapping containing a 'new_sentence' entry.
        original_text: Text being rewritten.
        acrostic_phrase: Phrase the acrostic should spell.
        rewritten_text: Text accepted so far.
        current_starting_letter: Letter the next sentence should start with.
        n_attempts: Number of times the chain is called.

    Returns:
        A tuple ``(outputs, options, n_options)``: the list of chain results
        (each with an added 'cleaned_new_sentence' entry), the multiple-choice
        string built from them, and the number of results.
    """
    chain_inputs = {
        'original_text': original_text,
        'acrostic_phrase': acrostic_phrase,
        'rewritten_text': rewritten_text,
        'current_starting_letter': current_starting_letter,
    }

    candidates = []
    for _ in trange(n_attempts):
        candidate = next_sentence_chain(inputs=chain_inputs)
        # Keep the raw text and store the first-sentence version alongside it.
        candidate['cleaned_new_sentence'] = clean_output(candidate['new_sentence'])
        candidates.append(candidate)

    return candidates, outputs_to_multiple_choice(candidates), len(candidates)


def _count_sentence_options(options):
    """Count the sentence choices in a multiple-choice string, excluding the final "try again" entry."""
    lines = [line for line in options.splitlines() if line.strip()]
    if not lines:
        return 0
    # The last line carries the highest option number; it is the "try again"
    # choice, so the number of sentence choices is one less.
    numbered = re.match(r'\s*(\d+)\.', lines[-1])
    if numbered:
        return max(int(numbered.group(1)) - 1, 0)
    return max(len(lines) - 1, 0)


def evaluate_options(
    acrostic_evaluator_chain,
    original_text,
    acrostic_phrase,
    rewritten_text,
    current_starting_letter,
    options,
    n_options=None,
    ):
    """Run the evaluator chain over a multiple-choice string of candidates.

    Args:
        acrostic_evaluator_chain: Callable invoked as ``chain(inputs=...)``
            that returns a mutable mapping.
        original_text: Text being rewritten.
        acrostic_phrase: Phrase the acrostic should spell.
        rewritten_text: Text accepted so far.
        current_starting_letter: Letter the next sentence should start with.
        options: Numbered multiple-choice string shown to the evaluator.
        n_options: Number of sentence choices in ``options``. When omitted it
            is derived from ``options``. The function previously read a
            module-level ``n_options`` and raised NameError if none existed.

    Returns:
        The evaluator chain's result with two added entries: 'n_options' and
        'evaluator_selection'.
    """
    if n_options is None:
        n_options = _count_sentence_options(options)
    inputs = {
            'original_text': original_text,
            'acrostic_phrase': acrostic_phrase,
            'rewritten_text': rewritten_text,
            'current_starting_letter': current_starting_letter,
            'options': options
        }
    evaluator_output = acrostic_evaluator_chain(inputs=inputs)
    evaluator_output['n_options'] = n_options
    evaluator_output['evaluator_selection'] = get_selection_from_evaluator_output(evaluator_output)
    return evaluator_output

In [113]:
def gen_and_evaluate(
    next_sentence_chain, 
    acrostic_evaluator_chain,
    original_text,
    acrostic_phrase,
    rewritten_text,
    current_starting_letter,
    n_attempts:int=5,
    ):
    """Generate candidate sentences, then have the evaluator choose among them.

    Args:
        next_sentence_chain: Generator chain passed to ``generate_options``.
        acrostic_evaluator_chain: Evaluator chain passed to ``evaluate_options``.
        original_text: Text being rewritten.
        acrostic_phrase: Phrase the acrostic should spell.
        rewritten_text: Text accepted so far.
        current_starting_letter: Letter the next sentence should start with.
        n_attempts: Number of candidates to generate.

    Returns:
        ``(outputs, options, n_options, evaluator_output)``: the three values
        from ``generate_options`` followed by the result of ``evaluate_options``.
    """
    # Both steps receive the same description of the rewriting state.
    shared_state = dict(
        original_text=original_text,
        acrostic_phrase=acrostic_phrase,
        rewritten_text=rewritten_text,
        current_starting_letter=current_starting_letter,
    )
    outputs, options, n_options = generate_options(
        next_sentence_chain=next_sentence_chain,
        n_attempts=n_attempts,
        **shared_state,
    )
    evaluator_output = evaluate_options(
        acrostic_evaluator_chain=acrostic_evaluator_chain,
        options=options,
        **shared_state,
    )
    return outputs, options, n_options, evaluator_output

In [2]:
# Generator side: a gpt-3.5-turbo model at temperature 1, wrapped in a chain
# that stores the model's reply under the 'new_sentence' key.
chatgpt = OpenAI(model_name='gpt-3.5-turbo', temperature=1)
next_sentence_chain = LLMChain(
    llm=chatgpt,
    prompt=acrostic_generator_prompt,
    output_key='new_sentence',
)

# Passage to rewrite and the phrase its sentence initials should spell.
original_text = "On Earth, volcanoes are most often found where tectonic plates are diverging or converging, and most are found underwater. For example, a mid-ocean ridge, such as the Mid-Atlantic Ridge, has volcanoes caused by divergent tectonic plates whereas the Pacific Ring of Fire has volcanoes caused by convergent tectonic plates. Volcanoes can also form where there is stretching and thinning of the crust's plates, such as in the East African Rift and the Wells Gray-Clearwater volcanic field and Rio Grande rift in North America. Volcanism away from plate boundaries has been postulated to arise from upwelling diapirs from the core–mantle boundary, 3,000 kilometers (1,900 mi) deep in the Earth. This results in hotspot volcanism, of which the Hawaiian hotspot is an example. Volcanoes are usually not created where two tectonic plates slide past one another."
acrostic_phrase = "FOOBAR"

# Progress state: nothing rewritten yet, starting at the first letter.
rewritten_text = ""
acrostic_letters = list(acrostic_phrase)
acrostic_letter_index = 0
current_starting_letter = acrostic_letters[acrostic_letter_index]


In [3]:

# Evaluator side: a separate gpt-3.5-turbo instance at temperature 0.5 whose
# reply is stored under the 'evaluator_output' key.
acrostic_evaluator_chain = LLMChain(
    llm=OpenAI(model_name='gpt-3.5-turbo', temperature=0.5),
    prompt=acrostic_evaluator_prompt,
    output_key='evaluator_output',
)



In [114]:
# Generate three candidates for the current letter and have the evaluator pick one.
outputs, options, n_options, evaluator_output = gen_and_evaluate(
    next_sentence_chain=next_sentence_chain,
    acrostic_evaluator_chain=acrostic_evaluator_chain,
    original_text=original_text,
    acrostic_phrase=acrostic_phrase,
    rewritten_text=rewritten_text,
    current_starting_letter=current_starting_letter,
    n_attempts=3,
)

100%|██████████| 3/3 [00:03<00:00,  1.25s/it]


In [136]:
# Same call as the previous cell, run against whatever rewritten_text and
# current_starting_letter currently hold.
outputs, options, n_options, evaluator_output = gen_and_evaluate(
    next_sentence_chain=next_sentence_chain,
    acrostic_evaluator_chain=acrostic_evaluator_chain,
    original_text=original_text,
    acrostic_phrase=acrostic_phrase,
    rewritten_text=rewritten_text,
    current_starting_letter=current_starting_letter,
    n_attempts=3,
)

100%|██████████| 3/3 [00:04<00:00,  1.62s/it]


In [137]:
# Apply the evaluator's verdict: append the chosen sentence and advance to the
# next acrostic letter, or generate a fresh batch of candidates.
n_regenerations = 0

# NOTE(review): accept_sentence is not defined in any visible cell. Confirm it
# exists in the kernel (or define it) before relying on Restart & Run All.
if accept_sentence(evaluator_output):
    rewritten_text += outputs[evaluator_output['evaluator_selection']-1]['cleaned_new_sentence'] + ' '
    acrostic_letter_index += 1
    print('Accepted sentence!')
    # After the last letter there is no next letter to look up; indexing
    # unconditionally raised IndexError once the acrostic was complete.
    if acrostic_letter_index < len(acrostic_letters):
        current_starting_letter = acrostic_letters[acrostic_letter_index]
    else:
        print('Acrostic complete.')
else:
    print('Regenerating options...')
    n_regenerations += 1
    outputs, options, n_options, evaluator_output = gen_and_evaluate(
        next_sentence_chain,
        acrostic_evaluator_chain,
        original_text,
        acrostic_phrase,
        rewritten_text,
        current_starting_letter,
        n_attempts=3,
    )


Accepted sentence!


In [135]:
# Show the text accepted so far, wrapped by pprint for readability.
pprint(rewritten_text)

('For the most part, volcanoes are located in areas where tectonic plates are '
 'either moving apart or coming together. Often these areas are found '
 'underwater, but there are also land-based examples such as the East African '
 'Rift. On the other hand, some of the most dramatic examples of volcanism '
 'occur at plate boundaries. But volcanoes can also form in areas where the '
 "crust's plates are stretched and thinned, like in the Wells Gray-Clearwater "
 'volcanic field and the Rio Grande rift in North America. ')


# Try the multi-option generator: one LLM call returns several numbered candidate sentences

In [182]:
# Inputs for the multi-option experiment: the passage to rewrite and the
# phrase the sentence initials should spell.
original_text = "On Earth, volcanoes are most often found where tectonic plates are diverging or converging, and most are found underwater. For example, a mid-ocean ridge, such as the Mid-Atlantic Ridge, has volcanoes caused by divergent tectonic plates whereas the Pacific Ring of Fire has volcanoes caused by convergent tectonic plates. Volcanoes can also form where there is stretching and thinning of the crust's plates, such as in the East African Rift and the Wells Gray-Clearwater volcanic field and Rio Grande rift in North America. Volcanism away from plate boundaries has been postulated to arise from upwelling diapirs from the core–mantle boundary, 3,000 kilometers (1,900 mi) deep in the Earth. This results in hotspot volcanism, of which the Hawaiian hotspot is an example. Volcanoes are usually not created where two tectonic plates slide past one another."
acrostic_phrase = "FOOBAR"


In [183]:
def get_current_acrostic_letter(inputs):
    """Look up the acrostic letter for the current position.

    Args:
        inputs: Mapping with 'acrostic_phrase' and 'acrostic_letter_index'.

    Returns:
        ``{'current_starting_letter': <letter>}`` for the indexed position.

    Raises:
        IndexError: If the index lies outside the phrase.
    """
    phrase = inputs['acrostic_phrase']
    position = inputs['acrostic_letter_index']
    return {'current_starting_letter': list(phrase)[position]}
        
def get_current_original_sentence(inputs):
    """Pick the sentence of the original text that matches the current letter index.

    The text is split on '.' only, so '?' and '!' do not end a sentence here,
    and the returned sentence does not include its trailing period.

    Args:
        inputs: Mapping with 'original_text' and 'acrostic_letter_index'.

    Returns:
        ``{'current_original_sentence': <sentence>}`` with surrounding
        whitespace removed.

    Raises:
        IndexError: If the index lies outside the list of sentences.
    """
    acrostic_letter_index = inputs['acrostic_letter_index']
    # Strip before filtering so whitespace-only fragments (for example the
    # trailing newline and indent of a triple-quoted text) do not count as
    # sentences, and real sentences do not carry leading newlines.
    fragments = (fragment.strip() for fragment in inputs['original_text'].split('.'))
    sentences = [fragment for fragment in fragments if fragment]
    return {'current_original_sentence': sentences[acrostic_letter_index]}

def split_into_sentences(inputs):
    """Split the generator's numbered list into individual option strings.

    Args:
        inputs: Mapping whose 'generated_sentence_options' entry is text of
            the form "1. first option 2. second option ...".

    Returns:
        ``{'generated_sentence_options_list': [...]}`` with the leading
        "<n>. " marker removed from each option and empty entries dropped.
    """
    text = inputs['generated_sentence_options']
    # An option boundary is whitespace followed by a list marker such as "2."
    # (digits and a dot that is not part of a decimal like "3.5"). It is
    # recognised either after sentence-ending punctuation or at a line break.
    # Requiring the marker, not just any digit, keeps "...last May! 3 toys..."
    # in one option; the line-break case separates options that lack final
    # punctuation.
    boundary = r'(?:(?<=[.?!])\s+|\s*\n\s*)(?=[0-9]+\.(?![0-9]))'
    pieces = re.split(boundary, text)
    sentences = [re.sub(r'^[0-9]+\.\s*', '', piece.strip()) for piece in pieces]
    # Drop blanks so empty model output yields no options, not one empty option.
    sentences = [sentence for sentence in sentences if sentence]

    return {'generated_sentence_options_list': sentences}

def generate_clean_options(inputs):
    """Build the numbered multiple-choice string shown to the evaluator.

    Args:
        inputs: Mapping whose 'generated_sentence_options_list' entry is a
            list of candidate sentences.

    Returns:
        ``{'clean_options': <text>}`` where the text has one "<n>. <sentence>"
        line per candidate plus a final "try again" choice.
    """
    candidates = inputs['generated_sentence_options_list']
    numbered_lines = [
        f"{position}. {sentence}\n"
        for position, sentence in enumerate(candidates, start=1)
    ]
    # The last choice lets the evaluator reject every candidate.
    numbered_lines.append(f'{len(candidates)+1}. None of these look good. AGA, try again.')
    return {'clean_options': "".join(numbered_lines)}

def extract_selection(inputs):
    """Parse the evaluator's JSON reply and return the chosen option number.

    Args:
        inputs: Mapping whose 'evaluator_output' entry is a JSON string with
            a 'selected_option' field.

    Returns:
        ``{'evaluator_selection': <int>}``.

    Raises:
        ValueError: If the reply is not valid JSON or the field is not
            integer-like.
        KeyError: If 'selected_option' is absent from the parsed object.
    """
    parsed_reply = json.loads(inputs['evaluator_output'])
    chosen = int(parsed_reply['selected_option'])
    return {'evaluator_selection': chosen}

def resolve_next_action(inputs):
    """Decide what to do with the evaluator's selection.

    Args:
        inputs: Mapping with 'evaluator_selection' (the evaluator's choice)
            and 'n_attempts' (how many sentence options were offered).

    Returns:
        ``{'next_action': <action>}`` where the action is one of:
        'accept sentence' for a selection between 1 and n_attempts,
        'regenerate (requested)' for n_attempts + 1 (the "try again" choice),
        'regenerate (error)' for a non-integer or out-of-range selection.
    """
    selection = inputs['evaluator_selection']
    n_attempts = inputs['n_attempts']

    # One if/elif chain so every input maps to exactly one action. Previously
    # a non-int fell through to the numeric comparisons (TypeError), a value
    # above n_attempts + 1 left the action unset, and 0 or negatives were
    # accepted.
    if not isinstance(selection, int):
        action = 'regenerate (error)'
    elif selection == n_attempts + 1:
        action = 'regenerate (requested)'
    elif 1 <= selection <= n_attempts:
        action = 'accept sentence'
    else:
        action = 'regenerate (error)'
    return {'next_action': action}

# Step 1: look up the letter the next sentence must start with.
acrostic_letter_chain = TransformChain(
    input_variables=['acrostic_phrase', 'acrostic_letter_index'],
    transform=get_current_acrostic_letter,
    output_variables=['current_starting_letter'],
)

# Step 2: pick the original sentence at the same index.
current_original_sentence = TransformChain(
    input_variables=['original_text', 'acrostic_letter_index'],
    transform=get_current_original_sentence,
    output_variables=['current_original_sentence'],
)

# Step 3: ask the model for candidate sentences (raw text reply).
acrostic_generator_chain = LLMChain(
    llm=OpenAI(model_name='gpt-3.5-turbo', temperature=1),
    prompt=multi_acrostic_generator_prompt,
    output_key='generated_sentence_options',
)

# Step 4: turn the raw reply into a list of candidate sentences.
split_sentences_chain = TransformChain(
    input_variables=['generated_sentence_options'],
    transform=split_into_sentences,
    output_variables=['generated_sentence_options_list'],
)

# Step 5: re-number the candidates and append the "try again" choice.
generate_clean_options_chain = TransformChain(
    input_variables=['generated_sentence_options_list'],
    transform=generate_clean_options,
    output_variables=['clean_options'],
)

# Step 6: have the evaluator model choose among the candidates.
# This rebinds acrostic_evaluator_chain from the earlier cell, now at temperature 0.7.
acrostic_evaluator_chain = LLMChain(
    llm=OpenAI(model_name='gpt-3.5-turbo', temperature=0.7),
    prompt=acrostic_evaluator_prompt,
    output_key='evaluator_output',
)

# Step 7: parse the evaluator's reply into an option number.
extract_selection_chain = TransformChain(
    input_variables=['evaluator_output'],
    transform=extract_selection,
    output_variables=['evaluator_selection'],
)

# Step 8: map the option number to accept / regenerate.
resolve_next_action_chain = TransformChain(
    input_variables=['evaluator_selection', 'n_attempts'],
    transform=resolve_next_action,
    output_variables=['next_action'],
)

# End-to-end pipeline: the steps run in the listed order, and the result
# exposes the five inputs together with every intermediate value.
full_chain = SequentialChain(
    chains=[
        acrostic_letter_chain,
        current_original_sentence,
        acrostic_generator_chain,
        split_sentences_chain,
        generate_clean_options_chain,
        acrostic_evaluator_chain,
        extract_selection_chain,
        resolve_next_action_chain,
    ],
    input_variables=[
        'original_text', 'acrostic_phrase', 'rewritten_text',
        'acrostic_letter_index', 'n_attempts',
    ],
    output_variables=[
        # inputs, passed through
        'original_text', 'acrostic_phrase', 'rewritten_text',
        'acrostic_letter_index', 'n_attempts',
        # values produced by the individual steps
        'current_starting_letter', 'current_original_sentence',
        'generated_sentence_options', 'generated_sentence_options_list',
        'clean_options', 'evaluator_output', 'evaluator_selection',
        'next_action',
    ],
    verbose=False,
)

In [172]:
from pydantic import BaseModel
from pprint import pformat

class AcrosticGeneratorInputs(BaseModel):
    """Inputs for one run of the acrostic chain.

    The custom ``__repr__`` prints the phrase, the original text, the letter
    index and the rewritten text; ``n_attempts`` is not included in it.
    """
    original_text: str
    acrostic_phrase: str
    rewritten_text: str
    acrostic_letter_index: int
    n_attempts: int

    def __repr__(self) -> str:
        """Return a multi-line summary with one '.'-terminated sentence per line."""
        def one_sentence_per_line(text: str) -> str:
            # Split on '.', re-attach the period, and drop empty fragments
            # (such as the one that follows the final period).
            fragments = text.strip().split('.')
            return '\n'.join(f'{fragment.strip()}.' for fragment in fragments if fragment != '')

        original_text_str = one_sentence_per_line(self.original_text)
        rewritten_text_str = one_sentence_per_line(self.rewritten_text)
        return f"""
AcrosticGeneratorInputs

acrostic_phrase: {self.acrostic_phrase}

original_text:\n{original_text_str}

acrostic_letter_index: {self.acrostic_letter_index}
rewritten_text:\n{rewritten_text_str}

        """

In [184]:
class AcrosticGenerationRoundResult(BaseModel):
    """Record of a single chain invocation, kept for later inspection."""
    round_number: int  # counter value at the time the chain was run
    inputs: AcrosticGeneratorInputs  # inputs the chain was run with
    output: dict  # mapping returned by the chain for this round
    

In [192]:
# Starting state for the "LILY" experiment: nothing rewritten yet, first
# letter, one candidate sentence per round.
initial_inputs = AcrosticGeneratorInputs(
    original_text = """
I know a yellow labrador retriever named Lily.
She is a little over 2 years old, which is pretty much grown up.
She likes to play fetch a lot and it's a good way to tire her out.
She doesn't like to cuddle much, but as she gets older she likes it a bit more.
    """,
    acrostic_phrase = "LILY",
    rewritten_text = "",
    acrostic_letter_index = 0,
    n_attempts=1,
)
results = []  # one AcrosticGenerationRoundResult is appended per chain run
verbose = True  # NOTE(review): not read by any visible cell
# Work on a copy so initial_inputs keeps the starting state while `inputs` is mutated.
inputs = initial_inputs.copy()

In [193]:
# Counters used by the round cell below; run this cell to reset them.
current_round_number = 0  # incremented after every chain run
n_regenerations_for_current_round = 0  # reset to 0 whenever a sentence is accepted

In [197]:
# Run one generate -> evaluate round and fold the result into `inputs`.
# Re-run this cell once per round until the acrostic is complete.
if inputs.acrostic_letter_index >= len(inputs.acrostic_phrase):
    # Every letter already has a sentence. Calling the chain again would index
    # past the end of the phrase and raise IndexError.
    print('Acrostic complete; nothing left to generate.')
else:
    output = full_chain(inputs=inputs.dict())
    current_round_result = AcrosticGenerationRoundResult(
        round_number=current_round_number,
        inputs=inputs.copy(),
        output=output,
    )
    results.append(current_round_result)

    # Accept only a selection that points at an existing candidate; the model
    # may return fewer options than requested.
    accepted_sentence = None
    candidate_sentences = output['generated_sentence_options_list']
    if output['next_action'] == 'accept sentence':
        selection_idx = output['evaluator_selection'] - 1
        if 0 <= selection_idx < len(candidate_sentences):
            accepted_sentence = candidate_sentences[selection_idx] + ' '

    if accepted_sentence is not None:
        inputs.rewritten_text += accepted_sentence
        print(f'Accepted sentence:\n{output["evaluator_selection"]}. {accepted_sentence}')
        inputs.acrostic_letter_index += 1
        n_regenerations_for_current_round = 0
    else:
        if output['next_action'] == 'accept sentence':
            print('regenerate (selection out of range)')
        else:
            print(f'{output["next_action"]}')
        n_regenerations_for_current_round += 1
    current_round_number += 1

    pprint(output['clean_options'])

inputs

Accepted sentence:
1. Young children often ask to pet Lily when they see her walking down the street. 
('1. Young children often ask to pet Lily when they see her walking down the '
 'street.\n'
 '2. None of these look good. AGA, try again.')



AcrosticGeneratorInputs

acrostic_phrase: LILY

original_text:
I know a yellow labrador retriever named Lily.
She is a little over 2 years old, which is pretty much grown up.
She likes to play fetch a lot and it's a good way to tire her out.
She doesn't like to cuddle much, but as she gets older she likes it a bit more.

acrostic_letter_index: 4
rewritten_text:
Lily's name suits her well because she has a light yellow coat.
It's amazing how quickly she learned how to do tricks at such a young age.
Lily is always excited to go on walks and explore new places.
Young children often ask to pet Lily when they see her walking down the street.

        