In [1]:
%load_ext autoreload
%autoreload 2

# Test querying GPT 3.5 Turbo

In [2]:
import tqdm
import pandas as pd
import json
import re

import pathlib
import sys

# Add src module to path before import.
sys.path.insert(0, str(pathlib.Path('../../src')))

from file_IO_handler import get_plaintext_file_contents, save_json, load_json
from fill_string_template import get_filled_strings_from_dataframe, FilledString

## Get Access to OpenAI API

In [3]:
# Read the OpenAI API key from a local plaintext file rather than embedding it in the notebook.
OPENAI_API_KEY = get_plaintext_file_contents(pathlib.Path("../../OPENAI_API_KEY.env"))
# Debug aid only: keep disabled so the key is never stored in saved cell output.
# print(OPENAI_API_KEY)

# Identifier of the chat model this notebook is meant to query.
MODEL_NAME = "gpt-3.5-turbo"

In [4]:
from openai import OpenAI
import os

# Shared API client; Completion() sends every chat request through this object.
client = OpenAI(api_key=OPENAI_API_KEY)

In [5]:
def Completion(prompt, is_json=False, is_short_ans=False, model="gpt-3.5-turbo"):
    """Send a single-turn chat request and return the raw API response.

    Args:
        prompt: Text sent as the user message.
        is_json: When true, prepend a system message asking for JSON and
            request the ``json_object`` response format.  Takes precedence
            over ``is_short_ans`` when both flags are set.
        is_short_ans: When true (and ``is_json`` is false), prepend a system
            message asking for a one word or phrase answer.
        model: Chat model identifier.  Defaults to the model this notebook
            has always used; pass the notebook's ``MODEL_NAME`` (or any other
            id) to switch models without editing this function.

    Returns:
        The response object returned by ``client.chat.completions.create``;
        callers read the text from ``response.choices[0].message.content``.
    """
    messages = []
    request_options = {}

    if is_json:
        messages.append({"role": "system", "content": "You are a helpful assistant designed to output JSON."})
        request_options["response_format"] = {"type": "json_object"}
    elif is_short_ans:
        messages.append({"role": "system", "content": "Output one word or phrase"})

    # The user prompt always comes last, after the optional system message.
    messages.append({"role": "user", "content": prompt})

    return client.chat.completions.create(
        model=model,
        messages=messages,
        **request_options,
    )

In [6]:
# response = Completion("When was Google founded?")
# print(response.choices[0].message.content)

# Google was founded on September 4, 1998.

In [7]:
# response = Completion("When was Google founded?", is_json=True)
# print(response.choices[0].message.content)

# {
#     "founder": "Larry Page and Sergey Brin",
#     "founded_date": "September 4, 1998"
# }

### Target Domains
Choose a few interesting target domains; the pipeline produces one analogy per target domain for each source sub-concept.

In [8]:
# run_prompt_4 emits one output row per entry in this list.
target_domains = ["Kpop", "baseball", "Iron Man", "money"]

## Save Locations

In [9]:
# Save every call
prompt_1_output_save_to_prefix = pathlib.Path("../generations/notebook_run/prompt_1_output")
prompt_2_output_save_to_prefix = pathlib.Path("../generations/notebook_run/prompt_2_output")
prompt_3_output_save_to_prefix = pathlib.Path("../generations/notebook_run/prompt_3_output")
prompt_4_output_save_to_prefix = pathlib.Path("../generations/notebook_run/prompt_4_output")
prompt_5_output_save_to_prefix = pathlib.Path("../generations/notebook_run/prompt_5_output")
prompt_6_output_save_to_prefix = pathlib.Path("../generations/notebook_run/prompt_6_output")
prompt_7_output_save_to_prefix = pathlib.Path("../generations/notebook_run/prompt_7_output")

# Consolidated to csv
prompt_1_output_save_to = pathlib.Path("../generations/notebook_run/prompt_1_output.csv")
prompt_2_output_save_to = pathlib.Path("../generations/notebook_run/prompt_2_output.csv")
prompt_3_output_save_to = pathlib.Path("../generations/notebook_run/prompt_3_output.csv")
prompt_4_output_save_to = pathlib.Path("../generations/notebook_run/prompt_4_output.csv")
prompt_5_output_save_to = pathlib.Path("../generations/notebook_run/prompt_5_output.csv")
prompt_6_output_save_to = pathlib.Path("../generations/notebook_run/prompt_6_output.csv")
prompt_7_output_save_to = pathlib.Path("../generations/notebook_run/prompt_7_output.csv")

## Prepare Prompts

In [10]:
# Directory holding the plaintext prompt templates, one file per pipeline stage.
PROMPT_TEMPLATE_DIR = pathlib.Path("../prompt_template")

prompt_1_template = get_plaintext_file_contents(PROMPT_TEMPLATE_DIR / "prompt_1_get_subtensors.txt")
prompt_2_template = get_plaintext_file_contents(PROMPT_TEMPLATE_DIR / "prompt_2_get_purpose_and_mechanism.txt")
prompt_3_template = get_plaintext_file_contents(PROMPT_TEMPLATE_DIR / "prompt_3_annotate_text.txt")
prompt_4_template = get_plaintext_file_contents(PROMPT_TEMPLATE_DIR / "prompt_4_create_schema.txt")
prompt_5_template = get_plaintext_file_contents(PROMPT_TEMPLATE_DIR / "prompt_5_identify_subvehicle.txt")
prompt_6_template = get_plaintext_file_contents(PROMPT_TEMPLATE_DIR / "prompt_6_write_analogy.txt")
prompt_7_template = get_plaintext_file_contents(PROMPT_TEMPLATE_DIR / "prompt_7_write_poem.txt")

In [11]:
print(prompt_1_template)

List the sub-concepts associated with ${tensor_name} from ${source_domain} in json format.

Desired format of output: json list of objects.
```
{
	"sub_concepts": [
		{
			"name": sub-concept’s name (string)
			"definition": sub-concept definition (string)
		},
	...
	]
}
```


In [12]:
def run_prompt_1(filled_strings):
    """Run prompt 1 (list sub-concepts) for the first filled prompt only.

    The model's JSON reply is cached at
    ``{prompt_1_output_save_to_prefix}_idx_0.json``; when that file already
    exists it is loaded instead of calling the model.  Every entry of the
    reply's ``"sub_concepts"`` list becomes one row of the result, which is
    also written to ``prompt_1_output_save_to`` as CSV.

    Args:
        filled_strings: Indexable collection whose items expose ``.filled``
            (the prompt text) and ``.values`` (a mapping holding
            ``level_of_difficulty``, ``tensor_name`` and ``source_domain``).

    Returns:
        pandas.DataFrame with the three carried-over columns plus
        ``subtensor_name`` and ``subtensor_definition``.
    """
    # Only index 0 is processed; the loop over every prompt is disabled.
    # for idx in range(len(filled_strings)):
    idx = 0
    save_path = pathlib.Path(f"{prompt_1_output_save_to_prefix}_idx_{idx}.json")

    entry = filled_strings[idx]
    level_of_difficulty = entry.values["level_of_difficulty"]
    tensor_name = entry.values["tensor_name"]
    source_domain = entry.values["source_domain"]

    if not save_path.exists():
        # No cached reply yet: query the model and persist what it returned.
        out = Completion(entry.filled, is_json=True)
        out_content = json.loads(out.choices[0].message.content)
        save_json(out_content, save_path)
        print(f"Called model and saved to {save_path}")
    else:
        print(f"Loading from {save_path}")
        out_content = load_json(save_path)

    sub_concepts = out_content["sub_concepts"]
    row_count = len(sub_concepts)

    # The source concept is repeated on every row; only the last two columns
    # come from the model reply.
    pre_df = {
        "level_of_difficulty": [level_of_difficulty] * row_count,
        "tensor_name": [tensor_name] * row_count,
        "source_domain": [source_domain] * row_count,
        "subtensor_name": [concept["name"] for concept in sub_concepts],
        "subtensor_definition": [concept["definition"] for concept in sub_concepts],
    }

    df_prompt_2_input = pd.DataFrame(pre_df)
    df_prompt_2_input.to_csv(prompt_1_output_save_to)

    return df_prompt_2_input

In [13]:
print(prompt_2_template)

Describe the purpose and mechanism of the ${subtensor_name} relative to ${tensor_name} using terms specific to geometry, math, engineering, and design.

Name: ${subtensor_name}
Definition: ${subtensor_definition}

Desired format of output: json object.
```
{
	"purpose": what purpose the ${subtensor_name} fulfills and why, without using ${source_domain}-specific terms.
	"mechanism": how the ${subtensor_name} is structured and how that structure supports the purpose
}
```


In [14]:
def run_prompt_2(filled_strings):
    """Run prompt 2 (purpose and mechanism) for the first filled prompt only.

    The model's JSON reply is cached at
    ``{prompt_2_output_save_to_prefix}_idx_0.json`` and reused when present.
    The single-row result is also written to ``prompt_2_output_save_to``.

    Args:
        filled_strings: Indexable collection whose items expose ``.filled``
            and ``.values``; ``.values`` must hold every prompt-1 column.

    Returns:
        pandas.DataFrame with one row: the prompt-1 columns plus
        ``subtensor_purpose``, ``subtensor_mechanism`` and ``text`` (purpose
        and mechanism joined by a single space).
    """
    carried_columns = (
        "level_of_difficulty",
        "tensor_name",
        "source_domain",
        "subtensor_name",
        "subtensor_definition",
    )

    # Only index 0 is processed; the loop over every prompt is disabled.
    # for idx in range(len(filled_strings)):
    idx = 0
    save_path = pathlib.Path(f"{prompt_2_output_save_to_prefix}_idx_{idx}.json")

    # Copy the inputs forward before touching the model so a missing key
    # fails before any API call is made.
    values = filled_strings[idx].values
    pre_df = {column: [values[column]] for column in carried_columns}

    if not save_path.exists():
        out = Completion(filled_strings[idx].filled, is_json=True)
        out_content = json.loads(out.choices[0].message.content)
        save_json(out_content, save_path)
        print(f"Called model and saved to {save_path}")
    else:
        print(f"Loading from {save_path}")
        out_content = load_json(save_path)

    # Columns introduced by this stage.
    purpose = out_content["purpose"]
    mechanism = out_content["mechanism"]
    pre_df["subtensor_purpose"] = [purpose]
    pre_df["subtensor_mechanism"] = [mechanism]
    pre_df["text"] = [purpose + " " + mechanism]

    df_prompt_3_input = pd.DataFrame(pre_df)
    df_prompt_3_input.to_csv(prompt_2_output_save_to)

    return df_prompt_3_input

In [15]:
print(prompt_3_template)

Annotate the following text. 

Surround in double curly braces any words, phrases, and adjectives related to ${source_domain}, chemistry, or scientific jargon.

Surround in square braces any words, phrases, and adjectives related to geometry, math, engineering, and design.

Text: """${text}"""

Output annotated text:



In [16]:
def redact_func(str, annotated_str, list_of_redact_words):
    """Return ``str`` with domain-revealing words replaced by ``XXX``.

    Words to redact are collected from three sources:

    * every phrase wrapped in ``{{...}}`` inside ``annotated_str``;
    * capitalized words in ``str`` that do not directly follow ``.``, ``?``
      or ``!`` (the very first word of the text is included);
    * the phrases in ``list_of_redact_words``.

    Each phrase is split on whitespace into single words, and for every word
    ending in ``s`` the form without that final ``s`` is redacted as well.
    Matching is case-sensitive and only replaces whole words, so redacting
    ``is`` (and its derived form ``i``) leaves ``axis`` untouched.  Empty
    words are ignored.

    Args:
        str: Plain text to redact (the name shadows the builtin and is kept
            only for compatibility with existing callers).
        annotated_str: Copy of the text in which phrases to hide are wrapped
            in double curly braces.
        list_of_redact_words: Extra words or phrases to redact.

    Returns:
        The redacted text; ``str`` unchanged when there is nothing to redact.
    """
    text = str  # local alias so the builtin-shadowing name is not used below

    # Phrases the annotator marked with {{...}}, with the braces removed.
    annotated_words = [
        match.replace("{{", "").replace("}}", "")
        for match in re.findall(r'\{\{[^\}]*\}\}', annotated_str)
    ]

    # Capitalized words that are not sentence starts.  The optional leading
    # punctuation is captured only so such matches can be recognised and
    # dropped.
    capitalized_words = [
        match.strip()
        for match in re.findall(r'[.?!]? *[A-Z][a-z]*', text)
        if not match.startswith((".", "?", "!"))
    ]

    phrases = [*annotated_words, *capitalized_words, *list_of_redact_words]

    # Break phrases into single words; split() drops the empty pieces that
    # padded or double-spaced phrases would otherwise produce.
    tokens = [token for phrase in phrases for token in phrase.split()]

    # Also redact the form without a trailing "s".
    tokens.extend([token[:-1] for token in tokens if token.endswith("s")])

    # An empty token would match at every position, so it must never reach
    # the pattern.  Longest-first lets "Kepler's" win over "Kepler".
    unique_tokens = sorted(
        {token for token in tokens if token},
        key=lambda token: (-len(token), token),
    )
    if not unique_tokens:
        return text

    # Lookarounds instead of \b so tokens that begin or end with punctuation
    # still match as whole words.
    pattern = re.compile(
        r'(?<!\w)(?:' + "|".join(map(re.escape, unique_tokens)) + r')(?!\w)'
    )
    return pattern.sub("XXX", text)

In [17]:
def run_prompt_3(filled_strings):
    """Run prompt 3 (annotate text) for the first filled prompt only.

    The model's plain-text reply is cached as UTF-8 at
    ``{prompt_3_output_save_to_prefix}_idx_0.txt`` and reused when present.
    The annotated reply is then passed to ``redact_func`` together with the
    original text to produce the redacted text.  The single-row result is
    also written to ``prompt_3_output_save_to``.

    Args:
        filled_strings: Indexable collection whose items expose ``.filled``
            and ``.values``; ``.values`` must hold every prompt-2 column.

    Returns:
        pandas.DataFrame with one row: the prompt-2 columns plus
        ``text_annotated`` and ``text_redacted``.
    """
    carried_columns = (
        "level_of_difficulty",
        "tensor_name",
        "source_domain",
        "subtensor_name",
        "subtensor_definition",
        "subtensor_purpose",
        "subtensor_mechanism",
        "text",
    )

    # Only index 0 is processed; the loop over every prompt is disabled.
    # for idx in range(len(filled_strings)):
    idx = 0
    save_path = pathlib.Path(f"{prompt_3_output_save_to_prefix}_idx_{idx}.txt")

    # Copy the inputs forward first so a missing key fails before any API call.
    values = filled_strings[idx].values
    pre_df = {column: [values[column]] for column in carried_columns}

    if save_path.exists():
        print(f"Loading from {save_path}")
        # Explicit encoding: model replies routinely contain non-ASCII text.
        out_content = save_path.read_text(encoding="utf-8")
    else:
        # Create the output directory up front so a paid reply is never lost
        # to a missing folder.
        save_path.parent.mkdir(parents=True, exist_ok=True)

        # call model
        out = Completion(filled_strings[idx].filled, is_json=False)
        out_content = out.choices[0].message.content

        # save output
        save_path.write_text(out_content, encoding="utf-8")
        print(f"Called model and saved to {save_path}")

    # Columns introduced by this stage.
    pre_df["text_annotated"] = [out_content]
    pre_df["text_redacted"] = [
        redact_func(
            values["text"],
            out_content,
            [values["tensor_name"], values["source_domain"], values["subtensor_name"]],
        )
    ]

    df_prompt_4_input = pd.DataFrame(pre_df)
    df_prompt_4_input.to_csv(prompt_3_output_save_to)

    return df_prompt_4_input

In [18]:
print(prompt_4_template)

Given the following redacted text, create a list of engineering design principles used.

text: """${text_redacted}"""

Desired format of output: json list of objects.
```
{
	"engineering_design_principles": [
		{
			"description": name of engineering design principles
			"mechanism": summarize description of purpose for and mechanism utilized by the design principle
		},
		...
	]
}
```


In [19]:
def run_prompt_4(filled_strings):
    """Run prompt 4 (extract design principles) for the first filled prompt only.

    The model's JSON reply is cached at
    ``{prompt_4_output_save_to_prefix}_idx_0.json`` and reused when present.
    The reply's ``"engineering_design_principles"`` list is serialised once
    (``json.dumps(..., indent=2)``) into the ``schema`` column, and the input
    row is repeated once per entry of the module-level ``target_domains``
    list.  The result is also written to ``prompt_4_output_save_to``.

    Args:
        filled_strings: Indexable collection whose items expose ``.filled``
            and ``.values``; ``.values`` must hold every prompt-3 column.

    Returns:
        pandas.DataFrame with ``len(target_domains)`` rows: the prompt-3
        columns plus ``schema`` and ``target_domain``.
    """
    carried_columns = (
        "level_of_difficulty",
        "tensor_name",
        "source_domain",
        "subtensor_name",
        "subtensor_definition",
        "subtensor_purpose",
        "subtensor_mechanism",
        "text",
        "text_annotated",
        "text_redacted",
    )

    # Only index 0 is processed; the loop over every prompt is disabled.
    # for idx in range(len(filled_strings)):
    idx = 0
    save_path = pathlib.Path(f"{prompt_4_output_save_to_prefix}_idx_{idx}.json")

    # One output row per target domain; every carried column repeats its value.
    values = filled_strings[idx].values
    row_count = len(target_domains)
    pre_df = {column: [values[column]] * row_count for column in carried_columns}

    if save_path.exists():
        print(f"Loading from {save_path}")
        out_content = load_json(save_path)
    else:
        # call model
        out = Completion(filled_strings[idx].filled, is_json=True)
        out_content = json.loads(out.choices[0].message.content)

        # save output
        save_json(out_content, save_path)
        print(f"Called model and saved to {save_path}")

    # The schema is identical for every target domain, so serialise it once.
    schema = json.dumps(out_content["engineering_design_principles"], indent=2)

    # Columns introduced by this stage.
    pre_df["schema"] = [schema] * row_count
    pre_df["target_domain"] = list(target_domains)

    df_prompt_5_input = pd.DataFrame(pre_df)
    df_prompt_5_input.to_csv(prompt_4_output_save_to)

    return df_prompt_5_input

In [20]:
print(prompt_5_template)

Describe a feature in the domain of ${target_domain} that satisfies all the features of the following spec.

Spec:```
${schema}
```

A feature in the domain of ${target_domain} that satisfies all the features of the spec is:


In [21]:
def run_prompt_5(filled_strings):
    """Run prompt 5 (name a matching target-domain feature) for the first prompt only.

    The model is asked for a short answer; its plain-text reply is cached as
    UTF-8 at ``{prompt_5_output_save_to_prefix}_idx_0.txt`` and reused when
    present.  The single-row result is also written to
    ``prompt_5_output_save_to``.

    Args:
        filled_strings: Indexable collection whose items expose ``.filled``
            and ``.values``; ``.values`` must hold every prompt-4 column.

    Returns:
        pandas.DataFrame with one row: the prompt-4 columns plus
        ``subvehicle_name`` (the raw reply) and two JSON-key variants
        (spaces replaced by underscores, lower-cased) of the sub-concept
        name and of the reply.
    """
    carried_columns = (
        "level_of_difficulty",
        "tensor_name",
        "source_domain",
        "subtensor_name",
        "subtensor_definition",
        "subtensor_purpose",
        "subtensor_mechanism",
        "text",
        "text_annotated",
        "text_redacted",
        "schema",
        "target_domain",
    )

    # Only index 0 is processed; the loop over every prompt is disabled.
    # for idx in range(len(filled_strings)):
    idx = 0
    save_path = pathlib.Path(f"{prompt_5_output_save_to_prefix}_idx_{idx}.txt")

    # Copy the inputs forward first so a missing key fails before any API call.
    values = filled_strings[idx].values
    pre_df = {column: [values[column]] for column in carried_columns}
    subtensor_name = values["subtensor_name"]

    if save_path.exists():
        print(f"Loading from {save_path}")
        # Explicit encoding: model replies routinely contain non-ASCII text.
        out_content = save_path.read_text(encoding="utf-8")
    else:
        # Create the output directory up front so a paid reply is never lost
        # to a missing folder.
        save_path.parent.mkdir(parents=True, exist_ok=True)

        # call model
        out = Completion(filled_strings[idx].filled, is_json=False, is_short_ans=True)
        out_content = out.choices[0].message.content

        # save output
        save_path.write_text(out_content, encoding="utf-8")
        print(f"Called model and saved to {save_path}")

    # Columns introduced by this stage.
    pre_df["subvehicle_name"] = [out_content]
    pre_df["subtensor_name_as_json_key"] = [subtensor_name.replace(" ", "_").lower()]
    pre_df["subvehicle_name_as_json_key"] = [out_content.replace(" ", "_").lower()]

    df_prompt_6_input = pd.DataFrame(pre_df)
    df_prompt_6_input.to_csv(prompt_5_output_save_to)

    return df_prompt_6_input

In [22]:
print(prompt_6_template)

Write an extended metaphor for how ${subtensor_name} in ${tensor_name} is similar in mechanism and function to ${subvehicle_name} in ${target_domain}. Both ${subtensor_name} in ${tensor_name} and ${subvehicle_name} in ${target_domain} have the following characteristics in common:

Spec:```
${schema}
```

Desired format of output: json list of objects.
```
{
	"extended_metaphor": [
		{
			"metaphor": both ${subtensor_name} in ${tensor_name} and ${subvehicle_name} in ${target_domain} have {description of first design principle and mechanism in spec}
			"${subtensor_name_as_json_key}": describe the purpose and mechanism utilized by the design principle in ${subtensor_name}
			"${subvehicle_name_as_json_key}": explain how the purpose and mechanism utilized by ${subvehicle_name} in ${target_domain} mirrors that used in the ${subtensor_name} above
		},
		...
	]
}


In [23]:
def run_prompt_6(filled_strings):
    """Run prompt 6 (write the extended metaphor) for the first filled prompt only.

    The model's JSON reply is cached at
    ``{prompt_6_output_save_to_prefix}_idx_0.json`` and reused when present.
    The reply's ``"extended_metaphor"`` value is stored as an indented JSON
    string.  The single-row result is also written to
    ``prompt_6_output_save_to``.

    Args:
        filled_strings: Indexable collection whose items expose ``.filled``
            and ``.values``; ``.values`` must hold every prompt-5 column.

    Returns:
        pandas.DataFrame with one row: the prompt-5 columns plus
        ``extended_metaphor``.
    """
    carried_columns = (
        "level_of_difficulty",
        "tensor_name",
        "source_domain",
        "subtensor_name",
        "subtensor_definition",
        "subtensor_purpose",
        "subtensor_mechanism",
        "text",
        "text_annotated",
        "text_redacted",
        "schema",
        "target_domain",
        "subvehicle_name",
        "subtensor_name_as_json_key",
        "subvehicle_name_as_json_key",
    )

    # Only index 0 is processed; the loop over every prompt is disabled.
    # for idx in range(len(filled_strings)):
    idx = 0
    save_path = pathlib.Path(f"{prompt_6_output_save_to_prefix}_idx_{idx}.json")

    # Copy the inputs forward first so a missing key fails before any API call.
    values = filled_strings[idx].values
    pre_df = {column: [values[column]] for column in carried_columns}

    if not save_path.exists():
        out = Completion(filled_strings[idx].filled, is_json=True)
        out_content = json.loads(out.choices[0].message.content)
        save_json(out_content, save_path)
        print(f"Called model and saved to {save_path}")
    else:
        print(f"Loading from {save_path}")
        out_content = load_json(save_path)

    # Column introduced by this stage.
    pre_df["extended_metaphor"] = [json.dumps(out_content["extended_metaphor"], indent=2)]

    df_prompt_7_input = pd.DataFrame(pre_df)
    df_prompt_7_input.to_csv(prompt_6_output_save_to)

    return df_prompt_7_input

In [24]:
print(prompt_7_template)

Write a concise analogy in the form of a poem given the following information.

Information: ```
${extended_metaphor}
```


In [25]:
def run_prompt_7(filled_strings):
    """Run prompt 7 (turn the metaphor into a poem) for the first filled prompt only.

    The model's plain-text reply is cached as UTF-8 at
    ``{prompt_7_output_save_to_prefix}_idx_0.txt`` and reused when present.
    The single-row result is also written to ``prompt_7_output_save_to``.

    Args:
        filled_strings: Indexable collection whose items expose ``.filled``
            and ``.values``; ``.values`` must hold every prompt-6 column.

    Returns:
        pandas.DataFrame with one row: the prompt-6 columns plus
        ``final_output`` (the raw reply).
    """
    carried_columns = (
        "level_of_difficulty",
        "tensor_name",
        "source_domain",
        "subtensor_name",
        "subtensor_definition",
        "subtensor_purpose",
        "subtensor_mechanism",
        "text",
        "text_annotated",
        "text_redacted",
        "schema",
        "target_domain",
        "subvehicle_name",
        "subtensor_name_as_json_key",
        "subvehicle_name_as_json_key",
        "extended_metaphor",
    )

    # Only index 0 is processed; the loop over every prompt is disabled.
    # for idx in range(len(filled_strings)):
    idx = 0
    save_path = pathlib.Path(f"{prompt_7_output_save_to_prefix}_idx_{idx}.txt")

    # Copy the inputs forward first so a missing key fails before any API call.
    values = filled_strings[idx].values
    pre_df = {column: [values[column]] for column in carried_columns}

    if save_path.exists():
        print(f"Loading from {save_path}")
        # Explicit encoding: model replies routinely contain non-ASCII text.
        out_content = save_path.read_text(encoding="utf-8")
    else:
        # Create the output directory up front so a paid reply is never lost
        # to a missing folder.
        save_path.parent.mkdir(parents=True, exist_ok=True)

        # call model
        out = Completion(filled_strings[idx].filled, is_json=False)
        out_content = out.choices[0].message.content

        # save output
        save_path.write_text(out_content, encoding="utf-8")
        print(f"Called model and saved to {save_path}")

    # Column introduced by this stage.
    pre_df["final_output"] = [out_content]

    df_final = pd.DataFrame(pre_df)
    df_final.to_csv(prompt_7_output_save_to)

    return df_final

## Run Prompts

In [26]:
# Map the spreadsheet headers onto the placeholder names used by the prompt templates.
concept_column_names = {
    "Scientific Domain": "source_domain",
    "Main Tenor": "tensor_name",
    "Level of Difficulty": "level_of_difficulty",
}
df_prompt_1_input = (
    pd.read_csv(pathlib.Path("../prompt_fills/concepts_per_domain.csv"))
    .rename(columns=concept_column_names)
)
df_prompt_1_input

Unnamed: 0,source_domain,level_of_difficulty,tensor_name
0,Astronomy,Elementary,solar system
1,Astronomy,Intermediate,big bang theory
2,Astronomy,Advanced,theory of the naked singularity
3,Biology,Elementary,ecosystems
4,Biology,Intermediate,DNA
5,Biology,Advanced,endocrine system
6,Chemistry,Elementary,chemical reaction
7,Chemistry,Intermediate,molecular compounds
8,Chemistry,Advanced,chemical kinetics
9,Computer Science,Elementary,computer program


### Prompt 1

In [27]:
# Fill the prompt 1 template from df_prompt_1_input and preview the first result.
# `filled_strings` is reassigned before each later stage, so run the cells in order.
filled_strings = get_filled_strings_from_dataframe(prompt_1_template, df_prompt_1_input)
print(filled_strings[0].filled)

List the sub-concepts associated with solar system from Astronomy in json format.

Desired format of output: json list of objects.
```
{
	"sub_concepts": [
		{
			"name": sub-concept’s name (string)
			"definition": sub-concept definition (string)
		},
	...
	]
}
```


In [28]:
# Stage 1: processes only filled_strings[0]; reuses the cached JSON reply when it exists.
df_prompt_2_input = run_prompt_1(filled_strings)

Loading from ../generations/notebook_run/prompt_1_output_idx_0.json


In [29]:
df_prompt_2_input

Unnamed: 0,level_of_difficulty,tensor_name,source_domain,subtensor_name,subtensor_definition
0,Elementary,solar system,Astronomy,Sun,The star at the center of the solar system
1,Elementary,solar system,Astronomy,Planets,Celestial bodies that revolve around the Sun i...
2,Elementary,solar system,Astronomy,Moons,Natural satellites that orbit planets
3,Elementary,solar system,Astronomy,Asteroids,Small rocky bodies that orbit the Sun
4,Elementary,solar system,Astronomy,Comets,Icy bodies that orbit the Sun and have a tail ...
5,Elementary,solar system,Astronomy,Dwarf Planets,Celestial bodies similar to planets but have n...


### Prompt 2

In [30]:
# Fill the prompt 2 template from the stage 1 output and preview the first result.
filled_strings = get_filled_strings_from_dataframe(prompt_2_template, df_prompt_2_input)
print(filled_strings[0].filled)

Describe the purpose and mechanism of the Sun relative to solar system using terms specific to geometry, math, engineering, and design.

Name: Sun
Definition: The star at the center of the solar system

Desired format of output: json object.
```
{
	"purpose": what purpose the Sun fulfills and why, without using Astronomy-specific terms.
	"mechanism": how the Sun is structured and how that structure supports the purpose
}
```


In [31]:
# Stage 2: processes only filled_strings[0]; reuses the cached JSON reply when it exists.
df_prompt_3_input = run_prompt_2(filled_strings)

Loading from ../generations/notebook_run/prompt_2_output_idx_0.json


In [32]:
df_prompt_3_input

Unnamed: 0,level_of_difficulty,tensor_name,source_domain,subtensor_name,subtensor_definition,subtensor_purpose,subtensor_mechanism,text
0,Elementary,solar system,Astronomy,Sun,The star at the center of the solar system,The Sun serves as the gravitational anchor tha...,The Sun's massive size and gravitational pull ...,The Sun serves as the gravitational anchor tha...


### Prompt 3

In [33]:
# Fill the prompt 3 template from the stage 2 output and preview the first result.
filled_strings = get_filled_strings_from_dataframe(prompt_3_template, df_prompt_3_input)
print(filled_strings[0].filled)

Annotate the following text. 

Surround in double curly braces any words, phrases, and adjectives related to Astronomy, chemistry, or scientific jargon.

Surround in square braces any words, phrases, and adjectives related to geometry, math, engineering, and design.

Text: """The Sun serves as the gravitational anchor that holds the solar system together, providing the necessary force to keep planets in stable orbits. The Sun's massive size and gravitational pull create a central point of attraction, around which planets move in elliptical orbits according to Kepler's laws of planetary motion. The Sun's design as a massive ball of hot plasma generates energy through nuclear fusion, which maintains its internal pressure and heat, allowing it to emit light and heat essential for sustaining life on Earth."""

Output annotated text:



In [34]:
# Stage 3: processes only filled_strings[0]; reuses the cached text reply when it exists.
df_prompt_4_input = run_prompt_3(filled_strings)

Loading from ../generations/notebook_run/prompt_3_output_idx_0.txt


In [35]:
df_prompt_4_input

Unnamed: 0,level_of_difficulty,tensor_name,source_domain,subtensor_name,subtensor_definition,subtensor_purpose,subtensor_mechanism,text,text_annotated,text_redacted
0,Elementary,solar system,Astronomy,Sun,The star at the center of the solar system,The Sun serves as the gravitational anchor tha...,The Sun's massive size and gravitational pull ...,The Sun serves as the gravitational anchor tha...,"""""""The Sun serves as the gravitational anchor ...",XXX XXX serves as the gravitational anchor tha...


### Prompt 4

In [36]:
# Fill the prompt 4 template from the stage 3 output and preview the first result.
filled_strings = get_filled_strings_from_dataframe(prompt_4_template, df_prompt_4_input)
print(filled_strings[0].filled)

Given the following redacted text, create a list of engineering design principles used.

text: """XXX XXX serves as the gravitational anchor that holds the XXX XXX together, providing the necessary force to keep planets in stable orbits. XXX XXX's massive size and gravitational pull create a central point XXX attraction, around which planets move in elliptical orbits according to XXX XXX XXX XXX XXX. XXX XXX's design as a massive ball XXX hot plasma generates energy through XXX XXX, which maintains its internal pressure and heat, allowing it to emit light and heat essential for sustaining life on XXX."""

Desired format of output: json list of objects.
```
{
	"engineering_design_principles": [
		{
			"description": name of engineering design principles
			"mechanism": summarize description of purpose for and mechanism utilized by the design principle
		},
		...
	]
}
```


In [37]:
# Stage 4: processes only filled_strings[0] and emits one row per entry in target_domains.
df_prompt_5_input = run_prompt_4(filled_strings)

Loading from ../generations/notebook_run/prompt_4_output_idx_0.json


In [38]:
# Display the prompt-4 result. In the recorded output it carries `schema` and
# `target_domain` columns in addition to those of df_prompt_4_input, with one
# row per entry in target_domains.
df_prompt_5_input

Unnamed: 0,level_of_difficulty,tensor_name,source_domain,subtensor_name,subtensor_definition,subtensor_purpose,subtensor_mechanism,text,text_annotated,text_redacted,schema,target_domain
0,Elementary,solar system,Astronomy,Sun,The star at the center of the solar system,The Sun serves as the gravitational anchor tha...,The Sun's massive size and gravitational pull ...,The Sun serves as the gravitational anchor tha...,"""""""The Sun serves as the gravitational anchor ...",XXX XXX serves as the gravitational anchor tha...,"[\n {\n ""description"": ""Gravitational Forc...",Kpop
1,Elementary,solar system,Astronomy,Sun,The star at the center of the solar system,The Sun serves as the gravitational anchor tha...,The Sun's massive size and gravitational pull ...,The Sun serves as the gravitational anchor tha...,"""""""The Sun serves as the gravitational anchor ...",XXX XXX serves as the gravitational anchor tha...,"[\n {\n ""description"": ""Gravitational Forc...",baseball
2,Elementary,solar system,Astronomy,Sun,The star at the center of the solar system,The Sun serves as the gravitational anchor tha...,The Sun's massive size and gravitational pull ...,The Sun serves as the gravitational anchor tha...,"""""""The Sun serves as the gravitational anchor ...",XXX XXX serves as the gravitational anchor tha...,"[\n {\n ""description"": ""Gravitational Forc...",Iron Man
3,Elementary,solar system,Astronomy,Sun,The star at the center of the solar system,The Sun serves as the gravitational anchor tha...,The Sun's massive size and gravitational pull ...,The Sun serves as the gravitational anchor tha...,"""""""The Sun serves as the gravitational anchor ...",XXX XXX serves as the gravitational anchor tha...,"[\n {\n ""description"": ""Gravitational Forc...",money


### Prompt 5

In [39]:
# Fill the prompt-5 template from df_prompt_5_input, then preview the first
# filled prompt. This rebinds `filled_strings`, so run_prompt_5 must be
# executed after this cell.
filled_strings = get_filled_strings_from_dataframe(
    prompt_5_template,
    df_prompt_5_input,
)
print(filled_strings[0].filled)

Describe a feature in the domain of Kpop that satisfies all the features of the following spec.

Spec:```
[
  {
    "description": "Gravitational Force",
    "mechanism": "Provides the necessary force to keep planets in stable orbits by being an anchor and creating a central point of attraction"
  },
  {
    "description": "Elliptical Orbits",
    "mechanism": "Planets move in elliptical paths around a central point of attraction based on gravitational forces"
  },
  {
    "description": "Thermodynamics",
    "mechanism": "Design of the star as a massive ball of hot plasma generating energy through nuclear fusion to maintain internal pressure and emit light and heat"
  },
  {
    "description": "Sustainability",
    "mechanism": "Enables sustaining life on the planet by emitting light and heat necessary for life"
  }
]
```

A feature in the domain of Kpop that satisfies all the features of the spec is:


In [40]:
# Run the prompt-5 stage on the prompts filled above; the result is the input
# table for prompt 6.
# NOTE(review): in the recorded run df_prompt_5_input had 4 rows (one per
# target domain) but the result shown below has a single row and only one
# "Loading from ..." line was printed -- confirm whether run_prompt_5 is meant
# to process only the first prompt.
df_prompt_6_input = run_prompt_5(filled_strings)

Loading from ../generations/notebook_run/prompt_5_output_idx_0.txt


In [41]:
# Display the prompt-5 result. In the recorded output it carries
# `subvehicle_name`, `subtensor_name_as_json_key` and
# `subvehicle_name_as_json_key` in addition to the df_prompt_5_input columns.
# NOTE(review): the recorded subvehicle_name_as_json_key still contains double
# quotes (superstar_or_"idol"_status), while the metaphor JSON shown later uses
# superstar_or_idol_status -- verify how the key is sanitised upstream.
df_prompt_6_input

Unnamed: 0,level_of_difficulty,tensor_name,source_domain,subtensor_name,subtensor_definition,subtensor_purpose,subtensor_mechanism,text,text_annotated,text_redacted,schema,target_domain,subvehicle_name,subtensor_name_as_json_key,subvehicle_name_as_json_key
0,Elementary,solar system,Astronomy,Sun,The star at the center of the solar system,The Sun serves as the gravitational anchor tha...,The Sun's massive size and gravitational pull ...,The Sun serves as the gravitational anchor tha...,"""""""The Sun serves as the gravitational anchor ...",XXX XXX serves as the gravitational anchor tha...,"[\n {\n ""description"": ""Gravitational Forc...",Kpop,"Superstar or ""idol"" status",sun,"superstar_or_""idol""_status"


### Prompt 6

In [42]:
# Fill the prompt-6 template from df_prompt_6_input, then preview the first
# filled prompt. This rebinds `filled_strings`, so run_prompt_6 must be
# executed after this cell.
filled_strings = get_filled_strings_from_dataframe(
    prompt_6_template,
    df_prompt_6_input,
)
print(filled_strings[0].filled)

Write an extended metaphor for how Sun in solar system is similar in mechanism and function to Superstar or "idol" status in Kpop. Both Sun in solar system and Superstar or "idol" status in Kpop have the following characteristics in common:

Spec:```
[
  {
    "description": "Gravitational Force",
    "mechanism": "Provides the necessary force to keep planets in stable orbits by being an anchor and creating a central point of attraction"
  },
  {
    "description": "Elliptical Orbits",
    "mechanism": "Planets move in elliptical paths around a central point of attraction based on gravitational forces"
  },
  {
    "description": "Thermodynamics",
    "mechanism": "Design of the star as a massive ball of hot plasma generating energy through nuclear fusion to maintain internal pressure and emit light and heat"
  },
  {
    "description": "Sustainability",
    "mechanism": "Enables sustaining life on the planet by emitting light and heat necessary for life"
  }
]
```

Desired format of o

In [43]:
# Run the prompt-6 stage on the prompts filled above; the result is the input
# table for prompt 7.
# NOTE(review): the recorded run printed "Loading from ...", which suggests a
# saved generation was reused rather than a new model call -- confirm against
# run_prompt_6's definition.
df_prompt_7_input = run_prompt_6(filled_strings)

Loading from ../generations/notebook_run/prompt_6_output_idx_0.json


In [44]:
# Display the prompt-6 result. In the recorded output it carries an
# `extended_metaphor` column in addition to the df_prompt_6_input columns.
df_prompt_7_input

Unnamed: 0,level_of_difficulty,tensor_name,source_domain,subtensor_name,subtensor_definition,subtensor_purpose,subtensor_mechanism,text,text_annotated,text_redacted,schema,target_domain,subvehicle_name,subtensor_name_as_json_key,subvehicle_name_as_json_key,extended_metaphor
0,Elementary,solar system,Astronomy,Sun,The star at the center of the solar system,The Sun serves as the gravitational anchor tha...,The Sun's massive size and gravitational pull ...,The Sun serves as the gravitational anchor tha...,"""""""The Sun serves as the gravitational anchor ...",XXX XXX serves as the gravitational anchor tha...,"[\n {\n ""description"": ""Gravitational Forc...",Kpop,"Superstar or ""idol"" status",sun,"superstar_or_""idol""_status","[\n {\n ""metaphor"": ""both Sun in solar sys..."


### Prompt 7

In [45]:
# Fill the prompt-7 template from df_prompt_7_input, then preview the first
# filled prompt. This rebinds `filled_strings`, so run_prompt_7 must be
# executed after this cell.
filled_strings = get_filled_strings_from_dataframe(
    prompt_7_template,
    df_prompt_7_input,
)
print(filled_strings[0].filled)

Write a concise analogy in the form of a poem given the following information.

Information: ```
[
  {
    "metaphor": "both Sun in solar system and Superstar or 'idol' status in Kpop have Gravitational Force",
    "sun": "The gravitational force of the Sun provides the necessary anchor to keep planets in stable orbits, creating a central point of attraction for celestial bodies.",
    "superstar_or_idol_status": "Similarly, the 'idol' status in Kpop acts as a gravitational force, anchoring fans' attention and creating a central point of attraction for the audience."
  },
  {
    "metaphor": "both Sun in solar system and Superstar or 'idol' status in Kpop have Elliptical Orbits",
    "sun": "Planets move in elliptical paths around the Sun, following gravitational forces that dictate their trajectory based on the central point of attraction.",
    "superstar_or_idol_status": "Likewise, the journey of a Kpop 'idol' can be likened to an elliptical orbit, revolving around fame and success 

In [46]:
# Run the prompt-7 stage on the prompts filled above to produce the final table.
# NOTE(review): unlike the earlier stages, the recorded run printed
# "Called model and saved to ...", so this cell appears to have made a fresh
# model call and written its result to disk -- confirm against run_prompt_7's
# definition.
final_output = run_prompt_7(filled_strings)

Called model and saved to ../generations/notebook_run/prompt_7_output_idx_0.txt


In [50]:
# Display the final table; in the recorded output it includes a `final_output`
# column alongside the df_prompt_7_input columns.
# NOTE(review): this cell's execution count (In [50]) is out of sequence with
# its neighbours (In [46], In [47]); re-run the notebook top to bottom to
# confirm the output reproduces.
final_output

Unnamed: 0,level_of_difficulty,tensor_name,source_domain,subtensor_name,subtensor_definition,subtensor_purpose,subtensor_mechanism,text,text_annotated,text_redacted,schema,target_domain,subvehicle_name,subtensor_name_as_json_key,subvehicle_name_as_json_key,extended_metaphor,final_output
0,Elementary,solar system,Astronomy,Sun,The star at the center of the solar system,The Sun serves as the gravitational anchor tha...,The Sun's massive size and gravitational pull ...,The Sun serves as the gravitational anchor tha...,"""""""The Sun serves as the gravitational anchor ...",XXX XXX serves as the gravitational anchor tha...,"[\n {\n ""description"": ""Gravitational Forc...",Kpop,"Superstar or ""idol"" status",sun,"superstar_or_""idol""_status","[\n {\n ""metaphor"": ""both Sun in solar sys...","Just like the Sun in the celestial ballet,\nKp..."


In [47]:
# Print the generated text from the first row so its line breaks render,
# rather than the escaped "\n" form shown in the table view.
print(final_output["final_output"].iloc[0])

Just like the Sun in the celestial ballet,
Kpop idols too have a gravitational sway.
An anchor, a force, pulling fans near,
Creating orbits of fame, so clear.

Elliptical paths both entities tread,
Guided by forces, their journeys are led.
The Sun's pull on planets, a familiar dance,
Idols' fame revolves in a similar trance.

Thermodynamics at play, energy aglow,
The Sun's fusion lights up its show.
Likewise, idol status, talent ablaze,
Radiating energy in captivating ways.

Sustainability, a key for both to last,
The Sun's light, for life's cast.
Idols' influence, a vital beam,
Creating a space where dreams can gleam.


In [49]:
# Number of rows in the final table.
final_output.shape[0]

1