In [5]:
from llm import chat_openai
import json
from datetime import datetime
import os
from autogpt.commands.web_search import safe_google_results, web_search_ddg

In [6]:
import re

def sanitize_filename(filename):
    return re.sub(r'[^a-zA-Z0-9]', '_', filename)

In [7]:
search_query = "How efficiently do the ECR enzymes work, especially in Kitsatospor setae bacteria?"

In [8]:
search_query_file_safe = sanitize_filename(search_query)

In [5]:
# folder_path = f'autoscious_logs/decompositions/{sanitize_filename(search_query)}'
# if not os.path.exists(folder_path):
#     os.makedirs(folder_path)

In [101]:
# folder_path = f'autoscious_logs/inferences/{sanitize_filename(search_query)}'
# if not os.path.exists(folder_path):
#     os.makedirs(folder_path)

## MECE question decomposition

In [10]:
seed_initial_question_decomposition_prompt = f'''
Research project question: {search_query}

Task: Decompose this research question into 1) project objective: what should be the outcome of the project, 2) key drivers: what are the key problem drivers to achieve the project objective, 3) hypotheses: what can already be hypothesized about the key drivers, 4) key questions: what analyses need to be conducted to verify, falsify, or change the hypotheses?

Rules: Be as mutually exclusive, completely exhaustive (MECE) as possible. The output should be in nested JSON format of 
```json
{{
  "project_objective": "",
  "key_drivers": {{
    "1": {{
      "driver": "",
      "hypotheses": {{
          "1": {{
            "hypothesis": "",
            "key_questions": {{
                "1": "", 
                "2": "",
                etc.
            }},
          }},
          etc.
      }},
    "2" : {{}},
    etc.
    }}"",
  }},
}}
```
Respond only with the output, with no explanation or conversation.
'''

In [11]:
print("seed_initial_question_decomposition_prompt", seed_initial_question_decomposition_prompt)

seed_initial_question_decomposition_prompt 
Research project question: How efficiently do the ECR enzymes work, especially in Kitsatospor setae bacteria?

Task: Decompose this research question into 1) project objective: what should be the outcome of the project, 2) key drivers: what are the key problem drivers to achieve the project objective, 3) hypotheses: what can already be hypothesized about the key drivers, 4) key questions: what analyses need to be conducted to verify, falsify, or change the hypotheses?

Rules: Be as mutually exclusive, completely exhaustive (MECE) as possible. The output should be in nested JSON format of 
```json
{
  "project_objective": "",
  "key_drivers": {
    "1": {
      "driver": "",
      "hypotheses": {
          "1": {
            "hypothesis": "",
            "key_questions": {
                "1": "", 
                "2": "",
                etc.
            },
          },
          etc.
      },
    "2" : {},
    etc.
    }"",
  },
}
```
Respon

In [12]:
res = chat_openai(seed_initial_question_decomposition_prompt, model="gpt-3.5-turbo")
print(res[0])

Prompt:  
Research project question: How efficiently do the ECR enzymes work, especially in Kitsatospor setae bacteria?

Task: Decompose this research question into 1) project objective: what should be the outcome of the project, 2) key drivers: what are the key problem drivers to achieve the project objective, 3) hypotheses: what can already be hypothesized about the key drivers, 4) key questions: what analyses need to be conducted to verify, falsify, or change the hypotheses?

Rules: Be as mutually exclusive, completely exhaustive (MECE) as possible. The output should be in nested JSON format of 
```json
{
  "project_objective": "",
  "key_drivers": {
    "1": {
      "driver": "",
      "hypotheses": {
          "1": {
            "hypothesis": "",
            "key_questions": {
                "1": "", 
                "2": "",
                etc.
            },
          },
          etc.
      },
    "2" : {},
    etc.
    }"",
  },
}
```
Respond only with the output, with no ex

Completion info:  {
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "message": {
        "content": "{\n  \"project_objective\": \"To determine the efficiency of ECR enzymes in Kitsatospor setae bacteria\",\n  \"key_drivers\": {\n    \"1\": {\n      \"driver\": \"ECR enzyme activity\",\n      \"hypotheses\": {\n        \"1\": {\n          \"hypothesis\": \"ECR enzyme activity is high in Kitsatospor setae bacteria\",\n          \"key_questions\": {\n            \"1\": \"What is the level of ECR enzyme activity in Kitsatospor setae bacteria?\",\n            \"2\": \"How does ECR enzyme activity vary under different conditions?\",\n            \"3\": \"Are there any factors that enhance or inhibit ECR enzyme activity?\"\n          }\n        }\n      }\n    }\n  }\n}",
        "role": "assistant"
      }
    }
  ],
  "created": 1690744745,
  "id": "chatcmpl-7i6ETD80Zlsi2RIIvZyM7UuZzfl1q",
  "model": "gpt-3.5-turbo-0613",
  "object": "chat.completion",
  "usage"

In [13]:
question_decomposition = json.loads(res[0])

In [28]:
question_decomposition['search_query'] = search_query

In [29]:
print(question_decomposition)

{'project_objective': 'To determine the efficiency of ECR enzymes in Kitsatospor setae bacteria', 'key_drivers': {'1': {'driver': 'ECR enzyme activity', 'hypotheses': {'1': {'hypothesis': 'ECR enzyme activity is high in Kitsatospor setae bacteria', 'key_questions': {'1': 'What is the level of ECR enzyme activity in Kitsatospor setae bacteria?', '2': 'How does ECR enzyme activity vary under different conditions?', '3': 'Are there any factors that enhance or inhibit ECR enzyme activity?'}}}}}, 'search_query': 'How efficiently do the ECR enzymes work, especially in Kitsatospor setae bacteria?'}


In [15]:
key_questions = question_decomposition['key_drivers']['1']['hypotheses']['1']['key_questions']
print(key_questions)

{'1': 'What is the level of ECR enzyme activity in Kitsatospor setae bacteria?', '2': 'How does ECR enzyme activity vary under different conditions?', '3': 'Are there any factors that enhance or inhibit ECR enzyme activity?'}


In [47]:
with open(f'autoscious_logs/decompositions/{search_query_file_safe}.json', 'w') as f:
    json.dump(question_decomposition, f, indent=2)

FileNotFoundError: [Errno 2] No such file or directory: 'autoscious_logs/decompositions/How_efficiently_do_the_ECR_enzymes_work__especially_in_Kitsatospor_setae_bacteria_.json'

## Search

In [5]:
with open(f'autoscious_logs/{sanitize_filename(search_query)}/decompositions/improved_decomposition.json', 'r') as f:
        question_decomposition = json.load(f)
key_questions = question_decomposition['key_drivers']['1']['hypotheses']['1']['key_questions']
print(key_questions)

{'1': 'What is the level of ECR enzyme activity in Kitsatospor setae bacteria?', '2': 'How does ECR enzyme activity compare to other bacteria?', '3': 'What factors influence ECR enzyme activity in Kitsatospor setae bacteria?', '4': 'What are the potential applications or implications of high ECR enzyme activity in Kitsatospor setae bacteria?'}


In [6]:
key_questions['1']

'What is the level of ECR enzyme activity in Kitsatospor setae bacteria?'

In [7]:
web_search_res = web_search_ddg(key_questions['1'])

In [8]:
web_search_res

'[\n    {\n        "title": "In the active centre of carbon dioxide conversion",\n        "href": "https://www.mpg.de/13672605/0705-terr-138345-nature-s-fantastic-four",\n        "body": "They analyzed the ECR from the bacterium Kitasatospora setae, currently the fastest known carboxylase. Using a combined approach of structural biology, biochemistry and computer simulations, they were able to understand for the first time how the enzyme binds and converts CO 2. Teamwork in the active centre"\n    },\n    {\n        "title": "Awakening the Sleeping Carboxylase Function of Enzymes: Engineering the ...",\n        "href": "https://pubs.acs.org/doi/10.1021/jacs.9b03431",\n        "body": "However, for enoyl-CoA carboxylase/reductase from Kitasatospora setae (ECR Ks), four conserved amino acids that form a CO 2-binding pocket at the active site were described recently (Figure 1a). These four amino acids anchor and position the CO 2 molecule during catalysis, in which a reactive enolate is f

In [11]:
# save to main
import os

if not os.path.exists(f'autoscious_logs/{sanitize_filename(search_query)}/facts'):
    os.makedirs(f'autoscious_logs/{sanitize_filename(search_query)}/facts')

if not os.path.exists(f'autoscious_logs/{sanitize_filename(search_query)}/facts/kq0'):
    os.makedirs(f'autoscious_logs/{sanitize_filename(search_query)}/facts/kq0')
with open(f'autoscious_logs/{sanitize_filename(search_query)}/facts/kq0/web_search_res.json', 'w') as f:
    json.dump(json.loads(web_search_res), f, indent=2)

In [14]:
# Load in decomposition
with open(f'autoscious_logs/{sanitize_filename(search_query)}/decompositions/improved_decomposition.json', 'r') as f:
        question_decomposition = json.load(f)
key_questions = question_decomposition['key_drivers']['1']['hypotheses']['1']['key_questions']
print(key_questions)

{'1': 'What is the level of ECR enzyme activity in Kitsatospor setae bacteria?', '2': 'How does ECR enzyme activity compare to other bacteria?', '3': 'What factors influence ECR enzyme activity in Kitsatospor setae bacteria?', '4': 'What are the potential applications or implications of high ECR enzyme activity in Kitsatospor setae bacteria?'}


In [19]:
# MAIN SEARCH CODE: Go through each key question and make inferences on how its findings on the hypothesis.
import os

if not os.path.exists(f'autoscious_logs/{sanitize_filename(search_query)}/facts'):
    os.makedirs(f'autoscious_logs/{sanitize_filename(search_query)}/facts')

for kq_idx in key_questions:
    web_search_res = web_search_ddg(key_questions[kq_idx])

    # Save files
    folder_path = f'autoscious_logs/{sanitize_filename(search_query)}/facts/kq{int(kq_idx)-1}'
    if not os.path.exists(folder_path):
        os.makedirs(folder_path)
    with open(folder_path + '/web_search_res.json', 'w') as f:
        json.dump(json.loads(web_search_res), f, indent=2)

In [46]:
from autogpt.commands.web_selenium import browse_website

NoConsoleScreenBufferError: Found xterm-color, while expecting a Windows console. Maybe try to run this program using "winpty" or run it in cmd.exe instead. Or otherwise, in case of Cygwin, use the Python executable that is compiled for Cygwin.

## Extract relevant facts and evidence for key question

In [21]:
with open(f'autoscious_logs/{sanitize_filename(search_query)}/decompositions/improved_decomposition.json', 'r') as f:
        question_decomposition = json.load(f)


key_questions = question_decomposition['key_drivers']['1']['hypotheses']['1']['key_questions']
print(key_questions)

{'1': 'What is the level of ECR enzyme activity in Kitsatospor setae bacteria?', '2': 'How does ECR enzyme activity compare to other bacteria?', '3': 'What factors influence ECR enzyme activity in Kitsatospor setae bacteria?', '4': 'What are the potential applications or implications of high ECR enzyme activity in Kitsatospor setae bacteria?'}


In [22]:
numbered_key_questions_string = ""
for key, value in key_questions.items():
    first_two_words = ' '.join(value.split())
    numbered_key_questions_string += f'{int(key) - 1}. {first_two_words}\n'

print(numbered_key_questions_string)

0. What is the level of ECR enzyme activity in Kitsatospor setae bacteria?
1. How does ECR enzyme activity compare to other bacteria?
2. What factors influence ECR enzyme activity in Kitsatospor setae bacteria?
3. What are the potential applications or implications of high ECR enzyme activity in Kitsatospor setae bacteria?



In [None]:
# Get list of search results
with open(f'autoscious_logs/{sanitize_filename(search_query)}/facts/kq0/web_search_res.json', 'r') as f:
    search_results = json.loads(f.read())

print(search_results)

#### Extract facts from website

In [5]:
# Get the list of key questions
def get_key_questions_list_string(search_query_file_safe):
    file_path = f'autoscious_logs/{search_query_file_safe}/decompositions/improved_decomposition.json'

    with open(file_path, 'r') as f:
            question_decomposition = json.load(f)
    key_questions = question_decomposition['key_drivers']['1']['hypotheses']['1']['key_questions']
    print("\nkey_questions\n", key_questions)

    numbered_key_questions_string = ""
    for key, value in key_questions.items():
        first_two_words = ' '.join(value.split())
        numbered_key_questions_string += f'{int(key) - 1}. {first_two_words}\n'

    print("\nnumbered_key_questions_string\n", numbered_key_questions_string)
    return key_questions, numbered_key_questions_string

In [6]:
import os

def extract_facts_from_website_text(search_query_file_safe, key_questions_list_string, website_title, website_text, website_url):
    seed_initial_question_decomposition_prompt = f'''
Key questions (index : question): 
{key_questions_list_string}

Task: 
Extract and output as many accurate direct quotes from the text that are relevant to answering the key questions and its most relevant key question index. Format as a JSON.
```json
{{
  "1": {{
    "quote": "",
    "index": 0
  }},
  etc.
}}
```

Text: {website_text}

Respond only with the output, with no explanation or conversation.
'''
    # Ask GPT the prompt
    print("seed_initial_question_decomposition_prompt", seed_initial_question_decomposition_prompt)
    res = chat_openai(seed_initial_question_decomposition_prompt, model="gpt-3.5-turbo")
    print("Extracted quotes: ", res[0])

    # Save the quote to the corresponding key question index file
    res_json = json.loads(res[0])
    for key, value in res_json.items():
        index = value['index']
        quote = value['quote']

        file_name = f'autoscious_logs/{search_query_file_safe}/facts/kq{index}/{website_title}_facts.txt'

        with open(file_name, 'a', encoding='utf-8') as f:
            f.write(quote.replace('/"', '"') + f"[{website_url}]" + os.linesep)

    return

In [7]:
def chunk_text(text: str, chunk_size: int, overlap: int) -> list[str]:
    chunks = []
    for i in range(0, len(text), chunk_size - overlap):
        chunks.append(text[i : i + chunk_size])
        print("len(chunk): ", text[i : i + chunk_size])
    return chunks

In [8]:
import os
import signal
import subprocess

def kill_chromedriver():
    # Find and kill chromedriver processes
    if os.name == 'nt':  # For Windows
        try:
            subprocess.check_call("taskkill /im chromedriver.exe /f", shell=True)
        except subprocess.CalledProcessError:
            print('No existing chromedriver processes were found.')
    else:  # For Unix-like OS
        try:
            subprocess.check_call(['pkill', 'chromedriver'])
        except subprocess.CalledProcessError:
            print('No existing chromedriver processes were found.')

In [9]:
from autogpt.commands.web_selenium import scrape_text_with_selenium_no_agent
import json
import os
import re

chunk_size = 4000
overlap = 25
MAX_TOKENS = 10000 # Roughly 3K tokens, $0.10 per MAX_TOKENs
curr_tokens = 0

def sanitize_filename(filename):
    return re.sub(r'[^a-zA-Z0-9]', '_', filename)

# 1) Get key questions
search_query_file_safe = sanitize_filename(search_query)
key_questions_dict, key_questions_list_string = get_key_questions_list_string(search_query_file_safe)

# 2) Open the web search results for key question 1
# Get list of search results
with open(f'autoscious_logs/{sanitize_filename(search_query)}/facts/kq0/web_search_res.json', 'r') as f:
    search_results = json.loads(f.read())


kill_chromedriver()
driver = None
for web_source in search_results:
    print("Current tokens: ", curr_tokens)
    if curr_tokens > MAX_TOKENS:
        print("Max tokens reached!")
        break

    url = web_source['href']
    print("Search result: ", web_source['title'], url)
    file_title = sanitize_filename(web_source["title"])
    file_path = f'autoscious_logs/{sanitize_filename(search_query)}/facts/kq0/{file_title}_complete_text.txt'

    # If we haven't already scraped the text
    if not os.path.exists(file_path):
        print("Scraping text...")

        # Scrape url
        driver, text = scrape_text_with_selenium_no_agent(url, driver)

        # Extract relevant facts to key questions from web search results
        # TODO: Chunk text if text is too long
        chunks = chunk_text(text, chunk_size, overlap)
        for i, chunk in enumerate(chunks):
            print(f"Chunk: {i} / {len(chunks)}")
            extract_facts_from_website_text(search_query_file_safe, key_questions_list_string, file_title, chunk, url)

            curr_tokens += len(chunks)
            if curr_tokens > MAX_TOKENS:
                print("Max tokens reached!")
                break
            else:
                break # for debugging

        # Save complete text to mark already analyzed
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(text)
    else:
        print("Already scraped!")

if driver:
    driver.quit()
print("SEARCH COMPLETE!")


key_questions
 {'1': 'What is the level of ECR enzyme activity in Kitsatospor setae bacteria?', '2': 'How does ECR enzyme activity compare to other bacteria?', '3': 'What factors influence ECR enzyme activity in Kitsatospor setae bacteria?', '4': 'What are the potential applications or implications of high ECR enzyme activity in Kitsatospor setae bacteria?'}

numbered_key_questions_string
 0. What is the level of ECR enzyme activity in Kitsatospor setae bacteria?
1. How does ECR enzyme activity compare to other bacteria?
2. What factors influence ECR enzyme activity in Kitsatospor setae bacteria?
3. What are the potential applications or implications of high ECR enzyme activity in Kitsatospor setae bacteria?

No existing chromedriver processes were found.
Current tokens:  0
Search result:  In the active centre of carbon dioxide conversion https://www.mpg.de/13672605/0705-terr-138345-nature-s-fantastic-four
Already scraped!
Current tokens:  0
Search result:  Awakening the Sleeping Carb

## Logical inferences

In [1]:
# Open the file in read mode ('r')

key_question_index = 0

with open(f'autoscious_logs/kd_1/h_1/f_{key_question_index}.txt', 'r', encoding='utf-8') as file:
    # Read the entire contents of the file
    facts_text = file.read()

In [4]:
facts_text[:500]

'Intersubunit Coupling Enables Fast CO2-Fixation by Reductive Carboxylases[https://pubs.acs.org/doi/10.1021/acscentsci.2c00057]\n\nKitsatospor setae bacteria[https://pubs.acs.org/doi/10.1021/acscentsci.2c00057]\n\nECR enzyme activity[https://pubs.acs.org/doi/10.1021/acscentsci.2c00057]\n\nThe K. setae ECR is a homotetramer that differentiates into a pair of dimers of open- and closed-form subunits in the catalytically active state.[https://pubs.acs.org/doi/10.1021/acscentsci.2c00057]\n\nUsing molecular d'

In [5]:
# Remove URLs from text for easier analysis
import re

cleaned_facts_text = re.sub(r'\[.*?\]', '', facts_text)

print(cleaned_facts_text[:500])


Intersubunit Coupling Enables Fast CO2-Fixation by Reductive Carboxylases

Kitsatospor setae bacteria

ECR enzyme activity

The K. setae ECR is a homotetramer that differentiates into a pair of dimers of open- and closed-form subunits in the catalytically active state.

Using molecular dynamics simulations and structure-based mutagenesis, we show that catalysis is synchronized in the K. setae ECR across the pair of dimers.

In a recent study, we reported the ternary structure of the ECR (EC 1.3.


In [6]:
len(cleaned_facts_text)

20030

In [13]:
key_questions_dict, key_questions_list_string = get_key_questions_list_string(search_query_file_safe)


key_questions
 {'1': 'What is the level of ECR enzyme activity in Kitsatospor setae bacteria?', '2': 'How does ECR enzyme activity vary under different conditions?', '3': 'Are there any factors that enhance or inhibit ECR enzyme activity?'}

numbered_key_questions_string
 0. What is the level of ECR enzyme activity in Kitsatospor setae bacteria?
1. How does ECR enzyme activity vary under different conditions?
2. Are there any factors that enhance or inhibit ECR enzyme activity?



In [14]:
key_question_string = key_questions_dict[str(key_question_index + 1)]

In [15]:
hypothesis_string = "ECR enzyme activity is high in Kitsatospor setae bacteria"

In [126]:
MAX_TOKENS = 10000

def get_report_findings_prompt(hypothesis_string, key_question_string, facts_text):
    return f'''
Initial hypothesis: {hypothesis_string}

Key question: {key_question_string}

Facts: {facts_text[:MAX_TOKENS]}

Task: 
Based on the facts, report your findings on the current answer to the key question and how this verifies, falsifies, or changes the initial hypothesis. Be sure to use accurate direct quotes and their urls from the the facts used in each reasoning step. Think step by step.

The output should be of the format: 
Our current hypothesis is ...
This is based on the following reasons ...
'''

In [17]:
res = chat_openai(get_report_findings_prompt(hypothesis_string, key_question_string, facts_text), model="gpt-3.5-turbo")
print(res[0])

Prompt:  
Initial hypothesis: ECR enzyme activity is high in Kitsatospor setae bacteria

Key question: What is the level of ECR enzyme activity in Kitsatospor setae bacteria?

Facts: Intersubunit Coupling Enables Fast CO2-Fixation by Reductive Carboxylases[https://pubs.acs.org/doi/10.1021/acscentsci.2c00057]

Kitsatospor setae bacteria[https://pubs.acs.org/doi/10.1021/acscentsci.2c00057]

ECR enzyme activity[https://pubs.acs.org/doi/10.1021/acscentsci.2c00057]

The K. setae ECR is a homotetramer that differentiates into a pair of dimers of open- and closed-form subunits in the catalytically active state.[https://pubs.acs.org/doi/10.1021/acscentsci.2c00057]

Using molecular dynamics simulations and structure-based mutagenesis, we show that catalysis is synchronized in the K. setae ECR across the pair of dimers.[https://pubs.acs.org/doi/10.1021/acscentsci.2c00057]

In a recent study, we reported the ternary structure of the ECR (EC 1.3.1.85) from K. setae in a complex with ethylmalonyl-C

In [20]:
hypothesis = res[0]
print(hypothesis)

Our current hypothesis is that ECR enzyme activity is high in Kitsatospor setae bacteria. This hypothesis is based on the following reasons:

1. The K. setae ECR is a homotetramer that differentiates into a pair of dimers of open- and closed-form subunits in the catalytically active state. [source: https://pubs.acs.org/doi/10.1021/acscentsci.2c00057]

2. The presence of the intact butyryl-CoA thioester in the active site of the closed-form subunits strongly suggests that the A and B conformations represent the Michaelis complex in which the substrate and cofactor are positioned for catalysis. [source: https://pubs.acs.org/doi/10.1021/acscentsci.2c00057]

3. Combining experimental biochemistry, protein crystallography, and advanced computer simulations, it has been shown that four amino acids (N81, F170, E171, and H365) are required to create a highly efficient CO2-fixing enzyme in K. setae ECR. [source: https://pubs.acs.org/doi/10.1021/acscentsci.2c00057]

Based on these reasons, it ca

In [21]:
rule = "Our current hypothesis is that ECR enzyme activity is high in Kitsatospor setae bacteria."
facts = '''
1. The K. setae ECR is a homotetramer that differentiates into a pair of dimers of open- and closed-form subunits in the catalytically active state. [source: https://pubs.acs.org/doi/10.1021/acscentsci.2c00057]

2. The presence of the intact butyryl-CoA thioester in the active site of the closed-form subunits strongly suggests that the A and B conformations represent the Michaelis complex in which the substrate and cofactor are positioned for catalysis. [source: https://pubs.acs.org/doi/10.1021/acscentsci.2c00057]

3. Combining experimental biochemistry, protein crystallography, and advanced computer simulations, it has been shown that four amino acids (N81, F170, E171, and H365) are required to create a highly efficient CO2-fixing enzyme in K. setae ECR. [source: https://pubs.acs.org/doi/10.1021/acscentsci.2c00057]
'''

### Strength of Induction

In [75]:

def retrieve_deductive_consistency_evaluator_prompt(rule, facts):
    prompt = f'''
Premises:
{facts}

Conclusion: 
{rule}

Based on the premises and assuming all the premises are true, is the conclusion also true, partially true, or false? Does the conclusion contradict any of the premises or does it logically follow from them? And what percentage degree of support do the premises provide for the truth of the conclusion?

Your output should be of the following example format:
```json
{{
    "label": "",
    "score": XX,
    "reasoning": "..."
}}
```
'''
    return prompt

In [111]:
res = chat_openai(retrieve_deductive_consistency_evaluator_prompt(rule, facts))
print(res[0])
strength_of_induction_res = json.loads(res[0])

Prompt:  
Premises:

1. The K. setae ECR is a homotetramer that differentiates into a pair of dimers of open- and closed-form subunits in the catalytically active state. [source: https://pubs.acs.org/doi/10.1021/acscentsci.2c00057]

2. The presence of the intact butyryl-CoA thioester in the active site of the closed-form subunits strongly suggests that the A and B conformations represent the Michaelis complex in which the substrate and cofactor are positioned for catalysis. [source: https://pubs.acs.org/doi/10.1021/acscentsci.2c00057]

3. Combining experimental biochemistry, protein crystallography, and advanced computer simulations, it has been shown that four amino acids (N81, F170, E171, and H365) are required to create a highly efficient CO2-fixing enzyme in K. setae ECR. [source: https://pubs.acs.org/doi/10.1021/acscentsci.2c00057]


Conclusion: 
Our current hypothesis is that ECR enzyme activity is high in Kitsatospor setae bacteria.

Based on the premises and assuming all the pr

Completion info:  {
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "message": {
        "content": "{\n    \"label\": \"Partially true\",\n    \"score\": 80,\n    \"reasoning\": \"The conclusion is partially true. The premises provide strong evidence that the ECR enzyme activity is high in Kitsatospor setae bacteria. Premises 1 and 2 establish the presence of the catalytically active state and the positioning of the substrate and cofactor for catalysis. Premise 3 further supports the efficiency of the CO2-fixing enzyme in K. setae ECR. However, it is important to note that the conclusion is a hypothesis and may require further experimental validation to confirm the actual enzyme activity in Kitsatospor setae bacteria.\"\n}",
        "role": "assistant"
      }
    }
  ],
  "created": 1690857689,
  "id": "chatcmpl-7iZc9wof7HyQvmSVddRKXUYSFpgcV",
  "model": "gpt-3.5-turbo-0613",
  "object": "chat.completion",
  "usage": {
    "completion_tokens": 137,
    "pr

### Coherence with Background Knowledge

In [76]:

def retrieve_indiscriminate_confirmation_evaluator_prompt(rule, facts):
    prompt = f'''
Conclusion: 
{rule}

True, partially true, or false, and to what percentage degree: does the conclusion align with what is generally accepted or known about the subject?

Your output should be of the following example format:
```json
{{
    "label": "",
    "score": XX,
    "reasoning": "..."
}}
```
'''
    return prompt

In [113]:
res = chat_openai(retrieve_indiscriminate_confirmation_evaluator_prompt(rule, facts))
print(res[0])
coherence_with_background_knowledge_res = json.loads(res[0])

Prompt:  
Conclusion: 
Our current hypothesis is that ECR enzyme activity is high in Kitsatospor setae bacteria.

True, partially true, or false, and to what percentage degree: does the conclusion align with what is generally accepted or known about the subject?

Your output should be of the following example format:
```json
{
    "label": "",
    "score": XX,
    "reasoning": "..."
}
```

Completion info:  {
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "message": {
        "content": "{\n    \"label\": \"Partially true\",\n    \"score\": 70,\n    \"reasoning\": \"While it is known that ECR enzyme activity is present in Kitsatospor setae bacteria, the conclusion does not specify the level of enzyme activity. Therefore, it is only partially true.\"\n}",
        "role": "assistant"
      }
    }
  ],
  "created": 1690857892,
  "id": "chatcmpl-7iZfQBMKZzA8o5i65h9bgvVUmLU4r",
  "model": "gpt-3.5-turbo-0613",
  "object": "chat.completion",
  "usage": {
    "co

### Biases or Fallacies

In [77]:

def retrieve_biases_fallacies_evaluator_prompt(rule, facts):
    prompt = f'''
Premises:
{facts}

Conclusion: 
{rule}

True or Fale: Are there any biases or logical fallacies present that might undermine the argument? Common ones to look for include hasty generalization, or appeal to ignorance.

Your output should be of the following example format:
```json
{{
    "label": "",
    "reasoning": "..."
}}
```
'''
    return prompt

In [117]:
res = chat_openai(retrieve_biases_fallacies_evaluator_prompt(rule, facts))
print(res[0])
biases_or_fallacies_res = json.loads(res[0])

Prompt:  
Premises:

1. The K. setae ECR is a homotetramer that differentiates into a pair of dimers of open- and closed-form subunits in the catalytically active state. [source: https://pubs.acs.org/doi/10.1021/acscentsci.2c00057]

2. The presence of the intact butyryl-CoA thioester in the active site of the closed-form subunits strongly suggests that the A and B conformations represent the Michaelis complex in which the substrate and cofactor are positioned for catalysis. [source: https://pubs.acs.org/doi/10.1021/acscentsci.2c00057]

3. Combining experimental biochemistry, protein crystallography, and advanced computer simulations, it has been shown that four amino acids (N81, F170, E171, and H365) are required to create a highly efficient CO2-fixing enzyme in K. setae ECR. [source: https://pubs.acs.org/doi/10.1021/acscentsci.2c00057]


Conclusion: 
Our current hypothesis is that ECR enzyme activity is high in Kitsatospor setae bacteria.

True or Fale: Are there any biases or logical

## Verifier results / offering feedback

In [123]:
def use_verifiers(rule, facts): 
    strength_of_induction_res = json.loads(chat_openai(retrieve_deductive_consistency_evaluator_prompt(rule, facts))[0])
    coherence_with_background_knowledge_res = json.loads(chat_openai(retrieve_indiscriminate_confirmation_evaluator_prompt(rule, facts))[0])
    biases_or_fallacies_res = json.loads(chat_openai(retrieve_biases_fallacies_evaluator_prompt(rule, facts))[0])
    return strength_of_induction_res, coherence_with_background_knowledge_res, biases_or_fallacies_res
    

In [124]:
strength_of_induction_res, coherence_with_background_knowledge_res, biases_or_fallacies_res = use_verifiers(rule, facts)

Prompt:  
Premises:

1. The K. setae ECR is a homotetramer that differentiates into a pair of dimers of open- and closed-form subunits in the catalytically active state. [source: https://pubs.acs.org/doi/10.1021/acscentsci.2c00057]

2. The presence of the intact butyryl-CoA thioester in the active site of the closed-form subunits strongly suggests that the A and B conformations represent the Michaelis complex in which the substrate and cofactor are positioned for catalysis. [source: https://pubs.acs.org/doi/10.1021/acscentsci.2c00057]

3. Combining experimental biochemistry, protein crystallography, and advanced computer simulations, it has been shown that four amino acids (N81, F170, E171, and H365) are required to create a highly efficient CO2-fixing enzyme in K. setae ECR. [source: https://pubs.acs.org/doi/10.1021/acscentsci.2c00057]


Conclusion: 
Our current hypothesis is that ECR enzyme activity is high in Kitsatospor setae bacteria.

Based on the premises and assuming all the pr

In [125]:
print("strength_of_induction_res\n", strength_of_induction_res, "\n\ncoherence_with_background_knowledge_res\n", coherence_with_background_knowledge_res, "\n\nbiases_or_fallacies_res\n", biases_or_fallacies_res)

strength_of_induction_res
 {'label': 'Partially true', 'score': 80, 'reasoning': 'The conclusion is partially true. The premises provide strong evidence that the ECR enzyme activity is high in Kitsatospor setae bacteria. Premises 1 and 2 establish the presence of the catalytically active state and the positioning of the substrate and cofactor for catalysis. Premise 3 further supports the efficiency of the CO2-fixing enzyme in K. setae ECR. However, it is important to note that the conclusion is a hypothesis and may require further experimental validation to confirm the actual enzyme activity in Kitsatospor setae bacteria.'} 

coherence_with_background_knowledge_res
 {'label': 'Partially true', 'score': 70, 'reasoning': 'While it is known that ECR enzyme activity is present in Kitsatospor setae bacteria, the conclusion does not specify the level of enzyme activity. Therefore, it is only partially true.'} 

biases_or_fallacies_res
 {'label': 'False', 'reasoning': 'There are no biases or 

In [140]:
# Ask the model to improve it's hypothesis
MAX_TOKENS = 10000

report_findings_prompt = f'''

Current hypothesis: {hypothesis_string}

Key question: {key_question_string}

Facts: {facts_text[:MAX_TOKENS]}

Task: 
Based on the facts, report your findings on the current answer to the key question and how this verifies, falsifies, or changes the hypothesis. Be sure to use accurate direct quotes and their urls from the the facts used in each reasoning step. Think step by step. If the hypothesis is the same, return "same" as the hypothesis.

Feedback on current hypothesis reasoning:
1. The strength of induction was determined to be {strength_of_induction_res['label']} with a {strength_of_induction_res['score']}% degree of support from premises for truth of the hypothesis. Here was the reasoning: {strength_of_induction_res['reasoning']}
2. The coherence with background knowledge was determined to be {coherence_with_background_knowledge_res['label']} with a {coherence_with_background_knowledge_res['score']}% degree that the hypothesis aligns with what is generally accepted or known about the subject. Here was the reasoning: 
{coherence_with_background_knowledge_res['reasoning']}
3. The existence of biases or logial fallcies present was determined to be {biases_or_fallacies_res['label']}. Here was the reasoning: {biases_or_fallacies_res['reasoning']}

The output new hypothesis and reasons based on facts should be in the following format: 
```json
{{
    "hypothesis: "",
    "reasons", ""
}}
```
'''
print(report_findings_prompt)



Current hypothesis: ECR enzyme activity is high in Kitsatospor setae bacteria

Key question: What is the level of ECR enzyme activity in Kitsatospor setae bacteria?

Facts: Intersubunit Coupling Enables Fast CO2-Fixation by Reductive Carboxylases[https://pubs.acs.org/doi/10.1021/acscentsci.2c00057]

Kitsatospor setae bacteria[https://pubs.acs.org/doi/10.1021/acscentsci.2c00057]

ECR enzyme activity[https://pubs.acs.org/doi/10.1021/acscentsci.2c00057]

The K. setae ECR is a homotetramer that differentiates into a pair of dimers of open- and closed-form subunits in the catalytically active state.[https://pubs.acs.org/doi/10.1021/acscentsci.2c00057]

Using molecular dynamics simulations and structure-based mutagenesis, we show that catalysis is synchronized in the K. setae ECR across the pair of dimers.[https://pubs.acs.org/doi/10.1021/acscentsci.2c00057]

In a recent study, we reported the ternary structure of the ECR (EC 1.3.1.85) from K. setae in a complex with ethylmalonyl-CoA and N

In [141]:
res = chat_openai(report_findings_prompt, model="gpt-3.5-turbo")
print(res[0])

Prompt:  

Current hypothesis: ECR enzyme activity is high in Kitsatospor setae bacteria

Key question: What is the level of ECR enzyme activity in Kitsatospor setae bacteria?

Facts: Intersubunit Coupling Enables Fast CO2-Fixation by Reductive Carboxylases[https://pubs.acs.org/doi/10.1021/acscentsci.2c00057]

Kitsatospor setae bacteria[https://pubs.acs.org/doi/10.1021/acscentsci.2c00057]

ECR enzyme activity[https://pubs.acs.org/doi/10.1021/acscentsci.2c00057]

The K. setae ECR is a homotetramer that differentiates into a pair of dimers of open- and closed-form subunits in the catalytically active state.[https://pubs.acs.org/doi/10.1021/acscentsci.2c00057]

Using molecular dynamics simulations and structure-based mutagenesis, we show that catalysis is synchronized in the K. setae ECR across the pair of dimers.[https://pubs.acs.org/doi/10.1021/acscentsci.2c00057]

In a recent study, we reported the ternary structure of the ECR (EC 1.3.1.85) from K. setae in a complex with ethylmalonyl-

Completion info:  {
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "message": {
        "content": "{\n    \"hypothesis\": \"The level of ECR enzyme activity in Kitsatospor setae bacteria is high.\",\n    \"reasons\": [\n        \"The presence of the catalytically active state and the specific amino acids required for efficient CO2-fixing enzyme support the hypothesis. (source: https://pubs.acs.org/doi/10.1021/acscentsci.2c00057)\",\n        \"Combining experimental biochemistry, protein crystallography, and advanced computer simulations, it was shown that four amino acids (N81, F170, E171, and H365) are required to create a highly efficient CO2-fixing enzyme. (source: https://pubs.acs.org/doi/10.1021/acscentsci.2c00057)\",\n        \"The study reveals unprecedented molecular details of selective CO2 binding and C-C-bond formation during the catalytic cycle of nature's most efficient CO2-fixing enzyme. (source: https://pubs.acs.org/doi/10.1021/acscentsci.2c

In [None]:
# TODO: Some GPT to check if the hypotheses are the same or not.

In [139]:
rule = "Our current hypothesis is that ECR enzyme activity is high in Kitsatospor setae bacteria."

facts = '''1. The presence of the catalytically active state: The K. setae ECR is a homotetramer that differentiates into a pair of dimers of open- and closed-form subunits in the catalytically active state. [source: https://pubs.acs.org/doi/10.1021/acscentsci.2c00057]

2. Efficient CO2-fixing enzyme: Combining experimental biochemistry, protein crystallography, and advanced computer simulations, it has been shown that specific amino acids are required to create a highly efficient CO2-fixing enzyme in K. setae ECR. [source: https://pubs.acs.org/doi/10.1021/acscentsci.2c00057]'''

In [135]:
strength_of_induction_res, coherence_with_background_knowledge_res, biases_or_fallacies_res = use_verifiers(rule, facts)

Prompt:  
Premises:
1. The presence of the catalytically active state: The K. setae ECR is a homotetramer that differentiates into a pair of dimers of open- and closed-form subunits in the catalytically active state. [source: https://pubs.acs.org/doi/10.1021/acscentsci.2c00057]

2. Efficient CO2-fixing enzyme: Combining experimental biochemistry, protein crystallography, and advanced computer simulations, it has been shown that specific amino acids are required to create a highly efficient CO2-fixing enzyme in K. setae ECR. [source: https://pubs.acs.org/doi/10.1021/acscentsci.2c00057]

Conclusion: 
Our current hypothesis is that ECR enzyme activity is high in Kitsatospor setae bacteria.

Based on the premises and assuming all the premises are true, is the conclusion also true, partially true, or false? Does the conclusion contradict any of the premises or does it logically follow from them? And what percentage degree of support do the premises provide for the truth of the conclusion?



In [136]:
print("strength_of_induction_res\n", strength_of_induction_res, "\n\ncoherence_with_background_knowledge_res\n", coherence_with_background_knowledge_res, "\n\nbiases_or_fallacies_res\n", biases_or_fallacies_res)

strength_of_induction_res
 {'label': 'Partially true', 'score': 80, 'reasoning': 'The conclusion is partially true. The premises provide strong evidence that the ECR enzyme activity is high in Kitsatospor setae bacteria. The presence of the catalytically active state and the specific amino acids required for efficient CO2-fixing enzyme support the hypothesis. However, it is important to note that the conclusion is based on a hypothesis and may require further experimental validation to confirm its truth.'} 

coherence_with_background_knowledge_res
 {'label': 'Partially true', 'score': 70, 'reasoning': 'While it is known that ECR enzyme activity is present in Kitsatospor setae bacteria, the conclusion does not specify the level of enzyme activity. Therefore, it is only partially true.'} 

biases_or_fallacies_res
 {'label': 'False', 'reasoning': 'There are no biases or logical fallacies present in the argument. The premises provide scientific evidence from experimental biochemistry, prot

## Decomposition with Verifiers

In [27]:
def get_initial_decomposition_prompt(search_query):
    return f'''
Research project question: {search_query}

Task: Decompose this research question into 1) project objective: what should be the outcome of the project, 2) key drivers: what are the key problem drivers to achieve the project objective, 3) hypotheses: what can already be hypothesized about the key drivers, 4) key questions: what analyses need to be conducted to verify, falsify, or change the hypotheses?

Rules: Be as mutually exclusive, completely exhaustive (MECE) as possible. The output should be in nested JSON format of 
```json
{{
  "project_objective": "",
  "key_drivers": {{
    "1": {{
      "driver": "",
      "hypotheses": {{
          "1": {{
            "hypothesis": "",
            "key_questions": {{
                "1": "", 
                "2": "",
                etc.
            }},
          }},
          etc.
      }},
    "2" : {{}},
    etc.
    }}"",
  }},
}}
```
Respond only with the output, with no explanation or conversation.
'''

In [28]:
res = chat_openai(get_initial_decomposition_prompt(search_query), model="gpt-3.5-turbo")
print(res[0])

Prompt:  
Research project question: How efficiently do the ECR enzymes work, especially in Kitsatospor setae bacteria?

Task: Decompose this research question into 1) project objective: what should be the outcome of the project, 2) key drivers: what are the key problem drivers to achieve the project objective, 3) hypotheses: what can already be hypothesized about the key drivers, 4) key questions: what analyses need to be conducted to verify, falsify, or change the hypotheses?

Rules: Be as mutually exclusive, completely exhaustive (MECE) as possible. The output should be in nested JSON format of 
```json
{
  "project_objective": "",
  "key_drivers": {
    "1": {
      "driver": "",
      "hypotheses": {
          "1": {
            "hypothesis": "",
            "key_questions": {
                "1": "", 
                "2": "",
                etc.
            },
          },
          etc.
      },
    "2" : {},
    etc.
    }"",
  },
}
```
Respond only with the output, with no ex

In [29]:
response_1 = res[0]

In [12]:
# verifiers_decomposition_prompt = f'''
# {response_1}

# Task:
# You are well trained in logical reasoning. Based on the premises, use the following strategies and tools to validate the robustness of the conclusion:
# 1. Counterfactual Analysis: This should involve considering alternative scenarios where key premises might not hold or might be different. You should discuss how these counterfactuals could affect the argument and the overall conclusion.
# 2. Sensitivity Analysis: This should involve examining how much the argument depends on the exact values or weights assigned to different premises or criteria. You should discuss how changes to these values could affect the conclusion, and whether the argument might be overly sensitive to any particular premises or criteria.
# 3. Logical Consistency Checks: This should involve checking for potential contradictions or gaps in the argument. You should confirm that the premises are consistent with each other, and that they collectively provide a comprehensive and coherent argument for the conclusion.
# 4. Ethical Analysis: This should involve evaluating the argument from an ethical perspective. You should consider whether the argument aligns with relevant moral principles or values, and whether it might have any potential ethical implications or consequences.

# Your response should be in this format:
# 1. Counterfactual analysis feedback: 
# 2. Sensitivity analysis feedback: 
# 3. Logical consistency checks feedback: 
# 4. Ethical analysis feedback: 

# '''

In [36]:
def get_decomposition_verifiers_prompt(response_1):
    return f'''
{response_1}

Task:
You are well trained in logical reasoning and verification and understand JSON format. Based on the sub-questions and sub-tasks, use the following logical verification checks to ensure that the decomposition is accurate, relevant, and comprehensive.

1. Comprehensiveness: Do the sub-questions or sub-tasks cover all aspects of the main question or task? This check ensures you haven't left out any key component that needs to be addressed.
2. Relevance: Are all sub-questions or sub-tasks directly related to the main question or task? This helps to ensure that you haven't included unnecessary elements that may distract from the main issue.
3. Consistency: Are the sub-questions or sub-tasks logically consistent with each other and the main question or task? They should be interconnected and not contradict each other.
4. Depth: Have you dissected the question or task enough? Some complex issues might need several layers of breakdown. Ensure that the depth of your analysis corresponds to the complexity of the main question or task.
5. Clarity: Are the sub-questions or sub-tasks clear and straightforward? They should not introduce further complexity and should be easily understandable.
6. Synthesis Capability: Can the answers to the sub-questions or results of the sub-tasks be combined to solve the main problem or answer the main question? The breakdown should facilitate a clear path to the solution.
7. Decomposition feedback: Provide feedback on the decomposition content itself.

Your response should be in this format:
1. Comprehensiveness: 
2. Relevance: 
3. Consistency: 
4. Depth: 
5. Clarity: 
6. Synthesis Capability:
7. Decomposition feedback: 

Be as critical, thorough, and specific in your responses as possible.
'''

In [37]:
print(get_decomposition_verifiers_prompt(response_1))


{
  "project_objective": "To determine the efficiency of ECR enzymes in Kitsatospor setae bacteria",
  "key_drivers": {
    "1": {
      "driver": "ECR enzyme activity",
      "hypotheses": {
        "1": {
          "hypothesis": "ECR enzyme activity is high in Kitsatospor setae bacteria",
          "key_questions": {
            "1": "What is the level of ECR enzyme activity in Kitsatospor setae bacteria?",
            "2": "How does ECR enzyme activity vary under different conditions?",
            "3": "Are there any factors that enhance or inhibit ECR enzyme activity?"
          }
        }
      }
    }
  }
}

Task:
You are well trained in logical reasoning and verification and understand JSON format. Based on the sub-questions and sub-tasks, use the following logical verification checks to ensure that the decomposition is accurate, relevant, and comprehensive.

1. Comprehensiveness: Do the sub-questions or sub-tasks cover all aspects of the main question or task? This check ens

In [38]:
res = chat_openai(get_decomposition_verifiers_prompt(response_1), model="gpt-3.5-turbo")
print(res[0])
response_2 = res[0]

Prompt:  
{
  "project_objective": "To determine the efficiency of ECR enzymes in Kitsatospor setae bacteria",
  "key_drivers": {
    "1": {
      "driver": "ECR enzyme activity",
      "hypotheses": {
        "1": {
          "hypothesis": "ECR enzyme activity is high in Kitsatospor setae bacteria",
          "key_questions": {
            "1": "What is the level of ECR enzyme activity in Kitsatospor setae bacteria?",
            "2": "How does ECR enzyme activity vary under different conditions?",
            "3": "Are there any factors that enhance or inhibit ECR enzyme activity?"
          }
        }
      }
    }
  }
}

Task:
You are well trained in logical reasoning and verification and understand JSON format. Based on the sub-questions and sub-tasks, use the following logical verification checks to ensure that the decomposition is accurate, relevant, and comprehensive.

1. Comprehensiveness: Do the sub-questions or sub-tasks cover all aspects of the main question or task? This 

In [45]:
def get_decomposition_improved_prompt(search_query, response_1, response_2):
    return f'''
Research project question: {search_query}

Previous Task: Decompose this research question into 1) project objective: what should be the outcome of the project, 2) key drivers: what are the key problem drivers to achieve the project objective, 3) hypotheses: what can already be hypothesized about the key drivers, 4) key questions: what analyses need to be conducted to verify, falsify, or change the hypotheses?

Decomposition
{response_1}

Feedback:
{response_2}

Current Task:
You are well trained in logical reasoning and verification and understand JSON format. Based on the decomposition and feedback provided, improve the robustness of the decomposition.

Rules: Be as mutually exclusive, completely exhaustive (MECE) as possible. The output should be in nested JSON format of 
```json
{{
  "project_question:", "",
  "project_objective": "",
  "key_drivers": {{
    "1": {{
      "driver": "",
      "hypotheses": {{
          "1": {{
            "hypothesis": "",
            "key_questions": {{
                "1": "", 
                "2": "",
                etc.
            }},
          }},
          etc.
      }},
    "2" : {{}},
    etc.
    }}"",
  }},
}}
```
Respond only with the output, with no explanation or conversation.
'''

In [46]:
print(get_decomposition_improved_prompt(search_query, response_1, response_2))


Research project question: How efficiently do the ECR enzymes work, especially in Kitsatospor setae bacteria?

Previous Task: Decompose this research question into 1) project objective: what should be the outcome of the project, 2) key drivers: what are the key problem drivers to achieve the project objective, 3) hypotheses: what can already be hypothesized about the key drivers, 4) key questions: what analyses need to be conducted to verify, falsify, or change the hypotheses?

Decomposition
{
  "project_objective": "To determine the efficiency of ECR enzymes in Kitsatospor setae bacteria",
  "key_drivers": {
    "1": {
      "driver": "ECR enzyme activity",
      "hypotheses": {
        "1": {
          "hypothesis": "ECR enzyme activity is high in Kitsatospor setae bacteria",
          "key_questions": {
            "1": "What is the level of ECR enzyme activity in Kitsatospor setae bacteria?",
            "2": "How does ECR enzyme activity vary under different conditions?",
       

In [47]:
res = chat_openai(get_decomposition_improved_prompt(search_query, response_1, response_2), model="gpt-3.5-turbo")
print(res[0])
response_3 = res[0]

Prompt:  
Research project question: How efficiently do the ECR enzymes work, especially in Kitsatospor setae bacteria?

Previous Task: Decompose this research question into 1) project objective: what should be the outcome of the project, 2) key drivers: what are the key problem drivers to achieve the project objective, 3) hypotheses: what can already be hypothesized about the key drivers, 4) key questions: what analyses need to be conducted to verify, falsify, or change the hypotheses?

Decomposition
{
  "project_objective": "To determine the efficiency of ECR enzymes in Kitsatospor setae bacteria",
  "key_drivers": {
    "1": {
      "driver": "ECR enzyme activity",
      "hypotheses": {
        "1": {
          "hypothesis": "ECR enzyme activity is high in Kitsatospor setae bacteria",
          "key_questions": {
            "1": "What is the level of ECR enzyme activity in Kitsatospor setae bacteria?",
            "2": "How does ECR enzyme activity vary under different conditions?"

In [116]:
def decompose_with_verifiers(search_query):
    model = "gpt-3.5-turbo"

    # Get initial decomposition
    initial_decomposition_res = chat_openai(get_initial_decomposition_prompt(search_query), model=model)[0]

    # Get verifier feedback on initial decomposition
    decomposition_verifiers_res = chat_openai(get_decomposition_verifiers_prompt(initial_decomposition_res), model=model)[0]

    # Improve original decomposition with verifier feedback
    improved_decomposition_res = chat_openai(get_decomposition_improved_prompt(search_query, initial_decomposition_res, decomposition_verifiers_res), model=model)[0]

    # Get verifier feedback on final decomposition
    improved_decomposition_verifiers_res = chat_openai(get_decomposition_verifiers_prompt(improved_decomposition_res), model=model)[0]

    # Save decomposition and verifier feedback to file
    with open(f'autoscious_logs/decompositions/{search_query_file_safe}/initial_decomposition.json', 'w') as f:
        json.dump(json.loads(initial_decomposition_res), f, indent=2)
    with open(f'autoscious_logs/decompositions/{search_query_file_safe}/initial_decomposition_verifier_feedback.txt', 'w', encoding='utf-8') as f:
        f.write(decomposition_verifiers_res)
    
    with open(f'autoscious_logs/decompositions/{search_query_file_safe}/improved_decomposition.json', 'w') as f:
        json.dump(json.loads(improved_decomposition_res), f, indent=2)
        
    with open(f'autoscious_logs/decompositions/{search_query_file_safe}/improved_decomposition_verifier_feedback.txt', 'w', encoding='utf-8') as f:
        f.write(improved_decomposition_verifiers_res)

    # Return final decomposition, along with verifier feedback
    return json.loads(improved_decomposition_res), improved_decomposition_verifiers_res

In [117]:
decomposition, decomposition_verifier_feedback = decompose_with_verifiers(search_query)

Prompt:  
Research project question: How efficiently do the ECR enzymes work, especially in Kitsatospor setae bacteria?

Task: Decompose this research question into 1) project objective: what should be the outcome of the project, 2) key drivers: what are the key problem drivers to achieve the project objective, 3) hypotheses: what can already be hypothesized about the key drivers, 4) key questions: what analyses need to be conducted to verify, falsify, or change the hypotheses?

Rules: Be as mutually exclusive, completely exhaustive (MECE) as possible. The output should be in nested JSON format of 
```json
{
  "project_objective": "",
  "key_drivers": {
    "1": {
      "driver": "",
      "hypotheses": {
          "1": {
            "hypothesis": "",
            "key_questions": {
                "1": "", 
                "2": "",
                etc.
            },
          },
          etc.
      },
    "2" : {},
    etc.
    }"",
  },
}
```
Respond only with the output, with no ex

In [114]:
print(decomposition)

{'project_question': 'How efficiently do the ECR enzymes work, especially in Kitsatospor setae bacteria?', 'project_objective': 'To determine the efficiency of ECR enzymes in Kitsatospor setae bacteria', 'key_drivers': {'1': {'driver': 'ECR enzyme activity', 'hypotheses': {'1': {'hypothesis': 'ECR enzyme activity is high in Kitsatospor setae bacteria', 'key_questions': {'1': 'What is the level of ECR enzyme activity in Kitsatospor setae bacteria?', '2': 'How does ECR enzyme activity in Kitsatospor setae bacteria compare to other bacteria?', '3': 'What factors influence ECR enzyme activity in Kitsatospor setae bacteria?'}}}}}}


## Logical inferences with Verifiers

In [57]:
# Assume we have facts and we want to answer the question

import re
# Open the file in read mode ('r')

key_question_index = 0

with open(f'autoscious_logs/kd_1/h_1/f_{key_question_index}.txt', 'r', encoding='utf-8') as file:
    # Read the entire contents of the file
    facts_text = file.read()

# Remove URLs from text for easier analysis
cleaned_facts_text = re.sub(r'\[.*?\]', '', facts_text)

print(cleaned_facts_text[:500])


Intersubunit Coupling Enables Fast CO2-Fixation by Reductive Carboxylases

Kitsatospor setae bacteria

ECR enzyme activity

The K. setae ECR is a homotetramer that differentiates into a pair of dimers of open- and closed-form subunits in the catalytically active state.

Using molecular dynamics simulations and structure-based mutagenesis, we show that catalysis is synchronized in the K. setae ECR across the pair of dimers.

In a recent study, we reported the ternary structure of the ECR (EC 1.3.


In [55]:
facts_text[:500]

'Intersubunit Coupling Enables Fast CO2-Fixation by Reductive Carboxylases[https://pubs.acs.org/doi/10.1021/acscentsci.2c00057]\n\nKitsatospor setae bacteria[https://pubs.acs.org/doi/10.1021/acscentsci.2c00057]\n\nECR enzyme activity[https://pubs.acs.org/doi/10.1021/acscentsci.2c00057]\n\nThe K. setae ECR is a homotetramer that differentiates into a pair of dimers of open- and closed-form subunits in the catalytically active state.[https://pubs.acs.org/doi/10.1021/acscentsci.2c00057]\n\nUsing molecular d'

In [62]:
key_questions_dict, key_questions_list_string = get_key_questions_list_string(search_query_file_safe)
key_question_string = key_questions_dict[str(key_question_index + 1)]


key_questions
 {'1': 'What is the level of ECR enzyme activity in Kitsatospor setae bacteria?', '2': 'How does ECR enzyme activity vary under different conditions?', '3': 'What factors influence ECR enzyme activity in Kitsatospor setae bacteria?'}

numbered_key_questions_string
 0. What is the level of ECR enzyme activity in Kitsatospor setae bacteria?
1. How does ECR enzyme activity vary under different conditions?
2. What factors influence ECR enzyme activity in Kitsatospor setae bacteria?



In [65]:
with open(f'autoscious_logs/decompositions/{search_query_file_safe}.json', 'r', encoding='utf-8') as f:
    # Read the entire contents of the file
    decomposition = json.loads(f.read())

In [63]:
hypothesis_string = "ECR enzyme activity is high in Kitsatospor setae bacteria"

In [83]:
MAX_TOKENS = 10000

def get_initial_inference_prompt(decomposition, facts_text, max_tokens):
    return f'''
Decomposed research question:
{decomposition}

Facts: {facts_text[:max_tokens]}

Task: 
Based on the facts, report your findings on the current answer to the key question and how this verifies, falsifies, or changes the hypothesis. Be sure to use accurate direct quotes and their urls from the the facts used in each reasoning step. Think step by step.

The output should be of the JSON format: 
```json
{{
    "relevant_facts": "",
    "key_question_answer": "",
    "hypothesis_finding": "",
}}
```
'''

In [79]:
res = chat_openai(get_report_findings_prompt(hypothesis_string, key_question_string, facts_text), model="gpt-3.5-turbo")
print(res[0])

Prompt:  
Decomposed research question:
{'project_question': 'How efficiently do the ECR enzymes work, especially in Kitsatospor setae bacteria?', 'project_objective': 'To determine the efficiency of ECR enzymes in Kitsatospor setae bacteria', 'key_drivers': {'1': {'driver': 'ECR enzyme activity', 'hypotheses': {'1': {'hypothesis': 'ECR enzyme activity is high in Kitsatospor setae bacteria', 'key_questions': {'1': 'What is the level of ECR enzyme activity in Kitsatospor setae bacteria?', '2': 'How does ECR enzyme activity vary under different conditions?', '3': 'What factors influence ECR enzyme activity in Kitsatospor setae bacteria?'}}}}}}

Facts: Intersubunit Coupling Enables Fast CO2-Fixation by Reductive Carboxylases[https://pubs.acs.org/doi/10.1021/acscentsci.2c00057]

Kitsatospor setae bacteria[https://pubs.acs.org/doi/10.1021/acscentsci.2c00057]

ECR enzyme activity[https://pubs.acs.org/doi/10.1021/acscentsci.2c00057]

The K. setae ECR is a homotetramer that differentiates into

In [80]:
inferences = res[0]
print(inferences)

{
    "relevant_facts": "The K. setae ECR is a homotetramer that differentiates into a pair of dimers of open- and closed-form subunits in the catalytically active state. The substrate-binding pocket in the open-form C or D subunit is more than 5 Å wider than the closed A or B subunit. Combining experimental biochemistry, protein crystallography, and advanced computer simulations, it has been shown that 4 amino acids, N81, F170, E171, and H365, are required to create a highly efficient CO2-fixing enzyme.",
    "key_question_answer": "The level of ECR enzyme activity in Kitsatospor setae bacteria is not directly addressed in the provided facts.",
    "hypothesis_finding": "The hypothesis that ECR enzyme activity is high in Kitsatospor setae bacteria cannot be verified or falsified based on the provided facts."
}


In [87]:
def get_inference_verifiers_prompt(inference):
    return f'''
Inference
{inference}

Task:
You are well trained in logical reasoning and verification and understand JSON format. Based on the facts and assuming all the facts are true, use the following logical verification checks to ensure that the inference is valid.

1. Deductive consistency: Based on the premises and assuming all the premises are true, is the conclusion also true, partially true, or false? Does the conclusion contradict any of the premises or does it logically follow from them? And what percentage degree of support do the premises provide for the truth of the conclusion?
2. Coherence with background knowledge: True, partially true, or false, and to what percentage degree: does the conclusion align with what is generally accepted or known about the subject? Generate background knowledge about the conclusion.
3. Biases or fallacies: True or Fale: Are there any biases or logical fallacies present that might undermine the argument? Common ones to look for include hasty generalization, or appeal to ignorance.
4. Inference feedback: Provide feedback on the inference content itself.

Your response should be in this format:
1. Deductive consistency: 
Reasoning: 
Label: 
Score: 
2. Coherence with background knowledge: 
Background knowledge: 
Reasoning: 
Label: 
Score: 
3. Consistency: 
Reasoning: 
Label:
4. Inference feedback: 
Feedback: 

Be as critical, thorough, and specific in your responses as possible.
'''

In [88]:
MAX_TOKENS = 10000
model = "gpt-3.5-turbo"
search_query_file_safe = sanitize_filename(search_query)

# Get initial inferences
initial_inference = chat_openai(get_initial_inference_prompt(decomposition, facts_text, MAX_TOKENS), model=model)[0]

# Get verifier feedback on initial inference
inference_verifiers_res = chat_openai(get_inference_verifiers_prompt(initial_inference), model=model)[0]

Prompt:  
Decomposed research question:
{'project_question': 'How efficiently do the ECR enzymes work, especially in Kitsatospor setae bacteria?', 'project_objective': 'To determine the efficiency of ECR enzymes in Kitsatospor setae bacteria', 'key_drivers': {'1': {'driver': 'ECR enzyme activity', 'hypotheses': {'1': {'hypothesis': 'ECR enzyme activity is high in Kitsatospor setae bacteria', 'key_questions': {'1': 'What is the level of ECR enzyme activity in Kitsatospor setae bacteria?', '2': 'How does ECR enzyme activity vary under different conditions?', '3': 'What factors influence ECR enzyme activity in Kitsatospor setae bacteria?'}}}}}}

Facts: Intersubunit Coupling Enables Fast CO2-Fixation by Reductive Carboxylases[https://pubs.acs.org/doi/10.1021/acscentsci.2c00057]

Kitsatospor setae bacteria[https://pubs.acs.org/doi/10.1021/acscentsci.2c00057]

ECR enzyme activity[https://pubs.acs.org/doi/10.1021/acscentsci.2c00057]

The K. setae ECR is a homotetramer that differentiates into

Completion info:  {
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "message": {
        "content": "{\n    \"relevant_facts\": \"The K. setae ECR is a homotetramer that differentiates into a pair of dimers of open- and closed-form subunits in the catalytically active state. The substrate-binding pocket in the open-form C or D subunit is more than 5 \u00c5 wider than the closed A or B subunit. Combining experimental biochemistry, protein crystallography, and advanced computer simulations, it has been shown that 4 amino acids, N81, F170, E171, and H365, are required to create a highly efficient CO2-fixing enzyme.\",\n    \"key_question_answer\": \"The level of ECR enzyme activity in Kitsatospor setae bacteria is not directly addressed in the provided facts.\",\n    \"hypothesis_finding\": \"The hypothesis that ECR enzyme activity is high in Kitsatospor setae bacteria cannot be verified or falsified based on the provided facts.\"\n}",
        "role": "assistan

In [89]:
print(inference_verifiers_res)

1. Deductive consistency:
Reasoning: The provided facts state that the substrate-binding pocket in the open-form subunit is wider than the closed-form subunit. It is also mentioned that specific amino acids are required for the enzyme to be highly efficient. However, there is no direct information about the level of ECR enzyme activity in Kitsatospor setae bacteria.
Label: Partially true
Score: 70%

2. Coherence with background knowledge:
Background knowledge: Carbon fixation enzymes are known to play a crucial role in capturing and converting carbon dioxide into organic compounds.
Reasoning: The conclusion that the level of ECR enzyme activity is high in Kitsatospor setae bacteria cannot be verified or falsified based on the provided facts. Without additional information or research, it is not possible to align the conclusion with what is generally accepted or known about the subject.
Label: Partially true
Score: 50%

3. Consistency:
Reasoning: There are no biases or logical fallacies

In [90]:
MAX_TOKENS = 10000

def get_inference_improved_prompt(initial_inference, inference_verifiers_res):
    return f'''
Inference
{initial_inference}

Feedback
{inference_verifiers_res}

Current Task: 
You are well trained in logical reasoning and verification and understand JSON format.  Based on the inference and feedback provieded, improve the soundness of the inference.

The output should be of the JSON format: 
```json
{{
    "relevant_facts": "",
    "key_question_answer": "",
    "hypothesis_finding": "",
}}
```
'''

In [126]:
def inference_with_verifiers(search_query, decomposition, facts_text, key_question_idx):
    MAX_TOKENS = 10000
    model = "gpt-3.5-turbo"
    search_query_file_safe = sanitize_filename(search_query)

    # Get initial inferences
    initial_inference = chat_openai(get_initial_inference_prompt(decomposition, facts_text, MAX_TOKENS), model=model)[0]

    # Get verifier feedback on initial inference
    inference_verifiers_res = chat_openai(get_inference_verifiers_prompt(initial_inference), model=model)[0]

    # Improve original decomposition with verifier feedback
    improved_inference_res = chat_openai(get_inference_improved_prompt(initial_inference, inference_verifiers_res), model=model)[0]

    # Get verifier feedback on final decomposition
    improved_inference_verifiers_res = chat_openai(get_inference_verifiers_prompt(improved_inference_res), model=model)[0]

    improved_inference_res_json = json.loads(improved_inference_res)

    # Save decomposition and verifier feedback to file
    with open(f'autoscious_logs/inferences/{search_query_file_safe}/kq{key_question_idx}_initial_inference.json', 'w') as f:
        json.dump(json.loads(initial_inference), f, indent=2)
    with open(f'autoscious_logs/inferences/{search_query_file_safe}/kq{key_question_idx}_initial_verifier_feedback.txt', 'w', encoding='utf-8') as f:
        f.write(inference_verifiers_res)
    with open(f'autoscious_logs/inferences/{search_query_file_safe}/kq{key_question_idx}_improved_inference.json', 'w') as f:
        json.dump(improved_inference_res_json, f, indent=2)
    with open(f'autoscious_logs/inferences/{search_query_file_safe}/kq{key_question_idx}_improved_verifier_feedback.txt', 'w', encoding='utf-8') as f:
        f.write(improved_inference_verifiers_res)

    # Return final inference, along with verifier feedback
    return improved_inference_res_json, improved_inference_verifiers_res

In [127]:
# Load in decomposition
with open(f'autoscious_logs/decompositions/{sanitize_filename(search_query)}/improved_decomposition.json', 'r') as f:
    decomposition = json.load(f)

key_question_decomposition_list = []

for driver_key, driver_value in decomposition['key_drivers'].items():
    for hypothesis_key, hypothesis_value in driver_value['hypotheses'].items():
        for question_key, question_value in hypothesis_value['key_questions'].items():
            new_decomposition = decomposition.copy()
            new_decomposition['key_drivers'] = {
                driver_key: {
                    'driver': driver_value['driver'],
                    'hypotheses': {
                        hypothesis_key: {
                            'hypothesis': hypothesis_value['hypothesis'],
                            'key_questions': {
                                question_key: question_value
                            }
                        }
                    }
                }
            }
            key_question_decomposition_list.append(new_decomposition)

print(key_question_decomposition_list)

[{'project_question': 'How efficiently do the ECR enzymes work, especially in Kitsatospor setae bacteria?', 'project_objective': 'To determine the efficiency of ECR enzymes in Kitsatospor setae bacteria', 'key_drivers': {'1': {'driver': 'ECR enzyme activity', 'hypotheses': {'1': {'hypothesis': 'ECR enzyme activity is high in Kitsatospor setae bacteria', 'key_questions': {'1': 'What is the level of ECR enzyme activity in Kitsatospor setae bacteria?'}}}}}}, {'project_question': 'How efficiently do the ECR enzymes work, especially in Kitsatospor setae bacteria?', 'project_objective': 'To determine the efficiency of ECR enzymes in Kitsatospor setae bacteria', 'key_drivers': {'1': {'driver': 'ECR enzyme activity', 'hypotheses': {'1': {'hypothesis': 'ECR enzyme activity is high in Kitsatospor setae bacteria', 'key_questions': {'2': 'How does ECR enzyme activity vary under different conditions?'}}}}}}, {'project_question': 'How efficiently do the ECR enzymes work, especially in Kitsatospor se

In [131]:
for i, kq_decomposition in enumerate(key_question_decomposition_list):
    with open(f'autoscious_logs/kd_1/h_1/f_{i}.txt', 'r', encoding='utf-8') as f:
        kq_facts = f.read()
    inference_with_verifiers(search_query, kq_decomposition, kq_facts, i)

Prompt:  
Decomposed research question:
{'project_question': 'How efficiently do the ECR enzymes work, especially in Kitsatospor setae bacteria?', 'project_objective': 'To determine the efficiency of ECR enzymes in Kitsatospor setae bacteria', 'key_drivers': {'1': {'driver': 'ECR enzyme activity', 'hypotheses': {'1': {'hypothesis': 'ECR enzyme activity is high in Kitsatospor setae bacteria', 'key_questions': {'1': 'What is the level of ECR enzyme activity in Kitsatospor setae bacteria?'}}}}}}

Facts: Intersubunit Coupling Enables Fast CO2-Fixation by Reductive Carboxylases[https://pubs.acs.org/doi/10.1021/acscentsci.2c00057]

Kitsatospor setae bacteria[https://pubs.acs.org/doi/10.1021/acscentsci.2c00057]

ECR enzyme activity[https://pubs.acs.org/doi/10.1021/acscentsci.2c00057]

The K. setae ECR is a homotetramer that differentiates into a pair of dimers of open- and closed-form subunits in the catalytically active state.[https://pubs.acs.org/doi/10.1021/acscentsci.2c00057]

Using molec

## Giving a conclusion and best answer based on hypothesis findings

In [54]:
search_query = "How efficiently do the ECR enzymes work, especially in Kitsatospor setae bacteria?"

# Load in decomposition
with open(f'autoscious_logs/{sanitize_filename(search_query)}/decompositions/improved_decomposition.json', 'r') as f:
    decomposition = json.load(f)

In [56]:
hypothesis_findings = ""
key_questions_dict = decomposition['key_drivers']['1']['hypotheses']['1']['key_questions']

for kq_idx, kq_val in key_questions_dict.items():
    with open(f'autoscious_logs/{search_query_file_safe}/inferences/kq{int(kq_idx) - 1}/improved_inference.json', 'r') as f:
        hypothesis_findings += f'\n\nKey question {kq_idx} findings: \n'
        hypothesis_findings += str(json.load(f))
    with open(f'autoscious_logs/{search_query_file_safe}/inferences/kq{int(kq_idx) - 1}/improved_verifier_feedback.txt', 'r', encoding='utf-8') as f:
        hypothesis_findings += f'\n\nKey question {kq_idx} logical verifier feedback: \n'
        hypothesis_findings += (f.read())

    print("hypothesis findings", hypothesis_findings)

hypothesis findings 

Key question 1 findings: 
{'relevant_facts': {'1': 'Awakening the Sleeping Carboxylase Function of Enzymes: Engineering the Natural CO2-Binding Potential of Reductases - This article discusses the engineering of reductase enzymes to enhance their CO2-binding potential.', '2': 'Heterotrophic metabolism is the biologic oxidation of organic compounds, such as glucose, to yield ATP and simpler organic (or inorganic) compounds, which are needed by the bacterial cell for biosynthetic or assimilatory reactions. - This fact provides a general understanding of heterotrophic metabolism in bacteria.', '3': 'The kinetic characteristics and regulation of aspartate carbamoyltransferase activity were studied in lysates and cell extracts of Helicobacter pylori by three different methods. - This fact is not directly related to ECR enzyme activity in Kitsatospor setae bacteria.', '4': 'This chapter covers the basic principles of enzymology, such as classification, structure, kineti

In [46]:
MAX_TOKENS = 10000

def get_key_driver_conclusion_prompt(decomposition, hypothesis_findings):
    return f'''
Decomposed research question:
{decomposition}

Hypothesis findings & feedback:
{hypothesis_findings}

Task: 
Based on the hypothesis findings and their feedback, report your conclusion on the key driver. Be sure to use accurate direct quotes and their urls from the the facts used in each reasoning step. Think step by step.

The output should be of the JSON format: 
```json
{{
    "relevant_findings": "<insert answer>",
    "key_driver_conclusion": "<insert answer>",
}}
```
'''

In [47]:
key_driver_conclusion = chat_openai(get_key_driver_conclusion_prompt(decomposition, hypothesis_findings), model="gpt-3.5-turbo")[0]

with open(f'autoscious_logs/{search_query_file_safe}/inferences/key_driver_conclusion.json', 'w') as f:
    json.dump(json.loads(key_driver_conclusion), f, indent=2)

Prompt:  
Decomposed research question:
{'project_question': 'How efficiently do the ECR enzymes work, especially in Kitsatospor setae bacteria?', 'project_objective': 'To determine the efficiency of ECR enzymes in Kitsatospor setae bacteria', 'key_drivers': {'1': {'driver': 'ECR enzyme activity', 'hypotheses': {'1': {'hypothesis': 'ECR enzyme activity is high in Kitsatospor setae bacteria', 'key_questions': {'1': 'What is the level of ECR enzyme activity in Kitsatospor setae bacteria?', '2': 'How does ECR enzyme activity compare to other bacteria?', '3': 'What factors influence ECR enzyme activity in Kitsatospor setae bacteria?', '4': 'What are the potential applications or implications of high ECR enzyme activity in Kitsatospor setae bacteria?'}}}}}}

Hypothesis findings & feedback:


Key question 1 findings: 
{'relevant_facts': {'1': 'Awakening the Sleeping Carboxylase Function of Enzymes: Engineering the Natural CO2-Binding Potential of Reductases - This article discusses the engin

Completion info:  {
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "message": {
        "content": "{\n    \"relevant_findings\": \"The level of ECR enzyme activity in Kitsatospor setae bacteria is not directly addressed in the provided facts. The efficiency of ECR enzymes in Kitsatospor setae bacteria cannot be determined solely based on the provided facts. Enzyme activity can vary among different bacteria and is influenced by various factors. The potential applications or implications of high ECR enzyme activity in Kitsatospor setae bacteria are not mentioned in the provided facts.\",\n    \"key_driver_conclusion\": \"Based on the provided facts, it is not possible to determine the efficiency or level of ECR enzyme activity in Kitsatospor setae bacteria, nor are the potential applications or implications of high ECR enzyme activity mentioned. Further research or additional facts are needed to draw a conclusion on these aspects.\"\n}",
        "role": "ass

In [49]:
MAX_TOKENS = 10000

def get_project_question_answer_prompt(decomposition, key_driver_conclusion):
    return f'''
Decomposed research question:
{decomposition}

Key driver conclusions:
{key_driver_conclusion}

Task: 
Based on the key driver conclusions, report your best answer to the research question.  Think step by step.

The output should be of the JSON format: 
```json
{{
    "reasoning": "<insert answer>",
    "project_question_answer": "<insert answer>",
}}
```
'''

In [50]:
res = chat_openai(get_project_question_answer_prompt(decomposition, key_driver_conclusion), model="gpt-3.5-turbo")
print(res[0])

Prompt:  
Decomposed research question:
{'project_question': 'How efficiently do the ECR enzymes work, especially in Kitsatospor setae bacteria?', 'project_objective': 'To determine the efficiency of ECR enzymes in Kitsatospor setae bacteria', 'key_drivers': {'1': {'driver': 'ECR enzyme activity', 'hypotheses': {'1': {'hypothesis': 'ECR enzyme activity is high in Kitsatospor setae bacteria', 'key_questions': {'1': 'What is the level of ECR enzyme activity in Kitsatospor setae bacteria?', '2': 'How does ECR enzyme activity compare to other bacteria?', '3': 'What factors influence ECR enzyme activity in Kitsatospor setae bacteria?', '4': 'What are the potential applications or implications of high ECR enzyme activity in Kitsatospor setae bacteria?'}}}}}}

Key driver conclusions:
{
    "relevant_findings": "The level of ECR enzyme activity in Kitsatospor setae bacteria is not directly addressed in the provided facts. The efficiency of ECR enzymes in Kitsatospor setae bacteria cannot be de

In [52]:
final_answer = res[0]
print(final_answer)

{
    "reasoning": "Based on the key driver conclusions, it is not possible to determine the efficiency or level of ECR enzyme activity in Kitsatospor setae bacteria, nor are the potential applications or implications of high ECR enzyme activity mentioned. Further research or additional facts are needed to draw a conclusion on these aspects.",
    "project_question_answer": "Based on the available information, it is not possible to determine the efficiency of ECR enzymes in Kitsatospor setae bacteria or the potential applications or implications of high ECR enzyme activity."
}


In [53]:
with open(f'autoscious_logs/{search_query_file_safe}/inferences/best_answer.json', 'w') as f:
    json.dump(json.loads(final_answer), f, indent=2)