In [None]:
# Given facts from the searcher, plan next steps

In [1]:
from util import sanitize_filename
import os
import json
from llm import chat_openai

In [2]:
# The research question under verification, plus a filename-safe form of it
# for use in file paths.
search_query = (
    "How efficiently do the ECR enzymes work in Kitsatospor setae bacteria?"
)
search_query_file_safe = sanitize_filename(search_query)

## Verifier

In [3]:
# Read the whole facts file for this query into memory as a single string.
file_name = f'autoscious_logs/{search_query_file_safe}/facts/facts.txt'

with open(file_name, mode='r', encoding='utf-8') as facts_file:
    facts_list = facts_file.read()

In [4]:
# Inspect the raw facts text exactly as it was read from disk.
facts_list

"The ECR enzymes in Kitsatospor setae bacteria work with a Michaelis-Menten constant (Km) of 26 ± 3 μM.\n\nThe ECR activity in the WT tetramer is increased by 2 orders of magnitude in comparison to the triple variant E151D/N157E/N218E.\n\nThe ECR enzymes in Kitsatospor setae bacteria work efficiently by forming a thermodynamically stable covalent adduct between the substrate and the NADPH cofactor.\n\nThe bacterial enzyme ECRs in Kitasatospora setae work 100 times faster than without the presence of molecular 'glue' and twist.\n\n20 times faster than plant enzymes do during photosynthesis.\n\nThe ECR enzymes in Kitsatospor setae bacteria work efficiently in CO2 fixation.\n\nThe ECR enzymes in Kitsatospor setae bacteria are highly efficient.\n\nECRs are some of the most efficient CO2-fixing enzymes described to date.\n\n"

In [4]:
# Facts are separated by blank lines ('\n\n'). Drop empty chunks *before*
# numbering so that fact numbers are contiguous (1..N) even when the file
# contains extra blank lines, and trim stray surrounding whitespace.
sentences = [chunk.strip() for chunk in facts_list.split('\n\n') if chunk.strip()]

# Map 1-based fact number -> fact text.
facts_dict = dict(enumerate(sentences, start=1))

print(facts_dict)

{1: 'The ECR enzymes in Kitsatospor setae bacteria work with a Michaelis-Menten constant (Km) of 26 ± 3 μM.', 2: 'The ECR activity in the WT tetramer is increased by 2 orders of magnitude in comparison to the triple variant E151D/N157E/N218E.', 3: 'The ECR enzymes in Kitsatospor setae bacteria work efficiently by forming a thermodynamically stable covalent adduct between the substrate and the NADPH cofactor.', 4: "The bacterial enzyme ECRs in Kitasatospora setae work 100 times faster than without the presence of molecular 'glue' and twist.", 5: '20 times faster than plant enzymes do during photosynthesis.', 6: 'The ECR enzymes in Kitsatospor setae bacteria work efficiently in CO2 fixation.', 7: 'The ECR enzymes in Kitsatospor setae bacteria are highly efficient.', 8: 'ECRs are some of the most efficient CO2-fixing enzymes described to date.'}


In [56]:
# GPT template for the verifier step.
# Improvement: it should cite direct quote facts, so that the direct quote and url can be retrieved in the answer.

prompt = f'''
Key question: 
{search_query}

Context from manager:
Ideally I'm looking for a numerical answer.

Task:
You are a verifier. Given the key question, context from manager, and facts, what is the best answer to the key question? Outline both the proposition of why it is a first-rate answer, and the opposition of what the answer's biggest weaknesses might be to make it not a first-rate answer. Make sure you cite facts in your conclusion. Ensure that your conclusion satisfies the context feedback from the manager because you are verifying if you have a first-rate answer for the manager.

The output should be in JSON format: 
```json
{{
  "best_answer": "<insert best answer based on facts to key question with fact citations>",
  "opposition":  "<insert reasons why the best answer might not be a first-rate answer to the key question>",
  "proposition": "<insert reasons why the best answer might be a first-rate answer to the key question>",
  "conclusion": "<insert yes or no if the best answer is a first-rate answer to the key question, and then your reasoning>"
}}
```
Use a double square bracket around the index of the fact to cite it. For example, [[1]] means fact 1.

Most relevant facts (ordered from most to least relevant):
{facts_dict}

Respond only with the output, with no explanation or conversation.
'''
raw_reply = chat_openai(prompt, model="gpt-3.5-turbo")[0]

# The prompt shows the schema inside a Markdown json code fence, so the model
# may echo that fence (or add stray text) around its answer. Parse only the
# outermost {...} span; if none is found, hand the reply to json.loads
# unchanged so a malformed reply still raises the usual JSONDecodeError.
if isinstance(raw_reply, str):
    json_start = raw_reply.find('{')
    json_end = raw_reply.rfind('}')
    if 0 <= json_start < json_end:
        raw_reply = raw_reply[json_start:json_end + 1]

verifier_feedback = json.loads(raw_reply)
print(verifier_feedback)

Prompt:  
Key question: 
How efficiently do the ECR enzymes work in Kitsatospor setae bacteria?

Context from manager:
Ideally I'm looking for a numerical answer.

Task:
You are a verifier. Given the key question, context from manager, and facts, what is the best answer to the key question? Outline both the proposition of why it is a first-rate answer, and the opposition of what the answer's biggest weaknesses might be to make it not a first-rate answer. Make sure you cite facts in your conclusion. Ensure that your conclusion satisfies the context feedback from the manager because you are verifying if you have a first-rate answer for the manager.

The output should be in JSON format: 
```json
{
  "best_answer": "<insert best answer based on facts to key question with fact citations>",
  "opposition":  "<insert reasons why the best answer might not be a first-rate answer to the key question>",
  "proposition": "<insert reasons why the best answer might be a first-rate answer to the key 

In [57]:
# Load facts.json for this query and parse it into `facts_json`.
facts_json_path = f'autoscious_logs/{search_query_file_safe}/facts/facts.json'
with open(facts_json_path, mode='r', encoding='utf-8') as facts_json_file:
    facts_json = json.loads(facts_json_file.read())

In [58]:
import re
import string

def replace_references(feedback, facts_dict, facts_json):
    """Replace ``[[n]]`` fact citations in the verifier feedback with their sources.

    Each ``[[n]]`` marker is looked up as fact number ``n`` in ``facts_dict``.
    The fact text (with leading/trailing punctuation removed) is then searched
    for as a substring of the keys of ``facts_json``; the value stored under
    the first matching key replaces the marker as ``[<value>]``. A citation
    that cannot be resolved becomes ``[]``.

    Args:
        feedback: dict of verifier output. The string fields ``best_answer``,
            ``proposition``, ``opposition`` and ``conclusion`` are rewritten
            in place; fields that are absent or not strings are left alone.
        facts_dict: mapping of integer fact number -> fact text.
        facts_json: mapping whose keys are searched for the fact text and
            whose values are substituted into the feedback.

    Returns:
        The same ``feedback`` dict (mutated in place), for convenience.
    """
    citation_pattern = re.compile(r'\[\[(\d+)\]\]')

    def lookup_source(fact_index):
        # The model can cite a fact number that does not exist; treat it as
        # unresolved instead of raising KeyError.
        fact = facts_dict.get(fact_index)
        if fact is None:
            return None

        fact_stripped = fact.strip(string.punctuation)
        # An empty needle is a substring of every key and would wrongly match
        # the first entry of facts_json.
        if not fact_stripped:
            return None

        for key in facts_json:
            if fact_stripped in key:
                return facts_json[key]
        return None

    def replacer(match):
        quote_url = lookup_source(int(match.group(1)))
        return f'[{quote_url}]' if quote_url else '[]'

    for field in ('best_answer', 'proposition', 'opposition', 'conclusion'):
        text = feedback.get(field)
        # The model may omit a field or return a non-string for it.
        if isinstance(text, str):
            feedback[field] = citation_pattern.sub(replacer, text)

    return feedback

# Run the verifier feedback through replace_references and show the result.
verifier_feedback_updated = replace_references(
    feedback=verifier_feedback,
    facts_dict=facts_dict,
    facts_json=facts_json,
)
print(verifier_feedback_updated)


{'best_answer': "The ECR enzymes in Kitsatospor setae bacteria are highly efficient, working with a Michaelis-Menten constant (Km) of 26 ± 3 μM [± 25 1.1 ± 0.0 26 ± 3 0.7 ± 0.0 440 ± 40 0.9 ± 0.0 a Michaelis−Menten curves of K. setae ECR and its variants are provided in Figure S14.[https://www.researchgate.net/figure/Reaction-scheme-and-structural-organization-of-the-K-setae-ECR-complex-a_fig1_360182999]]. The ECR activity in the WT tetramer is increased by 2 orders of magnitude in comparison to the triple variant E151D/N157E/N218E [turned out that only negative cooperativity could explain our experimental observations that ECR activity in the WT tetramer is increased by 2 orders of magnitude in comparison to the triple variant E151D/N157E/N218E, in which synchronization is decoupled (Table 1).[https://www.researchgate.net/figure/Reaction-scheme-and-structural-organization-of-the-K-setae-ECR-complex-a_fig1_360182999]]. The ECR enzymes work efficiently by forming a thermodynamically sta

In [60]:
# Save the updated verifier feedback as indented JSON in the query's facts folder.
file_name = f'autoscious_logs/{search_query_file_safe}/facts/evaluation.json'

with open(file_name, mode='w', encoding='utf-8') as evaluation_file:
    evaluation_file.write(json.dumps(verifier_feedback_updated, indent=4))

## Analyzer -- figuring out next steps based on evaluator feedback