In [1]:
import eunomia
from eunomia.agents import Eunomia
from eunomia.tools import EunomiaTools

In [2]:
# Notebook-wide noise reduction so the agent traces below stay readable.
import logging
import warnings

# Raise the root logger's threshold to CRITICAL.
logging.getLogger().setLevel(logging.CRITICAL)

# Install a blanket "ignore" filter for every warning category.
warnings.filterwarnings("ignore")

# Obtaining Information from Online Datasets

## Get chemical structures (CIF files) from the Crystallography Open Database (COD)

In [3]:
# Restrict the agent to the two tools this task needs: fetching CIF files
# from COD and renaming them.
tools = EunomiaTools(
    tool_names=['get_cif_from_COD', 'rename_cif'],
).get_tools()

# NOTE(review): get_cost=True presumably enables the token/cost summary
# printed after a run — confirm against the Eunomia agent implementation.
agent = Eunomia(
    tools=tools,
    model='gpt-4',
    get_cost=True,
)

# Kept as the final expression so the agent's answer is shown as the cell output.
agent.run(
    prompt="Download cif files for this doi: 10.1021/cg301691d from COD and rename them."
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI need to first download the CIF files from the COD using the provided DOI, then rename them using the MOF-Name associated with a target key.
Action: get_cif_from_COD
Action Input: 10.1021/cg301691d[0m
Observation: [36;1m[1;3m['4507517.cif', '4507518.cif', '4507519.cif', '4507520.cif', '4507521.cif', '4507522.cif'][0m
Thought:[32;1m[1;3mNow that I have the CIF files, I need to rename them using the MOF-Name associated with a target key.
Action: rename_cif
Action Input: '4507517.cif'[0m
Observation: [33;1m[1;3mNone[0m
Thought:[32;1m[1;3mI have renamed the first file. Now I need to rename the rest of the files.
Action: rename_cif
Action Input: '4507518.cif'[0m
Observation: [33;1m[1;3mNone[0m
Thought:[32;1m[1;3mI have renamed the second file. Now I need to rename the rest of the files.
Action: rename_cif
Action Input: '4507519.cif'[0m
Observation: [33;1m[1;3mNone[0m
Thought:[32;1m[1;3mI have renamed the t

'All CIF files have been downloaded and renamed.'

## Get chemical structures (CIF files) from the Cambridge Structural Database (CSD) via the CCDC

In [2]:
# Single-tool agent: only the CCDC download tool is exposed.
tools = EunomiaTools(
    tool_names=['get_cif_from_CCDC'],
).get_tools()

agent = Eunomia(
    tools=tools,
    model='gpt-4',
    get_cost=True,
)

# Kept as the final expression so the agent's answer is shown as the cell output.
agent.run(
    prompt="Download cif files for this doi: 10.1021/cg301691d from CCDC."
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI need to use the tool get_cif_from_CCDC to download the CIF file for the given DOI.
Action: get_cif_from_CCDC
Action Input: 10.1021/cg301691d[0m
Observation: [36;1m[1;3mNone[0m
Thought:[32;1m[1;3mThe CIF file for the given DOI has been downloaded.
Final Answer: The CIF file for the DOI 10.1021/cg301691d has been successfully downloaded from CCDC.[0m

[1m> Finished chain.[0m
Tokens Used: 479
	Prompt Tokens: 397
	Completion Tokens: 82
Successful Requests: 2
Total Cost (USD): $0.016829999999999998


'The CIF file for the DOI 10.1021/cg301691d has been successfully downloaded from CCDC.'

# Obtaining Information from Local PDFs

In [3]:
# Identifier of the paper to load; reused below when building the FAISS path.
paper_id = '69'

# Loader bound to the local paper directory and the selected paper.
docs_processor = eunomia.LoadDoc('../Data/chosen_papers/', paper_id=paper_id)

# NOTE(review): the first argument looks like a list of section markers
# (references / acknowledgements) — confirm its meaning against LoadDoc.process.
sliced_pages = docs_processor.process(
    ['references ', 'acknowledgement', 'acknowledgments', 'references\n'],
    chunk_size=1000,
    chunk_overlap=20,
    chunking_type='fixed-size',
)

In [4]:
from langchain.vectorstores import FAISS
from langchain.embeddings.openai import OpenAIEmbeddings

# Embedding model name passed to OpenAIEmbeddings below.
# (The original comment also listed text-davinci-003 as an alternative.)
Embedding_model = 'text-embedding-ada-002'

# NOTE(review): this path is built but not used in the cells visible here;
# kept in case a later cell persists or reloads the index — confirm.
faiss_index_path = f"../Data/chosen_papers/faiss/{Embedding_model}/faiss_index_{Embedding_model}_{paper_id}.pkl"

# Build the FAISS vector store from the document chunks.
faiss_index = FAISS.from_documents(
    sliced_pages,
    OpenAIEmbeddings(model=Embedding_model),
)

In [5]:
# Agent with the document-reading and justification-checking tools, backed
# by the FAISS index built from the paper's chunks.
tools = EunomiaTools(
    tool_names=['read_doc', 'eval_justification', 'recheck_justification'],
    vectorstore=faiss_index,
).get_tools()

agent = Eunomia(
    tools=tools,
    model='gpt-4',
    get_cost=True,
)

# Kept as the final expression so the agent's answer is shown as the cell output.
agent.run(
    prompt="Read the document and find the MOFS and their water stability."
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI need to read the document to find information about the water stability of various MOFs.
Action: read_doc
Action Input: User's original prompt[0m
k=9 results hitting the token limit for the chosen LLM. Reducing k and retrying...

k=8 results hitting the token limit for the chosen LLM. Reducing k and retrying...

Observation: [36;1m[1;3m1. MOF: [ZrO(O2C-R-CO2)] (MIL-140A)
   Water Stability: Stable
   Probability Score: 0.9
   Justification: "Thermogravimetric analysis (TGA) and IR spectroscopy were therefore used to qualitatively assess the hydrophobic character of these solids. TGA measurements have shown that the samples exhibit almost no loss of free water (Supporting Information Figure S18)."

2. MOF: [ZrO(O2C-R-CO2)] (MIL-140B)
   Water Stability: Stable
   Probability Score: 0.9
   Justification: "Thermogravimetric analysis (TGA) and IR spectroscopy were therefore used to qualitatively assess the hydrophobic charac

'The MOFs and their water stability are as follows:\n\n1. MOF: [ZrO(O2C-R-CO2)] (MIL-140A)\n   Water Stability: Stable\n   Justification: "Thermogravimetric analysis (TGA) and IR spectroscopy were therefore used to qualitatively assess the hydrophobic character of these solids. TGA measurements have shown that the samples exhibit almost no loss of free water (Supporting Information Figure S18)."\n\n2. MOF: [ZrO(O2C-R-CO2)] (MIL-140B)\n   Water Stability: Stable\n   Justification: "Thermogravimetric analysis (TGA) and IR spectroscopy were therefore used to qualitatively assess the hydrophobic character of these solids. TGA measurements have shown that the samples exhibit almost no loss of free water (Supporting Information Figure S18)."\n\n3. MOF: [ZrO(O2C-R-CO2)] (MIL-140C)\n   Water Stability: Stable\n   Justification: "Thermogravimetric analysis (TGA) and IR spectroscopy were therefore used to qualitatively assess the hydrophobic character of these solids. TGA measurements have shown