In [1]:
import eunomia
from eunomia.agents import Eunomia
from eunomia.tools import EunomiaTools
# from eunomia.tools import get_cif_from_COD, rename_cif, get_cif_from_CCDC

In [2]:
# Build an agent that is only given the COD download tool and the CIF rename
# tool, then ask it to fetch and rename the CIF files for a single DOI.
cod_toolkit = EunomiaTools(tool_names=['get_cif_from_COD', 'rename_cif'])
tools = cod_toolkit.get_tools()
agent = Eunomia(
    tools=tools,
    model='gpt-4',
    get_cost=True,
)
# Kept as the last expression of the cell so the value returned by run() is displayed.
agent.run(
    prompt="Download cif files for this doi: 10.1021/cg301691d from COD and rename them."
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI need to first download the CIF files from the COD using the provided DOI. Then, I will rename them using the MOF-Name associated with a target key.
Action: get_cif_from_COD
Action Input: 10.1021/cg301691d[0m

ProtocolError: ('Connection aborted.', ConnectionResetError(10054, 'An existing connection was forcibly closed by the remote host', None, 10054, None))

In [2]:
# Same workflow as the COD cell, but the agent is only given the CCDC download tool.
ccdc_toolkit = EunomiaTools(tool_names=['get_cif_from_CCDC'])
tools = ccdc_toolkit.get_tools()
agent = Eunomia(
    tools=tools,
    model='gpt-4',
    get_cost=True,
)
# NOTE(review): in the recorded run the tool observation was None while the agent
# still reported success -- confirm that files were actually written.
agent.run(
    prompt="Download cif files for this doi: 10.1021/cg301691d from CCDC."
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI need to use the tool get_cif_from_CCDC to download the CIF file for the given DOI.
Action: get_cif_from_CCDC
Action Input: 10.1021/cg301691d[0m
Observation: [36;1m[1;3mNone[0m
Thought:[32;1m[1;3mThe CIF file for the DOI 10.1021/cg301691d has been downloaded.
Final Answer: The CIF file for the DOI 10.1021/cg301691d has been downloaded.[0m

[1m> Finished chain.[0m
Tokens Used: 484
	Prompt Tokens: 397
	Completion Tokens: 87
Successful Requests: 2
Total Cost (USD): $0.01713


'The CIF file for the DOI 10.1021/cg301691d has been downloaded.'

In [2]:
# Load one paper from the chosen-papers folder and split it into fixed-size chunks.
paper_id = '69'
docs_processor = eunomia.LoadDoc('../Data/chosen_papers/', paper_id=paper_id)

# The first argument to process() is an empty list here. A previous variant of this
# cell passed ['references ', 'acknowledgement', 'acknowledgments', 'references\n']
# instead (presumably section markers to cut the text at -- TODO confirm).
chunking_options = dict(
    chunk_size=1000,
    chunk_overlap=20,
    chunking_type='fixed-size',
)
sliced_pages = docs_processor.process([], **chunking_options)

In [3]:
from langchain.chains import RetrievalQA
import os
import pickle
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS

# Build (or reload) a FAISS index over the chunked paper and expose it as an
# MMR retriever.
k = 6  # forwarded to the retriever as search_kwargs["k"]
# Embedding_model = 'text-davinci-003' # text-embedding-ada-002
Embedding_model = 'text-embedding-ada-002' # text-embedding-ada-002 text-davinci-003


faiss_index_path = f"../Data/chosen_papers/faiss/{Embedding_model}/faiss_index_{Embedding_model}_{paper_id}.pkl"
just_load = False  # set to True to reuse an existing pickled index instead of re-embedding
if os.path.isfile(faiss_index_path) and just_load:
    print('loadded Embeddings')
    # NOTE: pickle.load can execute arbitrary code; only load index files that
    # were produced by this notebook.
    with open(faiss_index_path, "rb") as f:
        faiss_index = pickle.load(f)
else:
    # Build the index BEFORE touching the cache file: if the embedding request
    # fails (network error, rate limit, ...) no empty or truncated pickle is
    # left behind for a later just_load=True run to choke on.
    faiss_index = FAISS.from_documents(sliced_pages, OpenAIEmbeddings(model=Embedding_model))    # 'text-davinci-003

    # The cache path contains a per-model sub-directory that may not exist yet.
    os.makedirs(os.path.dirname(faiss_index_path), exist_ok=True)

    # Write to a temporary sibling file and rename it into place, so the final
    # path only ever holds a completely written pickle.
    faiss_index_tmp_path = faiss_index_path + ".tmp"
    with open(faiss_index_tmp_path, "wb") as f:  # text-embedding-ada-002
        pickle.dump(faiss_index, f)
    os.replace(faiss_index_tmp_path, faiss_index_path)

# faiss_index = LanceDB.from_documents(sliced_pages, OpenAIEmbeddings(model=Embedding_model))

retriever  = faiss_index.as_retriever(search_type="mmr", search_kwargs={"k":k, 'fetch_k': 50}) # Maximum Marginal Relevance Retrieval


In [4]:
# Give the agent a single document-reading tool backed by the FAISS retriever
# and ask it to extract MOFs and their water stability from the loaded paper.
# NOTE(review): the recorded output of this cell is a pydantic ValidationError
# ("<classmethod object ...> is not callable") for Tool -- this looks like it
# originates in the library's tool registration, not in this cell; confirm.
read_doc_toolkit = EunomiaTools(tool_names=['read_doc'], retriever=retriever)
tools = read_doc_toolkit.get_tools()
agent = Eunomia(
    tools=tools,
    model='gpt-4',
    get_cost=True,
)
agent.run(
    prompt="Read the document and find the MOFS and their water stability."
)

ValidationError: 1 validation error for Tool
func
  <classmethod object at 0x000001BB9FFED3A0> is not callable (type=type_error.callable; value=<classmethod object at 0x000001BB9FFED3A0>)

In [4]:
import json
# Parse the QMOF structure-data JSON file into `qmof_struct_data`.
# NOTE(review): this path starts with "../data" while other cells in this
# notebook use "../Data" -- on a case-sensitive filesystem these are different
# directories; confirm which spelling is intended.
with open("../data/qmof_database/qmof_structure_data.json", "r") as structure_file:
    qmof_struct_data = json.load(structure_file)

In [5]:
# Print only the first item yielded by iterating qmof_struct_data, to inspect
# the record layout; nothing is printed when the data is empty.
qmof_entries = iter(qmof_struct_data)
try:
    entry = next(qmof_entries)
except StopIteration:
    pass
else:
    print(entry)

{'qmof_id': 'qmof-8a95c27', 'name': 'ABACUF01_FSR', 'structure': {'@module': 'pymatgen.core.structure', '@class': 'Structure', 'charge': 0, 'lattice': {'matrix': [[6.577866356, 0.001198396, 2.218642526], [0.78166899, 7.070336339, 0.96662035], [0.000658751, 0.000799513, 8.791651588]], 'a': 6.9419523112540436, 'b': 7.17878940055673, 'c': 8.791651649033692, 'alpha': 82.25599709677074, 'beta': 71.35701701555689, 'gamma': 81.58285544631552, 'volume': 408.857471262788}, 'sites': [{'species': [{'element': 'Ba', 'occu': 1}], 'abc': [0.09830491028584015, 0.7343525664213004, 0.35340986612669667], 'xyz': [1.220889999999993, 5.192519999999996, 4.034999999999976], 'label': 'Ba', 'properties': {'pbe_ddec_sum_bond_order': 1.559709, 'pbe_ddec_charge': 1.48179, 'pbe_cm5_charge': 0.910193, 'pbe_bader_charge': 1.705145, 'pbe_magmom': 0.001, 'pbe_ddec_spin_density': 0.001119, 'pbe_bader_spin_density': 0.001378}}, {'species': [{'element': 'Ba', 'occu': 1}], 'abc': [0.901694538181694, 0.265646832757696, 0.6

In [4]:
# Build the COD agent inside this cell instead of relying on whichever `agent`
# happens to be alive in the kernel: the nearest preceding cell configures
# `agent` with the 'read_doc' tool, whereas this prompt needs the COD download
# and rename tools. Separate names are used so the existing `tools` / `agent`
# variables are left untouched.
cod_tools = EunomiaTools(tool_names=['get_cif_from_COD', 'rename_cif']).get_tools()
cod_agent = Eunomia(tools=cod_tools, model='gpt-4', get_cost=True)
# Last expression of the cell, so the value returned by run() is displayed.
cod_agent.run("Download cif files for this doi: 10.1021/cg301691d from COD and rename them.")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI need to first download the CIF files from the COD using the provided DOI, then rename them using the MOF-Name associated with a target key.
Action: get_cif_from_COD
Action Input: 10.1021/cg301691d[0m
Observation: [36;1m[1;3m['4507517.cif', '4507518.cif', '4507519.cif', '4507520.cif', '4507521.cif', '4507522.cif'][0m
Thought:[32;1m[1;3mNow that I have the CIF files, I need to rename them using the MOF-Name associated with a target key.
Action: rename_cif
Action Input: '4507517.cif'[0m
Observation: [33;1m[1;3mNone[0m
Thought:[32;1m[1;3mThe first file has been renamed. I need to repeat this process for the remaining files.
Action: rename_cif
Action Input: '4507518.cif'[0m
Observation: [33;1m[1;3mNone[0m
Thought:[32;1m[1;3mThe second file has been renamed. I need to repeat this process for the remaining files.
Action: rename_cif
Action Input: '4507519.cif'[0m
Observation: [33;1m[1;3mNone[0m
Thought:[32;1m

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for 10KTPM-200RPM in organization org-ch7xZkxfNhNOfiyd4gkEf754 on tokens per min. Limit: 10000 / min. Please try again in 6ms. Contact us through our help center at help.openai.com if you continue to have issues..


[32;1m[1;3mThe fourth file has been renamed. I need to repeat this process for the remaining files.
Action: rename_cif
Action Input: '4507521.cif'[0m
Observation: [33;1m[1;3mNone[0m
Thought:

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for 10KTPM-200RPM in organization org-ch7xZkxfNhNOfiyd4gkEf754 on tokens per min. Limit: 10000 / min. Please try again in 6ms. Contact us through our help center at help.openai.com if you continue to have issues..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for 10KTPM-200RPM in organization org-ch7xZkxfNhNOfiyd4gkEf754 on tokens per min. Limit: 10000 / min. Please try again in 6ms. Contact us through our help center at help.openai.com if you continue to have issues..


[32;1m[1;3mThe fifth file has been renamed. I need to repeat this process for the last file.
Action: rename_cif
Action Input: '4507522.cif'[0m
Observation: [33;1m[1;3mNone[0m
Thought:

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for 10KTPM-200RPM in organization org-ch7xZkxfNhNOfiyd4gkEf754 on tokens per min. Limit: 10000 / min. Please try again in 6ms. Contact us through our help center at help.openai.com if you continue to have issues..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for 10KTPM-200RPM in organization org-ch7xZkxfNhNOfiyd4gkEf754 on tokens per min. Limit: 10000 / min. Please try again in 6ms. Contact us through our help center at help.openai.com if you continue to have issues..


[32;1m[1;3mAll the CIF files have been renamed successfully.
Final Answer: All CIF files for the DOI 10.1021/cg301691d have been downloaded and renamed.[0m

[1m> Finished chain.[0m
Tokens Used: 3862
	Prompt Tokens: 3565
	Completion Tokens: 297
Successful Requests: 8
Total Cost (USD): $0.12476999999999999


'All CIF files for the DOI 10.1021/cg301691d have been downloaded and renamed.'