In [1]:
import os
import eunomia
import pickle

In [2]:
from langchain.llms import OpenAI
from langchain.agents import Tool, tool
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA

In [3]:
# Select the paper to analyse and split it into fixed-size chunks.
paper_id = "62"
docs_processor = eunomia.LoadDoc("../Data/chosen_papers/", paper_id=paper_id)

# The first argument is an empty list here. An earlier variant passed
# ['references ', 'acknowledgement', 'acknowledgments', 'references\n'] instead
# (presumably section keywords to cut at -- confirm against eunomia.LoadDoc.process).
sliced_pages = docs_processor.process(
    [],
    chunk_size=1000,
    chunk_overlap=20,
    chunking_type="fixed-size",
)

In [4]:
from langchain.chains import RetrievalQA

from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS

# Number of chunks the retriever returns per query.
k = 6
# Embedding model used to build the index; 'text-davinci-003' was the other option noted here.
Embedding_model = 'text-embedding-ada-002'

# On-disk cache of the pickled index, one file per (embedding model, paper).
faiss_index_path = f"../Data/chosen_papers/faiss/{Embedding_model}/faiss_index_{Embedding_model}_{paper_id}.pkl"
# Set to True to reuse an existing cache file instead of re-embedding the paper.
just_load = False

if os.path.isfile(faiss_index_path) and just_load:
    print('loadded Embeddings')
    # SECURITY: pickle.load can execute arbitrary code; only load cache files
    # that were produced by this notebook.
    with open(faiss_index_path, "rb") as f:
        faiss_index = pickle.load(f)
else:
    # Build the index BEFORE touching the cache file. Opening the file with "wb"
    # first truncates it immediately, so a failed embedding request would leave
    # an empty pickle behind that breaks every later load with EOFError.
    faiss_index = FAISS.from_documents(sliced_pages, OpenAIEmbeddings(model=Embedding_model))

    # Make sure the per-model cache directory exists on a fresh checkout.
    os.makedirs(os.path.dirname(faiss_index_path), exist_ok=True)

    # Write to a temporary file and move it into place, so an interrupted dump
    # never leaves a truncated cache file under the final name.
    faiss_tmp_path = faiss_index_path + ".tmp"
    with open(faiss_tmp_path, "wb") as f:
        pickle.dump(faiss_index, f)
    os.replace(faiss_tmp_path, faiss_index_path)

# Alternative vector store that was tried:
# faiss_index = LanceDB.from_documents(sliced_pages, OpenAIEmbeddings(model=Embedding_model))

# Maximum Marginal Relevance retrieval: fetch 50 candidates, keep k of them.
retriever = faiss_index.as_retriever(search_type="mmr", search_kwargs={"k": k, 'fetch_k': 50})


In [6]:
# Decision rules for classifying water stability; this text is sent verbatim to the LLM.
# The "steep uptake" criterion is from this paper: https://pubs.acs.org/doi/epdf/10.1021/ja306812r
# Plain string literal: the text contains no placeholders, so no f-prefix is needed.
Rule = """
There are only 3 options for water stability: 
1. Stable: No change in properties after exposure to moisture or steam, soaking or boiling in water or an aqueous solution.
Retaining its porous structure in solution.
No loss of crystallinity.
Water adsorption isotherm should have a steep uptake.
Good cycling performance.

2. Unstable 
The MOF will decompose or change properties or has a change in its crystal structure after exposure/soak to a humid environment, steam or if it partially dissolves in water.


3. Not provided, 
If you don't know or cannot justfy 1 or 2
"""

In [7]:
# Main extraction prompt: asks for every MOF name in the document together with
# its water stability, a confidence score and verbatim supporting sentences.
# The module-level `Rule` text is interpolated into the middle of the prompt.
# NOTE(review): the wording (including "more than once sentence") is sent to the
# LLM as-is; changing it may change model output, so it is left untouched here.
prompt3a = f"""
    You are an expert chemist. The document describes the water stability properties of a few materials including
    Metal-Organic Frameworks(MOF) or coordination polymers, coordination networks or ZIF compounds.
    Find the exact full name for materials, and their stability in water.
    As an example, sentence "UiO-66-X compounds with X = NO2, NH2, OH, CH3 and (CH3)2 show enhanced CO2 uptake", contains the five following MOFS:
    [UiO-66-NO2, UiO-66-NH2, UiO-66-OH, UiO-66-CH3, UiO-66-(CH3)2], but does not provide any information their water stability.
    Use this example to guide yourself better on finding other MOFs in the document.

    Use the following rules to determine water stability:
    {Rule}
    
    Your final answer should contain the following:
    1. The water stability of each MOF.
    2. For each mof:  a probability score ranging between [0, 1]. This probability score shows how certain you are in your answer.
    3. The exact sentences without any changes from the document that justifies your decision. Try to find more than once sentence. This should be "Not provided" if you cannot find water stability.
    """

In [15]:
# Sanity check: ask a plain "stuff" RetrievalQA chain to list the MOF names in the paper.
llm = OpenAI(model_name="gpt-4", temperature=0)  # alternative noted: gpt-3.5-turbo-16k
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
)
result = qa_chain.run("Find all the MOF names in this document and put them in a list")
result

'["Ni3(BTP) 2$3DMF $5CH 3OH$17H 2O( 1, green)", "Cu 3(BTP) 2$8- CH 3OH$10H 2O(2, blue)", "Zn 3(BTP) 2$4CH 3OH$2H2O(3, orange)", "Co3(BTP) 2$8CH 3OH$10H 2O(4, purple)", "Mn-BTT", "Zn(1,3-BDP)", "Co(1,3- BDP)", "UIO-66"]'

In [20]:
from langchain.agents import Tool, tool

@tool
def eval_justification(justification):
    '''
    Always use this tool to validate justification. This function checks if the justification talks about the water stability perdiction makes sense for each MOF. 
    '''
    # NOTE: the docstring above is kept verbatim on purpose -- with langchain's
    # @tool decorator it presumably doubles as the tool description the agent reads.
    import openai

    # Wrap the candidate justification in a fixed review prompt.
    question = f"""
            Do the below sentences actually talk about water stability of the found MOF?
            If not, try to find a better justification for that MOF in the document .
            
            "{justification}"

            To do this, you should check on steep uptakes, change in properties after being exposed to water/steam, change in crystallinity, or mention of water stability in the sentence.
            If the justification can somehow imply water stability, keep it but lower your "Probability score".
            Do not make up answers.
            Do not consider chemical or thermal stability or stability in air as a valid reason. 
            """

    # Single-turn chat completion at temperature 0; return the text of the first choice.
    reply = openai.ChatCompletion.create(
        model="gpt-4",
        messages=[{"role": "user", "content": question}],
        temperature=0,
    )
    first_choice = reply.choices[0]
    return first_choice.message["content"]



@tool
def read_doc(input):
    '''
    Input the users original prompt and get context on water stability of metal organic frameworks.
    Always search for the answers using this tool first, don't make up answers yourself.
    '''
    # `input` is never read: the query is always the module-level `prompt3a`,
    # run against the module-level `faiss_index`.
    return eunomia.RetrievalQABypassTokenLimit(
        prompt3a,
        faiss_index,
        k=8,
        min_k=4,  # Minimum limit for k
        llm=OpenAI(temperature=0, model_name='gpt-4'),
        search_type="mmr",
        fetch_k=50,
        chain_type="stuff",
        memory=None,
    )

@tool
def recheck_justification(MOF_name):
    '''
    This tool reads the document again for the specific MOF_name and tries to find a better justification for its water stability. 
    '''
    # Prompt focused on one MOF; the module-level `Rule` text is embedded in it.
    mof_prompt = f"""
        You are an expert chemist. The document describes the water stability properties of {MOF_name}
    
        Use the following rules to determine its water stability:
        {Rule}
        
        Your final answer should contain the following:
        1. The water stability of the MOF.
        2. The probability score ranging between [0, 1]. This probability score shows how certain you are in your answer.
        3. The exact sentences without any changes from the document that justifies your decision. Try to find more than once sentence. This should be "Not provided" if you cannot find water stability.
        """

    # Same retrieval call as the full-document read, but with the per-MOF prompt and k=6.
    return eunomia.RetrievalQABypassTokenLimit(
        mof_prompt,
        faiss_index,
        k=6,
        min_k=4,  # Minimum limit for k
        llm=OpenAI(temperature=0, model_name='gpt-4'),
        search_type="mmr",
        fetch_k=50,
        chain_type="stuff",
        memory=None,
    )

# Tools handed to the agent: document read, justification check, per-MOF re-read.
tools = [
    read_doc,
    eval_justification,
    recheck_justification,
]


In [21]:
from langchain.chat_models  import ChatOpenAI
from langchain.agents import initialize_agent
from langchain.agents import AgentType
from langchain.callbacks import get_openai_callback

from langchain.memory import ConversationBufferMemory
# Conversation buffer and the LLM that drives the agent loop.
memory = ConversationBufferMemory(memory_key="chat_history")
agent_llm = OpenAI(model_name="gpt-4", temperature=0)

# Zero-shot ReAct agent over `tools`, capped at 25 iterations.
# (CONVERSATIONAL_REACT_DESCRIPTION is the alternative agent type noted by the author.)
# NOTE(review): prefix/suffix/output_parser are passed as None and `stop` is passed
# directly here -- confirm initialize_agent actually consumes these keywords.
agent_chain = initialize_agent(
    tools,
    agent_llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
    memory=memory,
    handle_parsing_errors=True,
    prefix=None,
    suffix=None,
    output_parser=None,
    max_iterations=25,
    stop=["\nObservation:"],
)

In [22]:
# Required structure of the agent's final answer; interpolated into `input_prompt` below.
output_format = '''    Your final answer should contain the following:
    1. The water stability of each MOF.
    2. For each mof:  a probability score ranging between [0, 1]. This probability score shows how certain you are in your answer.
    3. The exact sentences without any changes from the document that justifies your decision. Try to find more than one sentence. Do not summarize. Do not make up a justification that does not exist in the doc.
    This should be "Not provided" if you cannot find water stability.'''

# Top-level task given to the agent: extract the MOFs from the selected paper,
# evaluate each justification, and fall back to "Not provided" when none is valid.
input_prompt = f'''You are an expert chemist. Read paper {paper_id}.pdf, find all the mentioned MOFs and their water stability with a justification of your answers.
                Evaluate each justification. 
                If the justification can imply water stability, use it but lower your "Probability score" for that MOF.
                If the justification is not valid, recheck the justification, if still not valid change "Water Stability" and "Justification" to "Not provided" in your final answer.

                {output_format}

'''

# Run the agent inside the OpenAI callback context and print what the callback recorded.
# NOTE(review): this rebinds `result`, which the earlier RetrievalQA cell also assigns.
with get_openai_callback() as cb:
    result = agent_chain.run(input=input_prompt)
    print(cb)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI need to read the document first to understand the context and find the mentioned MOFs and their water stability.
Action: read_doc
Action Input: 62.pdf[0m
k=8 results hitting the token limit for the chosen LLM. Reducing k and retrying...


Retrying langchain.llms.openai.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for 10KTPM-200RPM in organization org-ch7xZkxfNhNOfiyd4gkEf754 on tokens per min. Limit: 10000 / min. Please try again in 6ms. Contact us through our help center at help.openai.com if you continue to have issues..
Error in OpenAICallbackHandler.on_retry callback: 'OpenAICallbackHandler' object has no attribute 'on_retry'
Retrying langchain.llms.openai.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for 10KTPM-200RPM in organization org-ch7xZkxfNhNOfiyd4gkEf754 on tokens per min. Limit: 10000 / min. Please try again in 6ms. Contact us through our help center at help.openai.com if you continue to have issues..
Error in OpenAICallbackHandler.on_retry callback: 'OpenAICallbackHandler' object has no attribute 'on_retry'
Retrying langchain.llms.openai.completion_with_retry.<loca


Observation: [36;1m[1;3m1. MOF: Ni3(BTP)2
   Water Stability: Stable
   Probability Score: 1
   Justification: "The Ni3(BTP)2 framework of 1 is stable to all of the environments tested and maintains both its crystallinity and porous nature after 14 days of uninterrupted test reactions. Powder X-ray diffraction data collected before and after each test confirm its structural chemical integrity."

2. MOF: Cu3(BTP)2
   Water Stability: Unstable
   Probability Score: 1
   Justification: "In contrast, the copper- and zinc-based frameworks of 2 and 3, undergo transformation to non-porous crystalline solids upon extreme chemical treatment, as rather commonly observed for metal–organic frameworks."

3. MOF: Zn3(BTP)2
   Water Stability: Stable
   Probability Score: 0.8
   Justification: "Remarkably, the Zn3(BTP)2 framework of 3 is stable to all of the environments tested and maintains both its crystallinity and porous nature after 14 days of uninterrupted test reactions."

4. MOF: Co3(BTP)2

Retrying langchain.llms.openai.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for 10KTPM-200RPM in organization org-ch7xZkxfNhNOfiyd4gkEf754 on tokens per min. Limit: 10000 / min. Please try again in 6ms. Contact us through our help center at help.openai.com if you continue to have issues..
Error in OpenAICallbackHandler.on_retry callback: 'OpenAICallbackHandler' object has no attribute 'on_retry'
Retrying langchain.llms.openai.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for 10KTPM-200RPM in organization org-ch7xZkxfNhNOfiyd4gkEf754 on tokens per min. Limit: 10000 / min. Please try again in 6ms. Contact us through our help center at help.openai.com if you continue to have issues..
Error in OpenAICallbackHandler.on_retry callback: 'OpenAICallbackHandler' object has no attribute 'on_retry'
Retrying langchain.llms.openai.completion_with_retry.<loca


Observation: [38;5;200m[1;3m1. The water stability of Cu3(BTP)2 is unstable.
2. The probability score is 1.
3. The exact sentences from the document that justify this decision are: "As depicted in Fig. 8, compound 2 shows a progressive phase transition in boiling water, converting to 2 0. This transformation occurs upon reﬂuxing 2 in aqueous NaOH (pH 14) or HCl (pH 3) solutions for one day." and "Despite its extremely high thermal stability, compound 3 displays a resistance to hot acidic media that is somewhat inferior to that of 1. While its structure is maintained upon heating at 100 /C14C in pH 3 aqueous HCl for 7 days, as shown in Fig. 9, it is not stable to a similar treatment at pH 2."[0m
Thought:[32;1m[1;3mThe rechecked justification for Cu3(BTP)2 explicitly mentions water stability. Now, I need to evaluate the justification for the next MOF.
Action: eval_justification
Action Input: "Remarkably, the Zn3(BTP)2 framework of 3 is stable to all of the environments tested and m

Retrying langchain.llms.openai.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for 10KTPM-200RPM in organization org-ch7xZkxfNhNOfiyd4gkEf754 on tokens per min. Limit: 10000 / min. Please try again in 6ms. Contact us through our help center at help.openai.com if you continue to have issues..
Error in OpenAICallbackHandler.on_retry callback: 'OpenAICallbackHandler' object has no attribute 'on_retry'
Retrying langchain.llms.openai.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for 10KTPM-200RPM in organization org-ch7xZkxfNhNOfiyd4gkEf754 on tokens per min. Limit: 10000 / min. Please try again in 6ms. Contact us through our help center at help.openai.com if you continue to have issues..
Error in OpenAICallbackHandler.on_retry callback: 'OpenAICallbackHandler' object has no attribute 'on_retry'
Retrying langchain.llms.openai.completion_with_retry.<loca


Observation: [38;5;200m[1;3m1. The water stability of the MOF Zn3(BTP)2 is unstable.
2. The probability score is 1.
3. The exact sentences from the document that justify this decision are: "When compound 3was heated in boiling water, a new crys-talline phase Zn 12[Zn 2(H2O)2]6(BTP) 16(30) was obtained, as identiﬁed by X-ray powder diffraction." and "Although 3is highly resistant to high temperatures (up to 510 /C14C), the solid-state transformation in basic pH conditions occurs at room temperature in a veryshort time, indicating that the presence of water is critical to itsinstability."[0m
Thought:

Retrying langchain.llms.openai.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for 10KTPM-200RPM in organization org-ch7xZkxfNhNOfiyd4gkEf754 on tokens per min. Limit: 10000 / min. Please try again in 6ms. Contact us through our help center at help.openai.com if you continue to have issues..
Error in OpenAICallbackHandler.on_retry callback: 'OpenAICallbackHandler' object has no attribute 'on_retry'


[32;1m[1;3mThe rechecked justification for Zn3(BTP)2 explicitly mentions water stability. Now, I need to evaluate the justification for the next MOF.
Action: eval_justification
Action Input: Not provided[0m

Retrying langchain.llms.openai.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for 10KTPM-200RPM in organization org-ch7xZkxfNhNOfiyd4gkEf754 on tokens per min. Limit: 10000 / min. Please try again in 6ms. Contact us through our help center at help.openai.com if you continue to have issues..
Error in OpenAICallbackHandler.on_retry callback: 'OpenAICallbackHandler' object has no attribute 'on_retry'



Observation: [33;1m[1;3mAs the text "Not provided" doesn't provide any information about the water stability of the found MOF, it's impossible to determine whether it discusses this topic.[0m
Thought:[32;1m[1;3mThe justification for Co3(BTP)2 is not provided in the document. Therefore, I cannot evaluate it. Now, I need to evaluate the justification for the next MOF.
Action: eval_justification
Action Input: "The zirconium-based framework UIO-66, has been shown to display thermal stability up to 540/C14C, but its chemical stability in water and common organic solvents was verified only for no longer than 24 h at room temperature."[0m

Retrying langchain.llms.openai.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for 10KTPM-200RPM in organization org-ch7xZkxfNhNOfiyd4gkEf754 on tokens per min. Limit: 10000 / min. Please try again in 6ms. Contact us through our help center at help.openai.com if you continue to have issues..
Error in OpenAICallbackHandler.on_retry callback: 'OpenAICallbackHandler' object has no attribute 'on_retry'



Observation: [33;1m[1;3mYes, the sentence does talk about the water stability of the found MOF. It mentions that the chemical stability of UIO-66 in water was verified for up to 24 hours at room temperature.[0m
Thought:[32;1m[1;3mThe justification for UIO-66 explicitly mentions water stability. I have evaluated all the justifications for the MOFs mentioned in the document.
Final Answer: 
1. Ni3(BTP)2: 
   - Water Stability: Stable
   - Probability Score: 1
   - Justification: "The chemical resistance of 1–3was examined by suspending samples of the compounds in boiling water, boiling aqueous HClor HNO 3solutions at pH 2, and a boiling aqueous NaOH solution at pH 14, conditions that reﬂect extreme operationalparameters in industry." and "Remarkably, the Ni 3(BTP) 2framework of 1is stable to all of the environments tested and maintains both its crystallinity andporous nature after 14 days of uninterrupted test reactions."

2. Cu3(BTP)2: 
   - Water Stability: Unstable
   - Probabili

In [23]:
# Parse the agent's free-text final answer into a dict (the cell output is keyed by MOF name).
# NOTE(review): `result` must come from the agent run; the earlier RetrievalQA cell assigns the same name.
eunomia.parse_to_dict(result, paper_id)

{'Ni3(BTP)2': {'Predicted Stability': 'Stable',
  'Score': 1.0,
  'Justification': 'The chemical resistance of 1–3was examined by suspending samples of the compounds in boiling water, boiling aqueous HClor HNO 3solutions at pH 2, and a boiling aqueous NaOH solution at pH 14, conditions that reﬂect extreme operationalparameters in industry. Remarkably, the Ni 3(BTP) 2framework of 1is stable to all of the environments tested and maintains both its crystallinity andporous nature after 14 days of uninterrupted test reactions.',
  'Paper id': '62'},
 'Cu3(BTP)2': {'Predicted Stability': 'Unstable',
  'Score': 1.0,
  'Justification': 'As depicted in Fig. 8, compound 2 shows a progressive phase transition in boiling water, converting to 2 0. This transformation occurs upon reﬂuxing 2 in aqueous NaOH (pH 14) or HCl (pH 3) solutions for one day. Despite its extremely high thermal stability, compound 3 displays a resistance to hot acidic media that is somewhat inferior to that of 1. While its st