In [1]:
import os
import eunomia
import pickle


In [2]:
# Select the paper to analyse and cut its text into fixed-size chunks.
paper_id = '70'
docs_processor = eunomia.LoadDoc('../Data/chosen_papers/', paper_id=paper_id)

# The first positional argument is a list of section-heading strings.
# NOTE(review): presumably text from these headings onward is discarded before
# chunking -- confirm against eunomia.LoadDoc.process.
sliced_pages = docs_processor.process(
    ['references ', 'acknowledgement', 'acknowledgments', 'references\n'],
    chunk_size=1000,
    chunk_overlap=20,
    chunking_type='fixed-size',
)

In [3]:
from langchain.chains import RetrievalQA

from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS

k = 5  # number of documents the retriever returns per query
# Embedding_model = 'text-davinci-003' # text-embedding-ada-002
Embedding_model = 'text-embedding-ada-002' # text-embedding-ada-002


# On-disk cache of the vector index, one pickle per (embedding model, paper).
faiss_index_path = f"../Data/chosen_papers/faiss/{Embedding_model}/faiss_index_{Embedding_model}_{paper_id}.pkl"
just_load = True  # set to False to re-embed even when a cached index exists
if os.path.isfile(faiss_index_path) and just_load:
    print('loadded Embeddings')
    # NOTE(review): pickle.load can execute arbitrary code; only load cache
    # files that were produced by this notebook.
    with open(faiss_index_path, "rb") as f:
        faiss_index = pickle.load(f)
else:
    # Build the index BEFORE touching the cache file. Opening the file first
    # would leave an empty pickle behind if the embedding call fails, and the
    # next run would then try to load that empty file.
    faiss_index = FAISS.from_documents(sliced_pages, OpenAIEmbeddings(model=Embedding_model))    # 'text-davinci-003

    # The per-model cache directory may not exist yet on a fresh checkout.
    os.makedirs(os.path.dirname(faiss_index_path), exist_ok=True)

    # Write to a temporary file and rename it into place so that an interrupted
    # dump never leaves a truncated cache file at the final path.
    faiss_index_tmp_path = faiss_index_path + ".tmp"
    with open(faiss_index_tmp_path, "wb") as f:  # text-embedding-ada-002
        pickle.dump(faiss_index, f)
    os.replace(faiss_index_tmp_path, faiss_index_path)

# faiss_index = LanceDB.from_documents(sliced_pages, OpenAIEmbeddings(model=Embedding_model))

retriever  = faiss_index.as_retriever(search_type="mmr", search_kwargs={"k":k, 'fetch_k': 50}) # Maximum Marginal Relevance Retrieval


loadded Embeddings


In [4]:
from langchain.llms import OpenAI
from langchain.agents import Tool, tool
from langchain.chat_models  import ChatOpenAI
from langchain.chains import RetrievalQA
# gpt-3.5-turbo-16k is a chat-completion model, so it is wrapped with
# ChatOpenAI (imported above in this cell) instead of the completion-style
# OpenAI class.
llm = ChatOpenAI(temperature=0, model_name='gpt-3.5-turbo-16k') # gpt-3.5-turbo-16k

# Retrieval QA chain over the paper's vector index; only the answer text is
# returned (return_source_documents=False).
qa = RetrievalQA.from_chain_type(
    llm=llm, chain_type="stuff", retriever=retriever, return_source_documents=False,)



In [5]:
# Classification rubric interpolated into the extraction prompts.
# Every rule sits on its own line: the earlier backslash continuations glued
# neighbouring sentences together (e.g. "3. Not providedFor 1. Stable").
# A plain string is used because the text contains no placeholders.
Rule = """
There are only 3 options for water stability:
1. Stable
2. Unstable
3. Not provided
For 1. Stable,
No change after exposure, soaking or boiling in water or an aqueous solution.
Water adsorption isotherm should have a steep uptake.
For 2. Unstable,
The MOF will decompose after exposure to humid environment or partially dissolves in water.
For 3. Not provided,
If you don't know or cannot justify 1 or 2
"""

# Steep uptake behavior is from this paper: https://pubs.acs.org/doi/epdf/10.1021/ja306812r

In [6]:
# First version of the extraction prompt. It interpolates `Rule` and is the
# query passed to `qa.run(prompt3a)` in the next cell.
# A trailing backslash joins that source line with the following one, so most
# of this text reaches the model as a single long line.
# NOTE(review): the text asks for JSON "as described below", but no format is
# actually described after that sentence.
# NOTE(review): `prompt3a` is assigned again by a later cell; cells that run
# after that one see the later text, not this one.
prompt3a = f"""
    You are an expert chemist. The doc tool describes the water stability properties of a few materials including \
    Metal-Organic Frameworks(MOF) or coordination polymers, Coordination Networks or ZIF compounds. Please find \
    the full name for materials, and their \
    stability in water. \
    Your final answer should contain the following:\
    Your first output is the water stability of each MOF. \
    Your second output for each mof should be a probability score ranging between [0, 1].\
    This probability score shows how certain you are in your answer.\
    Your third output should be exact sentences without any changes from the document that justifies your decision.\
    This should be "Not provided" if you cannot find water stability.\
    Rule: {Rule}\
    If there are multiple MOFs, please list them all and put "MOF name" before them. \
    You should only respond in JSON format as described below. 
    Make sure it is parsable using Python json.load.
    Make sure "MOF name"s do not have duplicates
    If information is not provided, state it as not provided.\
    """

In [7]:
# Rebuild the retrieval QA chain with default options and ask it the
# extraction prompt; the returned string is shown as the cell output.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
)
qa.run(prompt3a)

'{\n  "UiO-66": {\n    "water_stability": "Stable",\n    "probability_score": 0.9,\n    "justification": "UiO-66 has high thermal and chemical stability, making it suitable for CO2/CH4 gas separation. It has been demonstrated to retain its crystallinity after treatment with water, acetic acid, and 1mHCl solutions."\n  },\n  "UiO-66-F": {\n    "water_stability": "Unstable",\n    "probability_score": 0.8,\n    "justification": "UiO-66-F loses its crystallinity after treatment with 1mNaOH solution, indicating its instability in water."\n  },\n  "UiO-66-CH3": {\n    "water_stability": "Not provided",\n    "probability_score": 0.5,\n    "justification": "Water stability information for UiO-66-CH3 is not provided in the given context."\n  },\n  "UiO-66-SO3H": {\n    "water_stability": "Stable",\n    "probability_score": 0.9,\n    "justification": "UiO-66-SO3H retains its crystallinity after removing guest molecules from the pores, indicating its stability in water."\n  }\n}'

In [8]:
from langchain.llms import OpenAI

# gpt-3.5-turbo-16k is a chat-completion model, so it is wrapped with
# ChatOpenAI (imported in an earlier cell) instead of the completion-style
# OpenAI class.
llm = ChatOpenAI(temperature=0, model_name='gpt-3.5-turbo-16k') # gpt-3.5-turbo-16k

# Retrieval QA chain over the paper's vector index; only the answer text is
# returned (return_source_documents=False).
qa = RetrievalQA.from_chain_type(
    llm=llm, chain_type="stuff", retriever=retriever, return_source_documents=False,)

In [9]:
from langchain.agents import Tool, tool

@tool
def eval_justification(justification):
    '''
    Always use this tool at the last step. This function checks if the justification talks about the water stability perdiction makes sense. 
    '''
    # The docstring above is what the `@tool` decorator exposes to the agent as
    # the tool description, so its wording is left exactly as written.
    import openai

    # Single-turn review request built around the justification text.
    review_prompt = f"""
            Does the below sentence talk about water stability on all found MOFS?\
            If not try to find a better justification for the MOF that does not.\
            
            "{justification}"\

            To do this, you should be looking at information on steep uptakes, change or desolving in water and water stability in the sentence.\
            Do not make up answers. Do not consider chemical or thermal stability or stability in air as a valid reason.\
            """
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": review_prompt}],
        temperature=0,
    )
    # Return only the text of the first choice.
    return completion.choices[0].message["content"]


# tools = [doc_retrival]


In [10]:

# Second version of the extraction prompt; it replaces the earlier `prompt3a`
# for every cell that runs after this one (`qa_chain.run(prompt3a)` and the
# `read_doc` tool both read it).
# Compared with the first version it keeps real line breaks, adds a worked
# example of expanding "UiO-66-X" shorthand into individual MOF names, and
# numbers the formatting requirements. `Rule` is interpolated under "Rules:".
# NOTE(review): item 2 asks for JSON "as described below", but no format is
# described after it.
prompt3a = f"""
    You are an expert chemist. The document describes the water stability properties of a few materials including
    Metal-Organic Frameworks(MOF) or coordination polymers, Coordination Networks or ZIF compounds. Please find
    the exact full name for materials, and their stability in water.
    As an example, sentence "UiO-66-X compounds with X = NO2, NH2, OH, CH3 and (CH3)2 show enhanced CO2 uptake", contains the five following MOFS:
    [UiO-66-NO2, UiO-66-NH2, UiO-66-OH, UiO-66-CH3, UiO-66-(CH3)2], but does not provide any information their water stability.
    
    Your final answer should contain the following:
    Your first output is the water stability of each MOF.
    Your second output for each mof should be a probability score ranging between [0, 1].
    This probability score shows how certain you are in your answer.
    Your third output should be exact sentences without any changes from the document that justifies your decision.
    Do not make up a justification that does not exist in the doc.
    This should be "Not provided" if you cannot find water stability.
    Rules: 
    {Rule}
    
    IMPORTANT:
    1. If there are multiple MOFs, please list them all and put "MOF name" before them.
    2. You should only respond in JSON format as described below. 
    3. Make sure it is parsable using Python json.load.
    4. Make sure "MOF name"s do not have duplicates
    5. If information is not provided, state it as not provided.\
    """

In [11]:
# gpt-4 is a chat-completion model, so it is wrapped with ChatOpenAI (imported
# in an earlier cell) instead of the completion-style OpenAI class.
llm = ChatOpenAI(temperature=0, model_name='gpt-4') # gpt-3.5-turbo-16k

# Ask the GPT-4 backed retrieval chain the extraction prompt; the returned
# string is shown as the cell output.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm, chain_type="stuff", retriever=retriever)
qa_chain.run(prompt3a)

'[\n    {\n        "MOF name": "UiO-66-CF3",\n        "Water Stability": "Stable",\n        "Probability Score": 1,\n        "Justification": "The chemical stabilities of the five UiO-66-X compounds were examined by stirring the samples in water, acetic acid,1mHCl and 1 mNaOH solutions for 12 h. Afterwards, the solids were collected by filtration, and their crystallinity was checked by XRPD measurements. All of the solids, except 3-F2, retained their crystallinity after treatment with water, acetic acid and 1 mHCl solutions."\n    },\n    {\n        "MOF name": "UiO-66-(CO2H)2",\n        "Water Stability": "Stable",\n        "Probability Score": 1,\n        "Justification": "The chemical stabilities of the five UiO-66-X compounds were examined by stirring the samples in water, acetic acid,1mHCl and 1 mNaOH solutions for 12 h. Afterwards, the solids were collected by filtration, and their crystallinity was checked by XRPD measurements. All of the solids, except 3-F2, retained their crys

In [12]:
@tool
def read_doc(input):
    '''
    Input the users original prompt and get context on water stability of metal organic frameworks.
    Always search for the answers using this tool first, don't make up answers yourself.
    '''
    # The docstring above is what the `@tool` decorator exposes to the agent as
    # the tool description, so its wording is left exactly as written.
    #
    # `input` (whatever the agent passes as Action Input) is not read: the tool
    # always runs the notebook-level `prompt3a` through the retrieval chain.
    #
    # gpt-4 is a chat-completion model, so it is wrapped with ChatOpenAI
    # instead of the completion-style OpenAI class.
    llm = ChatOpenAI(temperature=0, model_name='gpt-4') # gpt-3.5-turbo-16k
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm, chain_type="stuff", retriever=retriever)
    # Calling the chain returns a dict; only its 'result' entry is handed back.
    return qa_chain({"query": prompt3a})['result']

# Tools handed to the agent: document lookup and the justification check.
tools = [read_doc, eval_justification]


In [13]:
from langchain.memory import ConversationBufferMemory
from langchain.chat_models  import ChatOpenAI
from langchain.agents import initialize_agent
from langchain.agents import AgentType
from langchain.callbacks import get_openai_callback

# gpt-4 is a chat-completion model, so the agent is driven through ChatOpenAI
# (imported above in this cell) instead of the completion-style OpenAI class.
agent_llm = ChatOpenAI(temperature=0, model_name='gpt-4')

# NOTE(review): the zero-shot ReAct agent prompt presumably has no
# "chat_history" slot, in which case this memory is stored but never shown to
# the model -- confirm, or switch to a conversational agent type.
memory = ConversationBufferMemory(memory_key="chat_history")

# ReAct agent over `tools`; max_iterations=6 bounds the think/act loop and
# handle_parsing_errors=True is set so malformed model output does not abort it.
agent_chain = initialize_agent(tools, agent_llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
                               verbose=True, memory=memory, handle_parsing_errors=True,
                              prefix= None, suffix=None, output_parser=None, max_iterations=6)

In [14]:
# Task description given to the agent; the paper id is interpolated into the
# file name mentioned in the text.
input_prompt = f'''You are an expert chemist. Read paper {paper_id}.pdf, find all the mentioned MOFs and their water stability with a justification of your answers.
                If you cannot find the reason, have "Water Stability" as "Not provided" in your final answer.\

'''

# Run the agent inside the OpenAI callback context, then print the callback
# object that was active during the run.
with get_openai_callback() as cb:
    result = agent_chain.run(input=input_prompt)
print(cb)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI need to read the document to find all the mentioned MOFs and their water stability. If the document does not provide a reason for the water stability, I will note that in my final answer.
Action: read_doc
Action Input: 70.pdf[0m
Observation: [36;1m[1;3m[
    {
        "MOF name": "UiO-66-CF3",
        "Water Stability": "Stable",
        "Probability Score": 1,
        "Justification": "The chemical stabilities of the five UiO-66-X compounds were examined by stirring the samples in water, acetic acid,1mHCl and 1 mNaOH solutions for 12 h. Afterwards, the solids were collected by filtration, and their crystallinity was checked by XRPD measurements. All of the solids, except 3-F2, retained their crystallinity after treatment with water, acetic acid and 1 mHCl solutions."
    },
    {
        "MOF name": "UiO-66-(CO2H)2",
        "Water Stability": "Stable",
        "Probability Score": 1,
        "Justification": "The chemi

In [15]:
# Turn the agent's final answer into a dictionary and display it.
# The recorded output below is keyed by MOF name, each entry holding
# 'Predicted Stability', 'Score', 'Justification' and 'Paper id'.
eunomia.parse_to_dict(result, paper_id)

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised ServiceUnavailableError: The server is overloaded or not ready yet..


{'UiO-66-CF3': {'Predicted Stability': 'Stable',
  'Score': 0.9,
  'Justification': 'The chemical stabilities of the five UiO-66-X compounds were examined by stirring the samples in water, acetic acid, 1mHCl and 1mNaOH solutions for 12 h. Afterwards, the solids were collected by filtration, and their crystallinity was checked by XRPD measurements. All of the solids, except 3-F2, retained their crystallinity after treatment with water, acetic acid, and 1mHCl solutions.',
  'Paper id': '70'},
 'UiO-66-(CO2H)2': {'Predicted Stability': 'Stable',
  'Score': 0.8,
  'Justification': 'Same as above.',
  'Paper id': '70'},
 'UiO-66-F2': {'Predicted Stability': 'Unstable',
  'Score': 0.2,
  'Justification': 'Same as above.',
  'Paper id': '70'},
 'UiO-66-Cl2': {'Predicted Stability': 'Stable',
  'Score': 0.7,
  'Justification': 'Same as above.',
  'Paper id': '70'},
 'UiO-66-Br2': {'Predicted Stability': 'Stable',
  'Score': 0.6,
  'Justification': 'Same as above.',
  'Paper id': '70'}}