In [1]:
import os
import eunomia
import pickle




In [2]:
# Identifier of the paper to analyse; reused below for the index cache path
# and when parsing the agent's answer.
paper_id = '1'

# Loader for the paper's PDF under ../Data/SI_PDF/.
docs_processor = eunomia.LoadDoc('../Data/SI_PDF/', paper_id=paper_id)

# Split the document into fixed-size chunks.
# NOTE(review): the string list presumably marks section headings (references /
# acknowledgements) at which the text is cut off -- confirm against eunomia.
sliced_pages = docs_processor.process(
    ['references ', 'acknowledgement', 'acknowledgments', 'references\n'],
    chunk_size=1000,
    chunk_overlap=10,
    chunking_type='fixed-size',
)

In [3]:
from langchain.chains import RetrievalQA

from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS

# Number of chunks the retriever hands to the QA chain per query.
k = 5
# OpenAI embedding model used to vectorise the document chunks.
Embedding_model = 'text-embedding-ada-002'

# On-disk cache of the pickled FAISS index for this (model, paper) pair.
faiss_index_path = f"../Data/chosen_papers/faiss/{Embedding_model}/faiss_index_{Embedding_model}_{paper_id}.pkl"
# Set to True to reuse an existing cached index instead of re-embedding.
just_load = False
if os.path.isfile(faiss_index_path) and just_load:
    print('loadded Embeddings')
    # SECURITY: pickle.load can execute arbitrary code; only load cache files
    # that this notebook produced itself.
    with open(faiss_index_path, "rb") as f:
        faiss_index = pickle.load(f)
else:
    # Build the index BEFORE opening the cache file: opening with "wb" truncates
    # it, so a failed embedding call would otherwise leave a zero-byte pickle
    # that breaks the next `just_load = True` run with EOFError.
    faiss_index = FAISS.from_documents(sliced_pages, OpenAIEmbeddings(model=Embedding_model))
    # The per-model cache directory may not exist yet on a fresh checkout.
    os.makedirs(os.path.dirname(faiss_index_path), exist_ok=True)
    with open(faiss_index_path, "wb") as f:
        pickle.dump(faiss_index, f)

# Maximum Marginal Relevance retrieval: fetch 50 candidates, keep k of them.
retriever = faiss_index.as_retriever(search_type="mmr", search_kwargs={"k": k, 'fetch_k': 50})

In [4]:
from langchain.llms import OpenAI

from langchain.chat_models import ChatOpenAI

# gpt-3.5-turbo-16k is a chat model, so it is wrapped with ChatOpenAI (the same
# wrapper the agent LLM uses) rather than the completion-style `OpenAI` class,
# which langchain reports as an unsupported way to initialise chat models.
llm = ChatOpenAI(temperature=0, model_name='gpt-3.5-turbo-16k')

# "stuff" chain: the retrieved chunks are placed into a single prompt for the
# LLM; only the answer text is returned (no source documents).
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=False,
)



In [27]:
from langchain.agents import Tool, tool

# Classification rubric interpolated into the agent prompt.
# A trailing backslash joins the next physical line onto the current one, so
# every continued line must end with a space before the backslash; otherwise
# adjacent sentences are fused together (e.g. "Not providedFor 1. Stable").
Rule = """
There are only 3 options for water stability: \
1. Stable \
2. Unstable \
3. Not provided \
For 1. Stable, \
No change after exposure, soaking or boiling in water or an aqueous solution. \
Water adsorption isotherm should have a steep uptake. \
For 2. Unstable, \
The MOF will decompose after exposure \
to humid environment or partially dissolves in water. \
For 3. Not provided, \
If you don't know or cannot justify 1 or 2
"""

@tool
def eval_justification(justification):
    '''
    Always use this tool at the last step. This function checks if the justification talks about the water stability perdiction makes sense. 
    '''
    # NOTE: langchain's @tool decorator exposes the docstring above to the agent
    # as the tool description, so it is runtime text and is kept verbatim.
    import openai

    # Ask a second model whether the justification actually addresses water
    # stability for every MOF mentioned.
    review_prompt = f"""
            Does the below sentence talk about water stability on all found MOFS?\
            If not try to find a better justification for the MOF that does not.\
            
            "{justification}"\

            To do this, you should be looking at information on steep uptakes, change or desolving in water and water stability in the sentence.\
            Do not make up answers.\
            """
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": review_prompt}],
        temperature=0,
    )
    # Return only the text of the first (and only requested) completion.
    first_choice = completion.choices[0]
    return first_choice.message["content"]

# Tools offered to the agent: "doc" answers questions from the retrieved paper
# chunks via the RetrievalQA chain; eval_justification reviews the final
# justification text.
tools = [
    Tool(
        name="doc",
        description=f"Always use this tool for context. Use the original prompt for this tool to get context about water stability of MOFs.",
        func=qa.run,
    ),
    eval_justification,
]

# tools = [doc_retrival]


In [28]:
from langchain.memory import ConversationBufferMemory
from langchain.chat_models  import ChatOpenAI
from langchain.agents import initialize_agent
from langchain.agents import AgentType
from langchain.callbacks import get_openai_callback

# Chat model that drives the agent's reasoning loop (temperature 0).
agent_llm = ChatOpenAI(model_name='gpt-3.5-turbo-16k', temperature=0)

# Conversation buffer stored under the "chat_history" key.
# NOTE(review): confirm the zero-shot ReAct prompt actually consumes
# `chat_history`; if it does not, this memory has no effect on the prompt.
memory = ConversationBufferMemory(memory_key="chat_history")

# ReAct-style agent over `tools`; malformed LLM output is handled rather than
# raising (handle_parsing_errors=True), and each step is logged (verbose=True).
agent_chain = initialize_agent(
    tools,
    agent_llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    memory=memory,
    verbose=True,
    handle_parsing_errors=True,
    prefix=None,
    suffix=None,
    output_parser=None,
)

In [29]:

# Task prompt given to the agent. It asks for, per MOF: the water-stability
# class, a confidence score in [0, 1], and verbatim justification sentences,
# and it interpolates the classification rubric `Rule`.
# Lines ending in a backslash are joined with the following line, so most of
# the prompt reaches the model as one long line; only the lines without a
# trailing backslash end in a real newline.
# NOTE(review): the text says "JSON format as described below", but no JSON
# schema follows in this prompt -- confirm whether one was meant to be included.
prompt3a = f"""
    You are an expert chemist. The doc tool describes the water stability properties of a few materials including \
    Metal-Organic Frameworks(MOF) or coordination polymers, Coordination Networks or ZIF compounds. Please find \
    the full name for materials, and their \
    stability in water. \
    Your final answer should contain the following:\
    Your first output is the water stability of each MOF. \
    Your second output for each mof should be a probability score ranging between [0, 1].\
    This probability score shows how certain you are in your answer.\
    Your third output should be exact sentences without any changes from the document that justifies your decision.\
    This should be "Not provided" if you cannot find water stability.\
    Rule: {Rule}\
    If there are multiple MOFs, please list them all and put "MOF name" before them. \
    You should only respond in JSON format as described below. 
    Make sure it is parsable using Python json.load.
    Make sure "MOF name"s do not have duplicates
    If information is not provided, state it as not provided.\
    """

In [30]:
# Run the agent on the task prompt inside the OpenAI callback context and keep
# its final answer in `result`.
with get_openai_callback() as cb:
    result = agent_chain.run(input=prompt3a)
    # Print the callback object.
    # NOTE(review): presumably this reports token usage / cost for the run -- confirm.
    print(cb)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI need to use the doc tool to find the information about water stability of MOFs. I should also keep track of the MOF names and their stability in water. I will start by using the doc tool.
Action: doc
Action Input: water stability of MOFs[0m
Observation: [36;1m[1;3mBased on the information provided, the water stability of the bio-MOFs was evaluated by soaking them in water for different periods of time and then evaluating their stability using PXRD and SEM imaging. Bio-MOF-11 dissolved rapidly in water, while bio-MOFs-12 and 13 dissolved partially. Bio-MOF-13, with butyrate, was noticeably more stable in water than bio-MOF-12, with propionate. Bio-MOF-14, with valerate groups, was the most stable and showed no loss of crystallinity or significant crystal degradation after soaking in water for 1 hour, 7 days, or 30 days. Additionally, CO2 isotherms collected for bio-MOF-14 soaked in water for 7 or 30 days showed no loss in

In [31]:
# Parse the agent's final answer; the displayed result is a dict keyed by MOF
# name, with each entry tagged with this paper's id.
eunomia.parse_to_dict(result, paper_id)

{'Bio-MOF-11': {'Predicted Stability': 'Unstable',
  'Score': 1.0,
  'Justification': 'Bio-MOF-11 dissolved rapidly in water',
  'Paper id': '1'},
 'Bio-MOF-12': {'Predicted Stability': 'Unstable',
  'Score': 1.0,
  'Justification': 'Bio-MOF-12 dissolved partially in water',
  'Paper id': '1'},
 'Bio-MOF-13': {'Predicted Stability': 'Unstable',
  'Score': 1.0,
  'Justification': 'Bio-MOF-13 dissolved partially in water',
  'Paper id': '1'},
 'Bio-MOF-14': {'Predicted Stability': 'Stable',
  'Score': 1.0,
  'Justification': 'Bio-MOF-14 showed no loss of crystallinity or significant crystal degradation after soaking in water for various durations. Water did not affect the porosity of bio-MOF-14.',
  'Paper id': '1'}}