In [224]:
import pandas as pd

In [225]:
import os
os.environ["OPENAI_API_KEY"] = '....'

In [226]:
df = pd.read_csv('metropolitan_highlight.csv')
df["build_date"] = df.apply(lambda row: f"{row.object_begin_date} - {row.object_end_date}", axis=1)
df = df[["title","artist_display_name","build_date","credit_line","accessionyear","department","medium","dimensions","description"]]

In [227]:
from langchain.document_loaders import DataFrameLoader
loader = DataFrameLoader(df, page_content_column="description")
documents = loader.load()

In [228]:
from langchain.text_splitter import CharacterTextSplitter
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(documents)

In [1]:
from langchain.embeddings import HuggingFaceEmbeddings

model_name = "sentence-transformers/all-mpnet-base-v2"
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': False}
embeddings = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)

  from .autonotebook import tqdm as notebook_tqdm
Downloading pytorch_model.bin: 100%|██████████| 438M/438M [01:52<00:00, 3.90MB/s] 
Downloading (…)nce_bert_config.json: 100%|██████████| 53.0/53.0 [00:00<00:00, 21.1kB/s]
Downloading (…)cial_tokens_map.json: 100%|██████████| 239/239 [00:00<00:00, 86.8kB/s]
Downloading (…)a8e1d/tokenizer.json: 100%|██████████| 466k/466k [00:00<00:00, 3.36MB/s]
Downloading (…)okenizer_config.json: 100%|██████████| 363/363 [00:00<00:00, 158kB/s]
Downloading (…)8e1d/train_script.py: 100%|██████████| 13.1k/13.1k [00:00<00:00, 4.41MB/s]
Downloading (…)b20bca8e1d/vocab.txt: 100%|██████████| 232k/232k [00:00<00:00, 1.15MB/s]
Downloading (…)bca8e1d/modules.json: 100%|██████████| 349/349 [00:00<00:00, 157kB/s]


In [22]:
from langchain.vectorstores import FAISS
db = FAISS.from_documents(docs, embeddings)
db.save_local("faiss_index")


In [8]:
from langchain.vectorstores import FAISS
db = FAISS.load_local("faiss_index", embeddings)


In [30]:
retriever = db.as_retriever(search_kwargs={"k": 3, "search_type":"cosine"})


In [31]:
docs = retriever.get_relevant_documents("What is the name of the painting that represents 2 children holding a basket in front of a boat ?")
docs


[Document(page_content='At Gloucester, Homer produced a series of watercolors focusing on the daily activities of local children, whether boating, helping with chores, or playing among the dunes and wharves. In A Basket of Clams, one of the earliest watercolors by the artist in The Met collection, Homer depicts two boys lugging their haul across the beach. The smaller figure appears to eye the dead shark ahead, while his older companion looks back, seemingly at the sailboat behind them. These two details—the shark and the ship—cast a shadow on an otherwise bright scene, subtly gesturing to threats the youth of this fishing village might someday face at sea.', metadata={'title': 'A Basket of Clams', 'artist_display_name': 'Winslow Homer', 'build_date': '1873 - 1873', 'credit_line': 'Gift of Arthur G. Altschul, 1995', 'accessionyear': 1995.0, 'department': 'The American Wing', 'medium': 'Watercolor on wove paper', 'dimensions': '11 1/2 x 9 3/4 in. (29.2 x 24.8 cm)\r\n'}),
 Document(page_

In [222]:
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
)

## This is a LLM model that will be used to answer the user questions in a conversational manner
template = """You are the MET information bot.
You will be given a corpus of documents from the MET museum.
Basing yourself on the corpus of documents provided to you and the chat history, answer the user questions. 
Here is the corpus :{documents}
Chat history : {chat_history}
"""

system_message_prompt = SystemMessagePromptTemplate.from_template(template)
human_template = "{query}"
human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)

query = input("Hi what can I do for you ?")
query_doc = True
chat_history = []
llm = ChatOpenAI(temperature = 0.0,streaming = True, callbacks=[StreamingStdOutCallbackHandler()])

while query != "quit":
    if query_doc:
        candidate_documents = retriever.get_relevant_documents(query)
        documents = "".join([f"\n\nDocument : {i} "+ "\n" + "Content : " + doc.page_content + "\n" + "Metadata : " + str(doc.metadata) for i,doc in enumerate(candidate_documents)])
        query_doc = False
    else : 
        ## Pass for the moment. // TODO : Add a chain to keep the relevant documents in memory and query others. 
        query_doc = False
        pass
    ## Create the discussion prompt
    chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])
    ## Query the model
    result = llm(chat_prompt.format_prompt(chat_history=chat_history,documents=documents,query=query).to_messages())
    ## Add the Question / Answer tuple to the chat history
    chat_history.append((query,result.content))
    ## Print the answer
    query = input(result.content)


In [220]:
from langchain import OpenAI, PromptTemplate, LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.chains.summarize import load_summarize_chain
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
)

## Prompt used for summarization
summerization_template = """Write a concise summary of the following
{text}
"""
summerization_template = PromptTemplate(template=summerization_template, input_variables=["text"])

## Prompt used for the re-query guesser
requery_template = """Based on the following chat history  : 
{chat_history}
and the user query :
{query}
Try to understand if the user is changing of topic and therefore we need to call the retriever again.
Answer can't be something different than "True" or "False". 
"""
requery_template = PromptTemplate(template=requery_template, input_variables=["chat_history","query"])

## This is a LLM model that will be used to answer the user questions in a conversational manner
template = """You are the MET information bot.
You will be given a corpus of documents from the MET museum.
Basing yourself on the corpus of documents provided to you and the chat history, answer the user questions. 
Here is the corpus :{documents}
Chat history : {chat_history}
"""

system_message_prompt = SystemMessagePromptTemplate.from_template(template)
human_template = "{query}"
human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)

query = input("Hi what can I do for you ?")
query_doc = True
chat_history = []

llm = ChatOpenAI(temperature = 0.0,streaming = True, callbacks=[StreamingStdOutCallbackHandler()])
summerization_chain = load_summarize_chain(OpenAI(max_tokens=200), chain_type="stuff", prompt=summerization_template)
re_query_chain = LLMChain(llm=OpenAI(max_tokens=200), prompt=requery_template)
while query != "quit":
    if query_doc:
        candidate_documents = retriever.get_relevant_documents(query)
        print(candidate_documents)
        documents = "".join([f"\n\nDocument : {i} "+ "\n" + "Content : " + doc.page_content + "\n" + "Metadata : " + str(doc.metadata) for i,doc in enumerate(candidate_documents)])
        query_doc = False
    else :
        result = re_query_chain.run(chat_history=chat_history,query=query).strip()
        if result == "True":
            query_doc = True
            continue
        else:
            query_doc = False
        
    ## Create the discussion prompt
    chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])
    ## Query the model
    result = llm(chat_prompt.format_prompt(chat_history=chat_history,documents=documents,query=query).to_messages())
    ## Add the Question / Answer tuple to the chat history
    chat_history.append((query,result.content))
    # chat_history = summerization_chain.run(candidate_documents)
    # print(chat_history)
    ## Print the answer
    query = input(result.content)


[Document(page_content='At Gloucester, Homer produced a series of watercolors focusing on the daily activities of local children, whether boating, helping with chores, or playing among the dunes and wharves. In A Basket of Clams, one of the earliest watercolors by the artist in The Met collection, Homer depicts two boys lugging their haul across the beach. The smaller figure appears to eye the dead shark ahead, while his older companion looks back, seemingly at the sailboat behind them. These two details—the shark and the ship—cast a shadow on an otherwise bright scene, subtly gesturing to threats the youth of this fishing village might someday face at sea.', metadata={'title': 'A Basket of Clams', 'artist_display_name': 'Winslow Homer', 'build_date': '1873 - 1873', 'credit_line': 'Gift of Arthur G. Altschul, 1995', 'accessionyear': 1995.0, 'department': 'The American Wing', 'medium': 'Watercolor on wove paper', 'dimensions': '11 1/2 x 9 3/4 in. (29.2 x 24.8 cm)\r\n'}), Document(page_c

## Evaluate

### With LLM

In [230]:
# Generated examples
from langchain.evaluation.qa import QAGenerateChain
example_gen_chain = QAGenerateChain.from_llm(OpenAI())

In [231]:
new_examples = example_gen_chain.apply_and_parse([{"doc": t} for t in docs[:5]])


In [232]:
new_examples

[{'query': 'What is the overall form and coloration of the Adams Vase designed to resemble?',
  'answer': 'The bell-shaped cotton flower.'},
 {'query': 'What is the build date of the Spindle-back armchair?',
  'answer': '1640 - 1680'},
 {'query': 'Who is thought to have created the armchair described in the document?',
  'answer': 'Gustave Herter'},
 {'query': 'What did Auguste Pottier train in before meeting William Stymus?',
  'answer': 'Auguste Pottier trained as a sculptor in Paris.'},
 {'query': 'What type of glass was used to create the window?',
  'answer': 'Leaded Favrile glass'}]

In [233]:
def chat_predict(question):
    ## This is a LLM model that will be used to answer the user questions in a conversational manner
    template = """You are the MET information bot.
    You will be given a corpus of documents from the MET museum.
    Basing yourself on the corpus of documents provided to you and the chat history, answer the user questions. 
    Here is the corpus :{documents}
    Chat history : {chat_history}
    """

    system_message_prompt = SystemMessagePromptTemplate.from_template(template)
    human_template = "{query}"
    human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)

    query = question
    query_doc = True
    chat_history = []

    llm = ChatOpenAI(temperature = 0.0,streaming = True, callbacks=[StreamingStdOutCallbackHandler()])
    candidate_documents = retriever.get_relevant_documents(query)
    documents = "".join([f"\n\nDocument : {i} "+ "\n" + "Content : " + doc.page_content + "\n" + "Metadata : " + str(doc.metadata) for i,doc in enumerate(candidate_documents)])
            
    ## Create the discussion prompt
    chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])
    ## Query the model
    result = llm(chat_prompt.format_prompt(chat_history=chat_history,documents=documents,query=query).to_messages())
    return result.content

In [240]:
predictions = []
for example in new_examples:
    predictions.append({"query":example["query"],"answer":example["answer"],"result":chat_predict(example["query"])})


The Adams Vase was designed to resemble the cotton plant, with the overall form and coloration emulating those of the bell-shaped cotton flower, and the rock-crystal cover representing the white boll.I'm sorry, I don't have any information about a Spindle-back armchair in my corpus. Could you please provide more details or context about the chair you are referring to?The document does not describe an armchair, but it does mention a suite of seating furniture that includes a pair of armchairs and ten side chairs. The set is attributed to Duncan Phyfe based on the traditional history of ownership and the skillful execution of the details.Auguste Pottier trained as a sculptor in Paris before meeting William Stymus.In document 0, it is mentioned that the window was created solely with glass, using the full range developed at Tiffany Studios. The medium used for the window in document 0 is Leaded Favrile glass.

In [241]:
predictions

[{'query': 'What is the overall form and coloration of the Adams Vase designed to resemble?',
  'answer': 'The bell-shaped cotton flower.',
  'result': 'The Adams Vase was designed to resemble the cotton plant, with the overall form and coloration emulating those of the bell-shaped cotton flower, and the rock-crystal cover representing the white boll.'},
 {'query': 'What is the build date of the Spindle-back armchair?',
  'answer': '1640 - 1680',
  'result': "I'm sorry, I don't have any information about a Spindle-back armchair in my corpus. Could you please provide more details or context about the chair you are referring to?"},
 {'query': 'Who is thought to have created the armchair described in the document?',
  'answer': 'Gustave Herter',
  'result': 'The document does not describe an armchair, but it does mention a suite of seating furniture that includes a pair of armchairs and ten side chairs. The set is attributed to Duncan Phyfe based on the traditional history of ownership an

In [242]:
from langchain.evaluation.qa import QAEvalChain
eval_chain = QAEvalChain.from_llm(llm = OpenAI(temperature=0))
graded_outputs = eval_chain.evaluate(new_examples, predictions)


In [243]:
for i, eg in enumerate(new_examples):
    print(f"Example {i}:")
    print("Question: " + predictions[i]['query'])
    print("Real Answer: " + predictions[i]['answer'])
    print("Predicted Answer: " + predictions[i]['result'])
    print("Predicted Grade: " + graded_outputs[i]['text'])
    print()

Example 0:
Question: What is the overall form and coloration of the Adams Vase designed to resemble?
Real Answer: The bell-shaped cotton flower.
Predicted Answer: The Adams Vase was designed to resemble the cotton plant, with the overall form and coloration emulating those of the bell-shaped cotton flower, and the rock-crystal cover representing the white boll.
Predicted Grade:  CORRECT

Example 1:
Question: What is the build date of the Spindle-back armchair?
Real Answer: 1640 - 1680
Predicted Answer: I'm sorry, I don't have any information about a Spindle-back armchair in my corpus. Could you please provide more details or context about the chair you are referring to?
Predicted Grade:  INCORRECT

Example 2:
Question: Who is thought to have created the armchair described in the document?
Real Answer: Gustave Herter
Predicted Answer: The document does not describe an armchair, but it does mention a suite of seating furniture that includes a pair of armchairs and ten side chairs. The se

### With Critique

In [244]:
import inspiredco.critique
import os
critique = inspiredco.critique.Critique(api_key="_AH-tvFDWAemlvh918HhehlQlbyfrIcAvfMF745uZIiMv6ZUj-ROt1h8PKGqNLhrvgTLGneuIXzWBpdhj4UfMg==")

In [245]:
metrics = {
    "rouge": {
        "metric": "rouge",
        "config": {"variety": "rouge_l"},
    },
    "chrf": {
        "metric": "chrf",
        "config": {},
    },
    "bert_score": {
        "metric": "bert_score",
        "config": {"model": "bert-base-uncased"},
    },
    "uni_eval": {
        "metric": "uni_eval",
        "config": {"task": "summarization", "evaluation_aspect": "relevance"},
    },
}

In [246]:
critique_data = [
    {"target": pred['result'], "references": [pred['answer']]} for pred in predictions
]
eval_results = {
    k: critique.evaluate(dataset=critique_data, metric=v["metric"], config=v["config"])
    for k, v in metrics.items()
}

In [248]:
for i, eg in enumerate(new_examples):
    score_string = ", ".join([f"{k}={v['examples'][i]['value']:.4f}" for k, v in eval_results.items()])
    print(f"Example {i}:")
    print("Question: " + predictions[i]['query'])
    print("Real Answer: " + predictions[i]['answer'])
    print("Predicted Answer: " + predictions[i]['result'])
    print("Predicted Scores: " + score_string)
    print("Predicted Grade: " + graded_outputs[i]['text'])

    print()

Example 0:
Question: What is the overall form and coloration of the Adams Vase designed to resemble?
Real Answer: The bell-shaped cotton flower.
Predicted Answer: The Adams Vase was designed to resemble the cotton plant, with the overall form and coloration emulating those of the bell-shaped cotton flower, and the rock-crystal cover representing the white boll.
Predicted Scores: rouge=0.2632, chrf=0.4357, bert_score=0.6114, uni_eval=0.8846
Predicted Grade:  CORRECT

Example 1:
Question: What is the build date of the Spindle-back armchair?
Real Answer: 1640 - 1680
Predicted Answer: I'm sorry, I don't have any information about a Spindle-back armchair in my corpus. Could you please provide more details or context about the chair you are referring to?
Predicted Scores: rouge=0.0000, chrf=0.0047, bert_score=0.2496, uni_eval=0.8094
Predicted Grade:  INCORRECT

Example 2:
Question: Who is thought to have created the armchair described in the document?
Real Answer: Gustave Herter
Predicted An