Evaluate the LLM responses

In [2]:
%load_ext autoreload
%autoreload 2
%load_ext dotenv
%dotenv

In [82]:
import json
import os
import pandas as pd
from pinecone import Pinecone, ServerlessSpec
from llama_index.llms.openai import OpenAI
from llama_index.core import (
    Settings, Document, VectorStoreIndex, get_response_synthesizer, PromptTemplate
)
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.vector_stores.pinecone import PineconeVectorStore
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from dotenv import load_dotenv
from difflib import SequenceMatcher
from fuzzywuzzy import fuzz
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import re

### Load environment variables (for API keys)

In [31]:
load_dotenv()
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
pinecone_client = Pinecone(api_key=PINECONE_API_KEY)
client = OpenAI(model="gpt-4o-mini", temperature=0)

### Initialize OpenAI client and Pinecone

In [5]:
embedding = OpenAIEmbedding(model="text-embedding-ada-002")
Settings.llm = client
Settings.embed_model = embedding
Settings.chunk_size_limit = 1536

In [11]:
# Initialize Pinecone vector store (if required)
pinecone_index = pinecone_client.Index("chatbot-index")
vector_store = PineconeVectorStore(pinecone_index)
retriever = VectorIndexRetriever(index=VectorStoreIndex.from_vector_store(vector_store=vector_store), similarity_top_k=5)

In [12]:
# Define the chatbot's prompt template
prompt_template = (
    "You are a friendly chatbot specialized in helping beginners use FamilySearch and its tools. 😊 "
    "This includes record hints, source attachments, and other related topics. Provide clear and concise answers, "
    "and try to make the conversation enjoyable! 😄\n\n"
    "Context:\n"
    "#####################################\n"
    "{context_str}\n"
    "Answer the user's question: {query_str}\n\n"
    "If the question is related to FamilySearch or its tools (such as record hints, source attachments, or genealogical research), "
    "provide a detailed answer along with a summary. Also, include the following source metadata as 'Source':\n"
    "- **Title**: {title}\n"
    "- **Publish Date**: {date}\n"
    "- **URL**: {url}\n\n"
    "However, if the question is unrelated to FamilySearch, provide a direct and concise answer without any summary or metadata."
)

# Create the template and response synthesizer
qa_template = PromptTemplate(template=prompt_template)
response_synthesizer = get_response_synthesizer(
    llm=client, text_qa_template=qa_template, response_mode="compact"
)
query_engine = RetrieverQueryEngine(
    retriever=retriever, response_synthesizer=response_synthesizer
)

In [32]:
# Function to query the LLM for an answer based on a given question
def query_llm(question):
    # Query the LLM using llama_index's query engine
    try:
        response = query_engine.query(question)
        return response.response  # Ensure this extracts the correct text part of the response
    except Exception as e:
        return f"Error fetching LLM response: {str(e)}"

### Function to calculate the similarity score between two strings and to evaluate LLM responses against predefined answers

In [69]:
# Function to calculate semantic similarity using embeddings
def calculate_semantic_similarity(stored_answer, llm_answer):
    # Wrap the answers in Document objects
    stored_doc = Document(content=stored_answer)
    llm_doc = Document(content=llm_answer)

    # Generate embeddings for both answers
    stored_embedding = Settings.embed_model.embed([stored_doc])[0]  # Assuming this is the correct method
    llm_embedding = Settings.embed_model.embed([llm_doc])[0]  # Assuming this is the correct method

    # Calculate cosine similarity
    return cosine_similarity([stored_embedding], [llm_embedding])[0][0] * 100  # Multiply by 100 to get a percentage

In [14]:
# Load JSON file with predefined questions and answers
with open('../data/faq_test_data/faq_test_data2.json', 'r') as f:
    test_data = json.load(f)

In [77]:
# Create a DataFrame from JSON
df = pd.DataFrame(test_data)
df

Unnamed: 0,question,answer,url
0,What are record hints in Family Tree?,Record hints are historical records that Famil...,https://www.familysearch.org/en/help/helpcente...
1,What is the difference between the left and th...,The left column shows you details from the his...,https://www.familysearch.org/en/help/helpcente...
2,What do the Details and Compare buttons do in ...,The Details button expands both the record det...,https://www.familysearch.org/en/help/helpcente...
3,What is my source box?,Your source box contains a list of sources tha...,https://www.familysearch.org/en/help/helpcente...
4,How do I give feedback to FamilySearch about S...,We want to know what you think about Source Li...,https://www.familysearch.org/en/help/helpcente...
5,I'm stumped. How can I get help with Source Li...,The Source Linker Learning Center is a great f...,https://www.familysearch.org/en/help/helpcente...
6,Why is attaching a source important?,Sources help answer questions and make informa...,https://www.familysearch.org/en/help/helpcente...
7,What do I do with an unfinished attachment?,An unfinished attachment is a source that has ...,https://www.familysearch.org/en/help/helpcente...


In [88]:
# List to store LLM responses and expected answers
llm_responses = []
expected_answers = []

In [89]:
# Iterate through the DataFrame to compare the LLM responses with the stored answers
for index, row in df.iterrows():
    question = row['question']
    expected_answer = row['answer']

    # Get the LLM's answer
    llm_response = query_llm(question)

    # Extract the answer from the LLM response
    llm_answer = llm_response.split("Source:")[0].strip()  # Get everything before "Source:"

    # Append results
    llm_responses.append(llm_answer)
    expected_answers.append(expected_answer)

In [90]:

# Add the results to the DataFrame
df['LLM Response'] = llm_responses
df['Expected Answer'] = expected_answers

In [93]:
# Configure pandas display settings
pd.set_option('display.max_colwidth', None)  # Show full column width
pd.set_option('display.max_rows', None)   

In [94]:
# Display the relevant columns of the DataFrame
df[['question', 'Expected Answer', 'LLM Response']]

Unnamed: 0,question,Expected Answer,LLM Response
0,What are record hints in Family Tree?,"Record hints are historical records that FamilySearch finds for you about your ancestors. These hints can often help you identify missing children, parents, siblings, and other relatives in your family tree. Learn more about record hints.","Record hints in Family Tree are historical records that FamilySearch automatically finds for you about your ancestors. This hinting system is continuously searching for new records, as FamilySearch regularly adds historical data. \n\nThese hints can provide valuable information about your ancestors, such as their occupations, physical descriptions, and even details about missing family members like children, parents, and siblings. You can easily transfer information from these record hints to Family Tree without needing to retype it.\n\nTo make the most of record hints, you should:\n\n1. Verify that the record hint is indeed about your ancestor.\n2. Transfer any desired information from the record hint to Family Tree. Note that you cannot replace existing information directly on the website; you'll need to update it manually in Family Tree. However, you can replace information using the mobile app.\n3. Attach the record hint to your ancestors in Family Tree, which saves the record as a source for their profiles.\n\nIn summary, record hints are a fantastic way to uncover more about your family history and enhance your Family Tree with accurate information!\n\n**Source**:\n- **Title**: What are record hints in Family Tree?\n- **Publish Date**: April 24, 2024\n- **URL**: [FamilySearch Help Center](https://www.familysearch.org/en/help/helpcenter/article/what-are-record-hints-in-family-tree)"
1,What is the difference between the left and the right column in Source Linker?,The left column shows you details from the historical record. The right column shows you details from the ancestor's person page in the FamilySearch Family Tree.,"In Source Linker, the left column displays details from the historical record you are viewing, while the right column shows information from the ancestor’s person page in Family Tree. If the ancestor is not yet in Family Tree, you will have the option to add them. The purpose of Source Linker is to help you attach records to existing ancestors, ensuring that the details in Family Tree are accurate and supported by evidence.\n\n**Summary**: \n- **Left Column**: Historical record details.\n- **Right Column**: Ancestor’s details from Family Tree (or option to add if not present).\n\n**Source**:\n- **Title**: Understanding the Source Linker screen layout\n- **Publish Date**: June 06, 2024\n- **URL**: [Understanding the Source Linker screen layout](https://www.familysearch.org/en/help/helpcenter/article/understanding-the-source-linker-screen-layout)"
2,What do the Details and Compare buttons do in Source Linker?,"The Details button expands both the record details (left column) and the person details (right column) The Compare button also expands the record details and the person details. In addition, if information in the record is not already in Family Tree, you will see green Add buttons that allow you to add the information.","In Source Linker, the **Details** and **Compare** buttons serve important functions:\n\n- The **Details** button allows you to view more information about a specific person in the Family Tree. When you click on it, you can see the details associated with that ancestor, which helps you understand their profile better.\n\n- The **Compare** button, represented by a purple icon, is used to compare the information from the historical record (left column) with the details already in the Family Tree (right column). When you click this button, it expands both columns so you can see the details side by side. This is particularly useful for determining if the person in the record is the same as the one in Family Tree and for identifying any new or different information that can be added or edited.\n\nIn summary, the **Details** button helps you access the ancestor's profile, while the **Compare** button allows you to analyze and link historical records to the Family Tree.\n\n**Source**:\n- **Title**: Understanding the Source Linker screen layout\n- **Publish Date**: June 06, 2024\n- **URL**: https://www.familysearch.org/en/help/helpcenter/article/understanding-the-source-linker-screen-layout"
3,What is my source box?,"Your source box contains a list of sources that you have saved. In your source box, you can create, edit, and organize sources. You can also copy them and attach them to profiles in the Family Tree. Selecting Add to Source Box in Source Linker saves the source's bibliographic information to your FamilySearch account. After that, you can revisit the source any time you want: Sign in to FamilySearch. In the top right corner, click your name. Click Source Box .","Your source box is a handy feature on FamilySearch that allows you to store and organize sources for your genealogical research. It lets you reuse sources without having to retype them each time you need them. Here are some key features of your source box:\n\n- **Storage**: You can store over 10,000 sources.\n- **Organization**: Create up to 200 folders to keep your sources organized.\n- **Adding Sources**: You can add your own sources into Family Tree.\n- **Attaching Records**: Gather historical records and attach them to your ancestors' records.\n- **Printing**: Print sources directly from the view page of individual sources.\n\nTo access your source box, simply click your name in the upper right corner of the FamilySearch website and select **Source Box** from the drop-down menu.\n\nIf you want to learn more about how to use your source box, feel free to ask! 😊\n\n**Source**:\n- **Title**: What is my source box?\n- **Publish Date**: May 16, 2023\n- **URL**: [What is my source box?](https://www.familysearch.org/en/help/helpcenter/article/what-is-a-source-box)"
4,How do I give feedback to FamilySearch about Source Linker?,"We want to know what you think about Source Linker. Here's how to give feedback: At the bottom of the Source Linker page, click the Feedback button. Select the emoji that best describes your reaction. In the feedback box that opens, enter your thoughts and suggestions. Thank you for sharing!","To give feedback to FamilySearch about Source Linker, you can follow these steps:\n\n1. **Visit the FamilySearch website** and log in to your account.\n2. Navigate to the **Help Center** or the specific page related to Source Linker.\n3. Look for a **Feedback** or **Contact Us** option, which is usually found at the bottom of the page or in the help section.\n4. Fill out the feedback form with your comments or suggestions regarding Source Linker. Be as specific as possible to help them understand your experience.\n\nYour feedback is valuable and helps improve the tools for everyone!\n\n**Summary**: To provide feedback on Source Linker, log in to FamilySearch, go to the Help Center, and find the feedback option to submit your comments.\n\n**Source**:\n- **Title**: Understanding the Source Linker screen layout\n- **Publish Date**: June 06, 2024\n- **URL**: https://www.familysearch.org/en/help/helpcenter/article/understanding-the-source-linker-screen-layout"
5,I'm stumped. How can I get help with Source Linker?,The Source Linker Learning Center is a great first option. You can also visit FamilySearch Help and Learning or contact FamilySearch Support .,"If you're feeling stumped with Source Linker, don't worry! There are several ways to get help:\n\n1. **FamilySearch Help Center**: You can visit the FamilySearch Help Center for articles and guides on using Source Linker and other tools. They have a wealth of information that can guide you through common issues.\n\n2. **Community Forums**: Engage with the FamilySearch community forums where you can ask questions and share experiences with other users. Many experienced genealogists are happy to help beginners!\n\n3. **Video Tutorials**: Check out video tutorials on FamilySearch's YouTube channel. Visual guides can be really helpful in understanding how to navigate Source Linker.\n\n4. **Contact Support**: If you're still having trouble, you can reach out to FamilySearch support for personalized assistance.\n\nRemember, everyone starts somewhere, and asking for help is a great way to learn! 😊\n\n**Source**:\n- **Title**: Understanding the Source Linker screen layout\n- **Publish Date**: June 06, 2024\n- **URL**: https://www.familysearch.org/en/help/helpcenter/article/understanding-the-source-linker-screen-layout"
6,Why is attaching a source important?,"Sources help answer questions and make information about ancestors more accurate. They also make exploring the Tree a richer, more inspiring experience. These are just a few of the benefits of attaching sources to the FamilySearch Family Tree.","Attaching a source in FamilySearch is important for several reasons:\n\n1. **Collaboration**: Sources help users work together more effectively in Family Tree. They can settle disputes about information and provide a common ground for discussion.\n\n2. **Richness of Information**: By adding sources, you enrich your family tree with historical context, making your ancestors feel more real and connected to you.\n\n3. **Accuracy**: Sources demonstrate the accuracy of the information in your tree. They provide evidence that supports the details you have recorded.\n\n4. **Conflict Resolution**: When you encounter conflicting information (like different birth dates), sources can help you evaluate which piece of information is more likely to be correct.\n\n5. **Acknowledgment of Others' Work**: Attaching sources recognizes the efforts of others who have contributed to the genealogical research, fostering a sense of community.\n\nIn summary, attaching sources is a vital part of building a reliable and collaborative family history.\n\n**Source**:\n- **Title**: What are the benefits of adding sources to Family Tree?\n- **Publish Date**: June 08, 2023\n- **URL**: [FamilySearch](https://www.familysearch.org/en/help/helpcenter/article/what-are-the-benefits-of-adding-sources-to-family-tree)"
7,What do I do with an unfinished attachment?,"An unfinished attachment is a source that has not been attached to all of the people mentioned in the source. If you encounter an unfinished attachment , try reviewing it and attaching it to the other relatives mentioned in it.","If you encounter an unfinished attachment in Family Tree on FamilySearch, here's what you can do:\n\n1. **Access Unfinished Attachments**: Go to the person's Sources tab and click on **Unfinished Attachments**.\n \n2. **Use the Source Linker**: You can either:\n - Attach the record to other people already in Family Tree.\n - Add missing people to Family Tree (make sure to indicate if they are living, as their information will be private).\n - Choose to do nothing if the mentioned individuals are not closely related.\n\n3. **Dismiss the Notification**: If you want to remove the unfinished attachment notification, click **Dismiss**. Remember, this action is permanent and affects all users.\n\n4. **Turn Off Notifications**: If you prefer not to see these notifications, go to **Options** in the Sources tab and toggle off **Unfinished Attachments**.\n\nBy following these steps, you can effectively manage unfinished attachments in Family Tree!\n\n**Source**:\n- **Title**: How do I handle unfinished attachments in Family Tree?\n- **Publish Date**: June 05, 2024\n- **URL**: [FamilySearch Help Center](https://www.familysearch.org/en/help/helpcenter/article/how-do-i-handle-unfinished-attachments-in-family-tree)"
