Purpose: Turn text-data into data with appropriate values neccessary for functional RAGAS evaluation

Fetch documents (from only the SciComp wiki as of now) and put into accessable format for generation of expected outputs and creation of context later on

In [2]:
"""WEB LOADER"""

import argparse
import os
import sys
import logging
import warnings

import bs4 as bs
import html2text
from llama_index.core import Document
from langchain_community.document_loaders import TextLoader

data_path = '../data/janelia.org'  # Use './' to indicate the current directory
text_maker = html2text.HTML2Text()
text_maker.ignore_links = True
text_maker.images_to_alt = True
text_maker.single_line_break = True
text_maker.ignore_emphasis = True
SOURCE = "Web"


def webpage_to_text(soup):
    """ Convert a generic web page to searchable text
    """
    title = soup.title.text
    text = text_maker.handle(str(soup))
    return title,text


def janelia_org_to_text(soup):
    """ Convert a janelia.org page to searchable text
    """
    title = soup.title.text.replace(" | Janelia Research Campus","")
    content_sections = soup.find_all("section", class_="content-section")
    if not content_sections:
        return title,None
    if len(content_sections) > 1:
        raise Exception("More than one content section")
    content = content_sections[0]
    # Remove useless content
    for div in content.find_all("div", {'class':['panels-ipe-label','secondary_menu']}):
        div.decompose()
    # Html2text smashes text together if only tags separate it
    # This fix not only adds the spacing but also adds a separator for nav buttons
    for span in content.find_all("span", {'class':'button-wrapper'}):
        sep = bs.NavigableString(" / ")
        span.insert(0, sep)
    text = text_maker.handle(str(content))
    return title,text


def html_to_text(link, body):
    """ Convert a web page to plain text for use as a GPT prompt.
    """
    soup = bs.BeautifulSoup(body,'lxml')
    if "janelia.org" in link:
        title,text = janelia_org_to_text(soup)
    else:
        title,text = webpage_to_text(soup)
    return title,text


class WebSiteLoader():

    def __init__(self, data_path):
        self.data_path = data_path

    def create_document(self, name, title, link, doc_text):
        metadata = {"source": self.data_path, "title": title, "link": link}
        # Debugging: Print doc_text to ensure it's not empty
        return [Document(page_content=doc_text, metadata=metadata)]
    
    def load_all_documents(self):
        documents = []
        for root, dirs, files in os.walk(self.data_path):
            for name in files:
                filepath = os.path.join(root, name)
                with open(filepath) as f:
                    link = f.readline().strip()
                    body = f.read()
                    title, text = html_to_text(link, body)
                    
                    
                    # print(f"Title: {title}")
                    # print(f"Text: {text}")
                    if text:
                        final_text = title + "\n" + text
                        with open('tempTestGen.txt', 'w') as file:
                            file.write(final_text)
                        loader = TextLoader("./tempTestGen.txt")
                        doc = loader.load()
                        documents.append(doc)
        return documents
    




# Open output.txt in write mode




In [3]:
"""ARCHIVED WIKI LOADRER"""
import argparse
import os
import re
import sys
import logging
import warnings
from langchain_community.document_loaders import TextLoader

import html2text
from llama_index.core import Document

warnings.simplefilter("ignore", ResourceWarning)
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logger = logging.getLogger(__name__)

# Constants
SOURCE = "Wiki"

text_maker = html2text.HTML2Text()
text_maker.ignore_links = True
text_maker.ignore_images = True


def wiki_to_text(ancestors, title, authors, labels, body):
    """ Convert a wiki document to plain text for use as a GPT prompt.
    """
    body_text = text_maker.handle(body)
    text =  f"Title: {title}\n"
    if authors: text += f"Authors: {authors}\n" 
    if ancestors: text += f"Ancestors: {ancestors}\n" 
    if labels: text += f"Labels: {ancestors}\n"
    text += f"{body_text}"
    return text


class WikiLoader():

    def __init__(self, data_path):
        self.data_path = data_path

    def create_document(self, name, title, link, doc_text):
        metadata = {"source": self.data_path, "title": title, "link": link}
        return [Document(page_content=doc_text, metadata=metadata)]

    def load_all_documents(self):
        documents = []
        for root, dirs, files in os.walk(self.data_path):
            for name in files:
                filepath = os.path.join(root, name)
                with open(filepath) as f:
                    link = f.readline().rstrip()
                    ancestors = f.readline().rstrip()
                    title = f.readline().rstrip()
                    authors = f.readline().rstrip()
                    labels = f.readline().rstrip()
                    body = re.sub('[\n]+', '\n', "".join(f.readlines()))
                    text = wiki_to_text(ancestors, title, authors, labels, body)
                    # doc = self.create_document(name, title, link, text)
                    # documents.append(doc)
                    if text:
                        final_text = title + "\n" + text
                        with open('tempTestGen.txt', 'w') as file:
                            file.write(final_text)
                        loader = TextLoader("./tempTestGen.txt")
                        doc = loader.load()
                        documents.append(doc)
        return documents
    


In [4]:
"""ARCHIVED SLACK LOADER"""
import argparse
import re
import sys
import glob
import json
import logging
import warnings
from decimal import Decimal

from llama_index.core import Document

warnings.simplefilter("ignore", ResourceWarning)
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logger = logging.getLogger(__name__)

# Constants
SOURCE = "Slack"
DOCUMENT_PAUSE_SECS = 300
IGNORED_SUBTYPES = set(['channel_join','channel_leave','bot_message'])


def get(dictionary, key):
    """ Get the key out of the dictionary, if it exists. If not, return None.
    """
    if dictionary and key in dictionary:
        return dictionary[key]
    return None


def fix_text(text):
    """ Standard transformations on text like squashing multiple newlines.
    """
    text = re.sub("\n+", "\n", text)
    return text


class ArchivedSlackLoader():

    def __init__(self, data_path, debug=False):
        self.data_path = data_path
        self.id2username = {}
        self.id2realname = {}
        self.channel2id = {}
        self.debug = debug

        for user in self.get_users():
            id = user['id']
            self.id2username[id] = user['name']
            self.id2realname[id] = user['profile']['real_name']

        logger.info(f"Loaded {len(self.id2username)} users")
        for channel in self.get_channels():
            logger.debug(f"{channel['id']}: {channel['name']}")
            self.channel2id[channel['name']] = channel['id']
        
        logger.info(f"Loaded {len(self.channel2id)} channels")


    def get_users(self):
        """ Generator which returns users from the users.json file.
        """
        with open(f"{self.data_path}/users.json", 'r') as f:
            users = json.load(f)
            for user in users:
                yield user


    def get_channels(self):
        """ Generator which returns channels from the channels.json file.
        """
        with open(f"{self.data_path}/channels.json", 'r') as f:
            channels = json.load(f)
            for channel in channels:
                yield channel


    def get_messages(self, channel_name):
        """ Generator which returns messages from the json files in the given channel directory.
        """
        for messages_file in glob.glob(f"{self.data_path}/{channel_name}/*.json"):
            with open(messages_file, 'r') as f:
                for message in json.load(f):
                    yield message


    def extract_text(self, elements):
        """ Recursively parse an 'elements' structure, 
            converting user elements to their real names.
        """
        text = ''
        for element in elements:
            if 'elements' in element:
                text += self.extract_text(element['elements'])
            el_type = get(element, 'type')
            if el_type == 'text':
                if get(get(element, 'style'), 'code'): text += '`'
                text += element['text']
                if get(get(element, 'style'), 'code'): text += '`'
            elif el_type == 'link':
                text += get(element, 'url')
            elif el_type == 'rich_text_preformatted':
                text += "\n"
            elif el_type == 'user':
                user_id = element['user_id']
                try:
                    text += self.id2realname[user_id]
                except KeyError:
                    logger.error(f"No such user '{user_id}'")
                    text += user_id

        return text

    def parse_message(self, message):
        """ Parse a message into text that will be read by a GPT model. 
        """
        thread_id, text_msg = None, None
        if get(message, 'type') == 'message':
            if 'subtype' in message and get(message, 'subtype') in IGNORED_SUBTYPES:
                pass
            else:
                ts = message['ts']
                thread_ts = get(message, 'thread_ts') or ts
                thread_id = Decimal(thread_ts)

                # Translate user
                user_id = message['user']
                try:
                    realname = self.id2realname[user_id]
                except KeyError:
                    try:
                        realname = message['user_profile']['display_name']
                    except KeyError:
                        realname = user_id
                    
                if 'blocks' in message:
                    text = self.extract_text(message['blocks'])
                else:
                    text = message['text']
                
                text_msg = re.sub("<@(.*?)>", lambda m: self.id2realname[m.group(1)], text)
                text_msg = fix_text(text_msg)

                if 'attachments' in message:
                    for attachment in message['attachments']:
                        if 'title' in attachment: text_msg += f"\n{fix_text(attachment['title'])}"
                        if 'text' in attachment: text_msg += f"\n{fix_text(attachment['text'])}"
                        
                if 'files' in message:
                    for file in message['files']:
                        if 'name' in file:
                            # There are several cases where a file doesn't have a name:
                            # 1) The file has been deleted (mode=tombstone)
                            # 2) We have no access (file_access=access_denied)
                            text_msg += f"\n<{file['name']}>"

                if 'reactions' in message:
                    text_msg += f"\nOthers reacted to the previous message with "
                    r = [f"{reaction['name']} a total of {reaction['count']} times" for reaction in message['reactions']]
                    text_msg += ", and with ".join(r) + "."

                text_msg = f"{realname} said: {text_msg}\n"
        
        return thread_id, text_msg


    def create_document(self, channel_id, ts, doc_text):
        final_text = doc_text
        with open('tempTestGen.txt', 'w') as file:
            file.write(final_text)
        loader = TextLoader("./tempTestGen.txt")
        
        return loader.load()

    def load_documents(self, channel_name):
        channel_id = self.channel2id[channel_name]
        messages = {}
        for message in self.get_messages(channel_name):
            try:
                thread_id, text_msg = self.parse_message(message)
            except Exception as e:
                logger.error(f"Error parsing message: {message}")
                raise e
                
            if thread_id and text_msg:
                if thread_id not in messages:
                    messages[thread_id] = []
                messages[thread_id].append(text_msg)

        prev_id = Decimal(0)
        documents = []
        doc_text = ""
        start_ts = None

        for thread_id in sorted(list(messages.keys())):

            # Create a new document whenever messages are separated by a longer pause
            if doc_text and thread_id-prev_id > DOCUMENT_PAUSE_SECS:
                doc = self.create_document(channel_id, start_ts, doc_text)
                documents.append(doc)
                doc_text = ""
                start_ts = None

            logger.debug(thread_id)

            # Starting timestamp for the next document
            if not start_ts:
                start_ts = str(thread_id)

            # Add all messages from the current thread
            for text_msg in messages[thread_id]:
                doc_text += text_msg

            prev_id = thread_id

        # Add final document
        doc = self.create_document(channel_id, start_ts, doc_text)
        documents.append(doc)

        return documents


    def load_all_documents(self):
        documents = []
        for channel_name in self.channel2id.keys():
            for doc in self.load_documents(channel_name):
                documents.append(doc)
        return documents



In [9]:
"""DOCUMENT LOADER ALL SOURCES"""
from ragas.testset.generator import TestsetGenerator
from ragas.testset.evolutions import simple, reasoning, multi_context
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
# generator with openai models
from dotenv import load_dotenv
from langchain_community.llms import Ollama
import pandas as pd

import os
load_dotenv()
api_key = os.getenv('OPENAI_API_KEY')
from langchain_community.document_loaders import TextLoader
import os

# DONE: Recursively load all scraped files in the directory and its subdirectories
# loader = TextLoader("./test.txt")
# documents = loader.load()
# print(documents)

from langchain_community.document_loaders import TextLoader
import os
import glob

import glob
import os
import json
import mimetypes
from bs4 import BeautifulSoup

"""ArchivedSlackLoader
slack_to_2023-05-18

"""
import random

class DocumentLoader:
    def __init__(self, root_dir):
        self.root_dir = root_dir

    def load_documents(self):
        all_documents = []
        for folder_name in os.listdir(self.root_dir):
            folder_path = os.path.join(self.root_dir, folder_name)
            if os.path.isdir(folder_path):
                if folder_name == "wiki":
                    loader = WikiLoader(folder_path)
                elif folder_name == "slack":
                    # Specify the two subdirectories for slack
                    subdirs = ["janelia-software/slack_to_2023-05-18"]
                    for subdir in subdirs:
                        subfolder_path = os.path.join(folder_path, subdir)
                        # Check if the subdirectory exists
                        if os.path.isdir(subfolder_path):
                            loader = ArchivedSlackLoader(subfolder_path)
                            documents = loader.load_all_documents()
                            # Take a random 10% sample
                            # sample_size = max(1, len(documents) // 10)
                            # documents_sample = random.sample(documents, sample_size)
                            # all_documents.extend(documents_sample)
                            all_documents.extend(documents)
                elif folder_name == "janelia.com":
                    loader = WebSiteLoader(folder_path)
                else:
                    continue  # Skip if folder doesn't match any criteria
                # For non-slack directories
                if folder_name != "slack":
                    documents = loader.load_all_documents()
                    # Take a random 10% sample
                    # sample_size = max(1, len(documents) // 10)
                    # documents_sample = random.sample(documents, sample_size)
                    # all_documents.extend(documents_sample)
                    all_documents.extend(documents)
        return all_documents
    
    def test_load_documents(self):
        # This method is for testing purposes and will only load the first document in each folder path
        all_documents = []
        for folder_name in os.listdir(self.root_dir):
            folder_path = os.path.join(self.root_dir, folder_name)
            if os.path.isdir(folder_path):
                if folder_name == "wiki":
                    loader = WikiLoader(folder_path)
                elif folder_name == "slack":
                    # Specify the two subdirectories for slack
                    subdirs = ["janelia-software/slack_to_2023-05-18"]
                    for subdir in subdirs:
                        subfolder_path = os.path.join(folder_path, subdir)
                        # Check if the subdirectory exists
                        if os.path.isdir(subfolder_path):
                            loader = ArchivedSlackLoader(subfolder_path)
                            documents = loader.load_all_documents()
                            # Only load the first document for testing
                            if documents:
                                all_documents.append(documents[0])
                elif folder_name == "janelia.com":
                    loader = WebSiteLoader(folder_path)
                else:
                    continue  # Skip if folder doesn't match any criteria
                # For non-slack directories
                if folder_name != "slack":
                    documents = loader.load_all_documents()
                    # Only load the first document for testing
                    if documents:
                        all_documents.append(documents[0])
        return all_documents

# Assuming your data folder is at "./data/"
loader = DocumentLoader("../data")
documents = loader.test_load_documents()
with open('documents.txt', 'w') as file:
    for document in documents:
        file.write(str(document) + '\n')
# # Assuming documents is a list of strings or convertible to string


# Now `final_df` contains all the generated testsets in one DataFrame

INFO:__main__:Loaded 170 users
INFO:__main__:Loaded 44 channels
ERROR:__main__:No such user 'WAPC2SXJN'
ERROR:__main__:No such user 'W8C6WFVM4'
ERROR:__main__:No such user 'W9GJ4UF33'
ERROR:__main__:No such user 'W0129A3DR8B'
ERROR:__main__:No such user 'WBHHEM2AU'
ERROR:__main__:No such user 'W010W8A1EBF'
ERROR:__main__:No such user 'W010W8A1EBF'
ERROR:__main__:No such user 'W010W8A1EBF'
ERROR:__main__:No such user 'WA7Q7CKGS'
ERROR:__main__:No such user 'U02QZ8GH64X'
ERROR:__main__:No such user 'W013JPYQ5PA'
ERROR:__main__:No such user 'W013JPYQ5PA'
ERROR:__main__:No such user 'WD5FSBZTJ'
ERROR:__main__:No such user 'W010W8A1EBF'
ERROR:__main__:No such user 'UMVJ4KRV2'
ERROR:__main__:No such user 'U03PL1HLZBP'
ERROR:__main__:No such user 'U040HM3D0TU'
ERROR:__main__:No such user 'UN7R87EUE'
ERROR:__main__:No such user 'U040HM3D0TU'
ERROR:__main__:No such user 'U028YV8LZUP'
ERROR:__main__:No such user 'UN7R87EUE'
ERROR:__main__:No such user 'UN7R87EUE'
ERROR:__main__:No such user 'UN7

In [27]:
"""RUN EVALS"""
from datasets import Dataset
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.llms import Ollama
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from ragas.testset.generator import TestsetGenerator
from langchain_community.document_loaders import TextLoader
from ragas.testset.evolutions import simple, reasoning, multi_context
import pandas as pd
import pyarrow 

from dotenv import load_dotenv
import os

load_dotenv()  # Load .env file
api_key = os.environ.get("OPENAI_API_KEY")

# generator_llm = Ollama(model="llama3:70b")
# critic_llm = Ollama(model="llama3:70b")

# generator_llm = ChatOpenAI(model="gpt-3.5-turbo", api_key=api_key)
# critic_llm = ChatOpenAI(model="gpt-3.5-turbo", api_key=api_key)
# embeddings = OpenAIEmbeddings(api_key=api_key)


# generator_llm = Ollama(model="phi")
# critic_llm = Ollama(model="phi")


generator_llm = Ollama(model="llama3")
critic_llm = Ollama(model="llama3")
embeddings = OllamaEmbeddings(
    model="avr/sfr-embedding-mistral"
)


"""curl http://e02u30.int.janelia.org:11434/api/generate -d '{
  "model": "llama3",
  "prompt":"Why is the sky blue?"
}'"""
# generator_llm = ChatOpenAI(model="gpt-3.5-turbo")
# critic_llm = ChatOpenAI(model="gpt-3.5-turbo")


generator = TestsetGenerator.from_langchain(
    generator_llm,
    critic_llm,
    embeddings
) 

# current_testset = generator.generate_with_langchain_docs(document, test_size=1, distributions={"simple": 0.5, "reasoning": 0.25, "multi_context": 0.25})



# Initialize an empty list to collect all testsets

# Iterate over each document

def datasetFix(df):
    columns_to_keep = ['question', 'ground_truth', 'contexts']
    df = df[columns_to_keep]
    
    # Apply transformations
    df['contexts'] = df['contexts'].apply(lambda x: [[y] for y in x] if isinstance(x, list) and all(isinstance(y, str) for y in x) else x)
    df['ground_truth'] = df['ground_truth'].apply(lambda x: [[y] for y in x] if isinstance(x, list) and all(isinstance(y, str) for y in x) else x)

    
    return df

all_data_df = pd.DataFrame()
  
for document in documents:
    test_size_gen = int(len(str(document)) // 500)
    current_testset = generator.generate_with_langchain_docs(document, test_size=test_size_gen, distributions={simple: 0.5, reasoning: 0.25, multi_context: 0.25})
    current_testset = current_testset.to_pandas()

    # current_testset.to_parquet('importMe.parquet', index=False)
    current_df = datasetFix(current_testset)

    # current_df = datasetFix(current_testset)
    
    all_data_df = pd.concat([all_data_df, current_df], ignore_index=True)    # If this is the first iteration, set the DataFrame, otherwise append to it

display(all_data_df)




INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"




INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"




INFO:ragas.testset.docstore:Document [ID: 8f49cbbb-a463-47c6-ac94-1cb45252d24c] has no filename, using `doc_id` instead
INFO:ragas.testset.docstore:Document [ID: 8f49cbbb-a463-47c6-ac94-1cb45252d24c] has no filename, using `doc_id` instead


--- Logging error ---
Generating:   0%|          | 0/10 [00:21<?, ?it/s]Exception ignored in: <generator object tqdm.__iter__ at 0x30c557520>
Traceback (most recent call last):
  File "/Users/bakhalea/Documents/gpt-semantic-search/env/lib/python3.12/site-packages/tqdm/std.py", line 1196, in __iter__
    self.close()
  File "/Users/bakhalea/Documents/gpt-semantic-search/env/lib/python3.12/site-packages/tqdm/std.py", line 1302, in close
    self.display(pos=0)
  File "/Users/bakhalea/Documents/gpt-semantic-search/env/lib/python3.12/site-packages/tqdm/std.py", line 1495, in display
    self.sp(self.__str__() if msg is None else msg)
  File "/Users/bakhalea/Documents/gpt-semantic-search/env/lib/python3.12/site-packages/tqdm/std.py", line 459, in print_status
    fp_write('\r' + s + (' ' * max(last_len[0] - len_s, 0)))
  File "/Users/bakhalea/Documents/gpt-semantic-search/env/lib/python3.12/site-packages/tqdm/std.py", line 453, in fp_write
    fp_flush()
  File "/Users/bakhalea/Documents/gp

INFO:ragas.testset.evolutions:seed question generated: What is the purpose of running JACS containers on the Scientific Computing Server - e03u07?
INFO:ragas.testset.evolutions:seed question generated: Here is a question that can be fully answered from the given context:

"What is the purpose of the Scientific Computing Server - e03u07?"

This question can be answered by reading the "Purpose" section of the context, which states: "Runs the production services for the Janelia Workstation using Docker Swarm. This is the master server."
INFO:ragas.testset.evolutions:seed question generated: Here is a question that can be fully answered from the given context:

"What is the purpose of installing and configuring s3fs on the Scientific Computing Server?"

This question can be answered by referring to the "Purpose" section, which states that the server runs production services for the Janelia Workstation using Docker Swarm. Additionally, the "Software" section mentions that s3fs is used for m



INFO:ragas.testset.evolutions:retrying evolution: 2 times




INFO:ragas.testset.evolutions:rewritten question: Here is the rewritten question:

"What service does s3fs enable for mounting on the Scientific Computing Server, given its purpose of running production services using Docker Swarm?"

This question requires the reader to make multiple logical connections and inferences. They need to understand that the server runs production services using Docker Swarm (from the "Purpose" section) and that s3fs is used for mounting AWS S3 buckets (from the "Software" section). The reader then needs to infer that s3fs enables the mounting of these buckets on the Scientific Computing Server, which is the master server.
INFO:ragas.testset.evolutions:seed question generated: Here's a question that can be fully answered from the given context:

"What is an alternative approach to NumPy-compatible matrix library accelerated by CUDA, as mentioned in the scicomp virtual happy hour?"

This question uses the keyphrase "GPU-accelerated array computing" and can be 



INFO:ragas.testset.evolutions:seed question generated: Here's a question that can be fully answered from the given context:

"What is CuPy, and how does it differ from other libraries like Numba?"

This question can be answered by referencing the provided context, which mentions that CuPy is a "NumPy-compatible matrix library accelerated by CUDA" and that Davis Bennett mentioned it as an alternative to Numba.
INFO:ragas.testset.evolutions:retrying evolution: 3 times
INFO:ragas.testset.evolutions:seed question generated: Here's a question that can be fully answered from the given context:

"What is an alternative to Numba mentioned in this conversation?"

This question can be answered by referencing the message where Cameron Arshadi says "nice, seems like a good alternative to numba" and linking it to the inducer/pyopencl library.
INFO:ragas.testset.evolutions:retrying evolution: 4 times
INFO:ragas.testset.evolutions:seed question generated: Here's a question that can be fully answered 

Generating: 100%|██████████| 4/4 [04:10<00:00, 62.71s/it]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['contexts'] = df['contexts'].apply(lambda x: [[y] for y in x] if isinstance(x, list) and all(isinstance(y, str) for y in x) else x)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['ground_truth'] = df['ground_truth'].apply(lambda x: [[y] for y in x] if isinstance(x, list) and all(isinstance(y, str) for y in x) else x)
embedding nodes:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


embedding nodes:  50%|█████     | 1/2 [00:00<00:00,  6.15it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


                                                              

INFO:ragas.testset.docstore:Document [ID: 8f49cbbb-a463-47c6-ac94-1cb45252d24c] has no filename, using `doc_id` instead
INFO:ragas.testset.docstore:Document [ID: 8f49cbbb-a463-47c6-ac94-1cb45252d24c] has no filename, using `doc_id` instead
INFO:ragas.testset.docstore:Document [ID: 5eb509fc-7beb-40d8-8275-518dce1dd15a] has no filename, using `doc_id` instead
INFO:ragas.testset.docstore:Document [ID: 5eb509fc-7beb-40d8-8275-518dce1dd15a] has no filename, using `doc_id` instead
INFO:ragas.testset.docstore:Document [ID: 8f49cbbb-a463-47c6-ac94-1cb45252d24c] has no filename, using `doc_id` instead
INFO:ragas.testset.docstore:Document [ID: 5eb509fc-7beb-40d8-8275-518dce1dd15a] has no filename, using `doc_id` instead


Generating:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:ragas.testset.evolutions:seed question generated: Here's a question that can be fully answered from the given context:

"What is Davis Bennett's comparison between using Numba and CuPy for accelerating array computing operations?"

This question can be answered by referencing Davis Bennett's statement about comparing the performance of a CUDA kernel jitted with Numba to the same operation on a CuPy array.


Generating: 100%|██████████| 1/1 [00:13<00:00, 13.46s/it]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['contexts'] = df['contexts'].apply(lambda x: [[y] for y in x] if isinstance(x, list) and all(isinstance(y, str) for y in x) else x)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['ground_truth'] = df['ground_truth'].apply(lambda x: [[y] for y in x] if isinstance(x, list) and all(isinstance(y, str) for y in x) else x)


Unnamed: 0,question,ground_truth,contexts
0,Here is a question that can be fully answered ...,The purpose of the Scientific Computing Server...,[[Scientific Computing Server - e03u07\nTitle:...
1,Here is a question that can be fully answered ...,The purpose of the Scientific Computing Server...,[[Scientific Computing Server - e03u07\nTitle:...
2,Here's a question that can be fully answered f...,Davis Bennett compared the performance of a CU...,[[Davis Bennett said: to inaugurate this chann...


In [28]:

questions_list = all_data_df['question'].tolist()
print (questions_list)

seen = set()
questions_list = [x for x in questions_list if not (x in seen or seen.add(x))]

for item in questions_list:
    print(item)

    

# testset.to_json("testset.json")
# Creates dataset of ground truths contexts and questions for the testset
# Missing answers column 
# On one medium size document, it took about 1 minutes to generate 10 questions and cost 3 dollars on OpenAI
# Seems expensive when using gpt-4, is its use justified or does 3.5 get the job done?
# Evaluate gpt-4 vs gpt-3.5-turbo-16k for RAGAS evaluation test data generation


['Here is a question that can be fully answered from the given context:\n\n"What is the purpose of the Scientific Computing Server, e03u07?"\n\nThis question can be answered by reading the "Purpose" section of the context, which states that the server "Runs the production services for the Janelia Workstation using Docker Swarm. This is the master server.', 'Here is a question that can be fully answered from the given context:\n\n"What is the purpose of the Scientific Computing Server - e03u07?"\n\nThis question can be answered by reading the "Purpose" section of the context, which states: "Runs the production services for the Janelia Workstation using Docker Swarm. This is the master server.', 'Here\'s a question that can be fully answered from the given context:\n\n"What is Davis Bennett\'s comparison between using Numba and CuPy for accelerating array computing operations?"\n\nThis question can be answered by referencing Davis Bennett\'s statement about comparing the performance of a

fetch the LLM's response and append to a list

In [34]:
pd.set_option('display.max_rows', None)

display(pd.read_parquet("ragasTestSet_2.parquet"))

Unnamed: 0,question,ground_truth,contexts
0,Here's a question that can be fully answered f...,Setting the visibility of a node in this hiera...,[[ ...\n @overload\n def first(se...
1,Here's a question that can be fully answered f...,"When no parent is found for an image, a warnin...",[[ parent_zarr = None\n if image:\n...
2,Here's a question that can be fully answered f...,A newly created node may be considered higher ...,[[ ...\n @overload\n def first(se...
3,Here's a question that can be fully answered f...,The newly created node may be considered highe...,[[ ...\n @overload\n def first(se...
4,"Based on the given context, a suitable questio...",The `matches` method in the `Labels` class che...,"[[ node = Node(zarr, self, visibility=visibili..."
5,Here's a question that can be fully answered f...,The exception is swallowed,[[ parent_zarr = None\n if image:\n...
6,How would Srini visualize simulation results t...,Srini would visualize simulation results by pl...,[[Srini said: Huh. If the result is so sensiti...
7,How does a Node's structure impact its visibil...,The Node class has properties like `visible` a...,"[[ node = Node(zarr, self, visibility=visibili..."
8,"What makes nodes visible in a hierarchy, given...",Nodes are made visible in a hierarchy based on...,"[[ node = Node(zarr, self, visibility=visibili..."
9,Here's a question that can be fully answered f...,The `load` method iterates over the specs in t...,[[ ...\n @overload\n def first(se...


In [29]:
# Assuming SemanticSearchParser is your class and it has a method named 'generate_response' that takes a question and returns an answer

# Initialize your SemanticSearchParser class
# Adjust this step if your class initialization requires different parameters


# Now you can import the class

from generate_answer import SemanticSearchService

weaviate_url = "http://localhost:8777"
service = SemanticSearchService(weaviate_url)
print (questions_list)

# List to store answers (optional)
answers_list = []


# Loop through each question in the questions_list
for question in questions_list:
    # Use the question as input to get the answer
    answer = service.generate_response(question)
    
    # Print the answer
    
    # Optionally, append the answer to answers_list for further processing
    answers_list.append(answer)

print (answers_list)
# temp_ans_list = ["The Janelia Scientific Computing team operates and maintains a world-class computational infrastructure that includes a high-performance compute cluster with over 5000 cores and 300 GPUs. This infrastructure is used to analyze and mine the large amounts of data produced by Janelia's scientists. The team also supports a state-of-the-art storage and compute infrastructure across two data centers, which currently supports over 15 petabytes of scientific data. This data is split across various storage tiers and is connected with an optical fiber ring. The team also maintains a 4500 sq ft data center with significant power and cooling capacity. \n\nIn addition to hardware, the team also has deep software skills in a broad range of programming languages, extendable applications, frameworks, cloud & cluster technologies, and databases. These skills are used to help with research and engineering tasks, from quick questions to full software life cycle support. The team's software skills, combined with their domain knowledge in areas such as image processing, machine learning, data handling, microscopy, instrument control, 3D graphics & visualization, and bioinformatics & transcriptomics, allow them to efficiently work with both experimentalists and computer scientists. \n\nThe team also develops and maintains a variety of tools and projects, such as NeuronBridge, HortaCloud, VVD Viewer, EASI-FISH pipeline, Render, RS-FISH, and BigStitcher, which are used for various aspects of data analysis and simulation in biological research.", "The Janelia Scientific Computing team provides a wide range of support for advanced imaging techniques and image analysis. They offer consultation on experiment design as well as image visualization and processing. They also provide comprehensive image and data analysis support for multiple software packages through hands-on assistance and/or custom-written macros/plugins/scripts for ImageJ/FIJI, MATLAB, Imaris, etc. \n\nIn addition, they maintain several computer workstations dedicated to viewing and processing large image datasets acquired with the facility's instruments. These workstations are equipped with a suite of imaging software, including a full version of Imaris, and have robust hardware specifications to handle large datasets. \n\nThe team also has deep domain knowledge in image processing, machine learning, data handling, and 3D graphics & visualization, which allows them to efficiently work with experimentalists and computer scientists in various research areas.", "The Janelia Scientific Computing team provides world-class computational infrastructure to support the institute's scientific endeavors. They operate and maintain all of Janelia’s storage and associated backup infrastructure, high performance compute cluster, and all Linux systems. They also manage Janelia’s data center and backup and disaster recovery resources. The team supports a Linux compute cluster with over 5000 cores and 300 GPUs, and is responsible for maintaining many other Linux servers and workstations. They also handle a significant amount of data, with almost 100TB of new Janelia’s data being safely backed up every month.\n\nIn addition to infrastructure, the Scientific Computing Software team works closely with Janelia's labs and project teams, providing everything from answering quick questions to full software life cycle support. They have a broad range of software skills, including programming languages, extendable applications, frameworks, cloud & cluster technologies, and databases. They also have deep domain knowledge in areas like image processing, machine learning, data handling, microscopy, instrument control, 3D graphics & visualization, and bioinformatics & transcriptomics. \n\nThe team also identifies opportunities for code reuse, reducing development overhead and support costs across Janelia. They are strong proponents of open science and have created the Open Science Software Initiative. Most of their software is open source and available via GitHub. They also run a Scientific Computing Associates program to embed associates in SciComp and the lab or team they work with. \n\nThe team is led by Stephan Preibisch and consists of three teams: Software Engineering headed by Konrad Rokicki, Computational Methods and Solutions, both headed by Stephan Preibisch. They have developed several tools and projects like NeuronBridge, HortaCloud, VVD Viewer, EASI-FISH pipeline, Render, RS-FISH, and BigStitcher. \n\nIn summary, the Janelia Scientific Computing team supports the institute's mission and drives innovation in modern biological research by providing robust computational infrastructure, software support, and developing innovative tools and solutions.", 'The Janelia Scientific Computing team supports advanced imaging techniques in neuroscience and cell biology through a variety of ways. They have deep domain knowledge in image processing, machine learning, data handling, electron and light microscopy, instrument control, 3D graphics & visualization, bioinformatics & transcriptomics. They also develop and maintain a range of software tools and applications that aid in these areas. Some of these tools include NeuronBridge for finding neuron matches across modalities, HortaCloud for cloud-based collaborative annotation, VVD Viewer for volumetric rendering of 3D/4D microscopy data, and BigStitcher for efficient alignment of multi-tile and multi-angle image datasets. They also work closely with labs and project teams, providing full software life cycle support.', "The Janelia Scientific Computing team supports modern biological research in several ways. They maintain a world-class computational infrastructure, including storage and backup infrastructure, a high-performance compute cluster, and all Linux systems. They also manage Janelia's data center and backup and disaster recovery resources. The team supports data storage infrastructure for storing and accessing scientific data, with over 15 petabytes of scientific data split across various storage tiers. They also support a Linux compute cluster with over 5000 cores and 300 GPUs, and maintain many other Linux servers and workstations. \n\nIn addition to infrastructure support, the Scientific Computing Software team works closely with Janelia's labs, project teams, and shared resources to help with research and engineering tasks. They provide everything from answering quick questions to full software life cycle support. The team's software skills span a broad range of programming languages, extendable applications, frameworks, cloud & cluster technologies, and databases. They also have deep domain knowledge in image processing, machine learning, data handling, electron and light microscopy, instrument control, 3D graphics & visualization, bioinformatics & transcriptomics. \n\nThe team also develops and maintains a variety of tools and projects, such as NeuronBridge, HortaCloud, VVD Viewer, EASI-FISH pipeline, Render, RS-FISH, and BigStitcher. They are strong proponents of open science and most of their software is open source and available via GitHub. They also run the Scientific Computing Associates program, which offers challenging assignments for those interested in computational science.", "The Janelia Scientific Computing team provides comprehensive support for advanced imaging techniques in neuroscience, cell biology, and bioinformatics through a variety of collaborations and custom software tools. They work closely with Janelia's labs, project teams, and shared resources to assist with research and engineering tasks. This can range from answering quick questions to providing full software life cycle support.\n\nThe team's software skills cover a broad range of programming languages, extendable applications, frameworks, cloud & cluster technologies, and databases. They have deep domain knowledge in image processing, machine learning, data handling, electron and light microscopy, instrument control, 3D graphics & visualization, bioinformatics & transcriptomics. Many team members have backgrounds in biology, enabling them to work efficiently with experimentalists and computer scientists.\n\nThe team is also involved in various projects and tools such as NeuronBridge, HortaCloud, VVD Viewer, EASI-FISH pipeline, Render, RS-FISH, and BigStitcher, which are designed to support advanced imaging techniques and data analysis in neuroscience, cell biology, and bioinformatics.\n\nFurthermore, the team is a strong proponent of open science and has teamed up with the Computation & Theory research area to create the Open Science Software Initiative. Most of their software is open source and available via GitHub, promoting collaboration and knowledge sharing. They also run the Scientific Computing Associates program, which embeds associates in SciComp and the lab or team they work with."]




ERROR:asyncio:Task was destroyed but it is pending!
task: <Task pending name='Task-10922' coro=<as_completed.<locals>.sema_coro() running at /Users/bakhalea/Documents/gpt-semantic-search/env/lib/python3.12/site-packages/ragas/executor.py:37> wait_for=<Future pending cb=[Task.task_wakeup()]> cb=[as_completed.<locals>._on_completion() at /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/asyncio/tasks.py:618]>


Exception ignored in: <coroutine object Executor.wrap_callable_with_index.<locals>.wrapped_callable_async at 0x315b04f40>
Traceback (most recent call last):
  File "/Users/bakhalea/Documents/gpt-semantic-search/env/lib/python3.12/site-packages/ragas/executor.py", line 111, in wrapped_callable_async
    return counter, await callable(*args, **kwargs)
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/bakhalea/Documents/gpt-semantic-search/env/lib/python3.12/site-packages/ragas/testset/evolutions.py", line 142, in evolve
    ) = await self._aevolve(current_tries, current_nodes)
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/bakhalea/Documents/gpt-semantic-search/env/lib/python3.12/site-packages/ragas/testset/evolutions.py", line 466, in _aevolve
    simple_question, current_nodes, _ = await self.se._aevolve(
                                        ^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/bakhalea/Documents/gpt-semantic-search/env/lib/python3.12/sit

ERROR:asyncio:Task was destroyed but it is pending!
task: <Task pending name='Task-10920' coro=<as_completed.<locals>.sema_coro() done, defined at /Users/bakhalea/Documents/gpt-semantic-search/env/lib/python3.12/site-packages/ragas/executor.py:35> wait_for=<Future pending cb=[Task.task_wakeup()]> cb=[as_completed.<locals>._on_completion() at /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/asyncio/tasks.py:618]>
ERROR:asyncio:Task was destroyed but it is pending!
task: <Task pending name='Task-10921' coro=<as_completed.<locals>.sema_coro() done, defined at /Users/bakhalea/Documents/gpt-semantic-search/env/lib/python3.12/site-packages/ragas/executor.py:35> wait_for=<Future pending cb=[Task.task_wakeup()]> cb=[as_completed.<locals>._on_completion() at /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/asyncio/tasks.py:618]>


Exception ignored in: <function ClientResponse.__del__ at 0x176ab3f60>
Traceback (most recent call last):
  File "/Users/bakhalea/Documents/gpt-semantic-search/env/lib/python3.12/site-packages/aiohttp/client_reqrep.py", line 891, in __del__
    self._connection.release()
  File "/Users/bakhalea/Documents/gpt-semantic-search/env/lib/python3.12/site-packages/aiohttp/connector.py", line 173, in release
    self._connector._release(
  File "/Users/bakhalea/Documents/gpt-semantic-search/env/lib/python3.12/site-packages/aiohttp/connector.py", line 667, in _release
    protocol.close()
  File "/Users/bakhalea/Documents/gpt-semantic-search/env/lib/python3.12/site-packages/aiohttp/client_proto.py", line 71, in close
    transport.close()
  File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/asyncio/selector_events.py", line 1210, in close
    super().close()
  File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/asyncio/selector_events.py", line 875, in cl

ERROR:asyncio:Unclosed client session
client_session: <aiohttp.client.ClientSession object at 0x311d9fce0>
ERROR:asyncio:Unclosed client session
client_session: <aiohttp.client.ClientSession object at 0x30c3c0050>
ERROR:asyncio:Unclosed client session
client_session: <aiohttp.client.ClientSession object at 0x311d95ca0>




KeyboardInterrupt: 

Add values from the list of JaneliaGPT responses to the dataframe["answer"]

In [None]:
df['answer'] = None
# Assuming df is your DataFrame and answers_list is a list with values to populate the 'Answer' column
if len(df) == len(answers_list):
    df['answer'] = answers_list
else:
    print("The length of answers_list does not match the number of rows in the DataFrame.")

df.dropna(axis=1, how='all', inplace=True)
display(df)
# df.to_json('WithAnswersDatasetFromTestTxt.json', index=True)


Preprocess the dataframe to conver to a Dataset

In [None]:
#Attempt to fix the issue with the answers_list not bsjdkflasdjjfklasdkl fasdjkfh jkas
df_fix = df
# List of columns to keep
columns_to_keep = ['question', 'ground_truth', 'answer', 'contexts']

# Reassign df to a DataFrame containing only the columns to keep

df_fix= df_fix[columns_to_keep]



# Assuming df is already defined and contains the necessary columns

# Convert 'question' and 'answer' to lists of strings if they are not already
# df_fix['question'] = df_fix['question'].apply(lambda x: [x] if isinstance(x, str) else x)
# df_fix['answer'] = df_fix['answer'].apply(lambda x: [x] if isinstance(x, str) else x)

# Ensure 'contexts' and 'ground_truth' are lists of lists of strings
# This step assumes 'contexts' and 'ground_truth' are already in the correct format
# If not, you would need to apply a similar conversion as above, ensuring each element is a list

# Example conversion if 'contexts' and 'ground_truth' were not already lists of lists
df_fix['contexts'] = df_fix['contexts'].apply(lambda x: [[y] for y in x] if isinstance(x, list) and all(isinstance(y, str) for y in x) else x)
df_fix['ground_truth'] = df_fix['ground_truth'].apply(lambda x: [[y] for y in x] if isinstance(x, list) and all(isinstance(y, str) for y in x) else x)
display(df_fix)
# Now df should be in the correct format for training

from datasets import load_dataset

amnesty_qa = load_dataset("explodinggradients/amnesty_qa", "english_v2")

from datasets import Dataset
dataset_fix = Dataset.from_pandas(df_fix)
dataset_fix = dataset_fix.remove_columns(['__index_level_0__'])


Convert dataframe to Dataset and compare to a vaild Dataset for analysis

In [None]:
from datasets import Features

# Assuming dataset_fix and amnesty_qa["eval"] are your datasets
features_dataset_fix = dataset_fix.features
# features_amnesty_eval = amnesty_qa["eval"].features
def format_columns(example):
    # Format 'question', 'answer', and 'ground_truths' columns to single values
    for column in ['ground_truth', 'answer', 'question']:
        if column in example and example[column]:
            example[column] = example[column]
    
    # Correctly format 'contexts' column to a list of list of strings
    if 'contexts' in example:
        # Ensure 'contexts' is a list of strings (not a list of lists)
        if isinstance(example['contexts'], list):
            # If the items are lists (or other non-string types), flatten and convert to strings
            example['contexts'] = [str(item) for sublist in example['contexts'] for item in (sublist if isinstance(sublist, list) else [sublist])]
        else:
            # If 'contexts' is not a list, convert it into a list of a single string
            example['contexts'] = [str(example['contexts'])]

    
    return example


# Apply the transformation to both datasets
dataset_fix = dataset_fix.map(format_columns)
# Direct comparison of data types
"""if set(features_dataset_fix.keys()) == set(features_amnesty_eval.keys()):
    all_types_match = True
    for key in features_dataset_fix.keys():
        type_dataset_fix = type(features_dataset_fix[key]).__name__
        type_amnesty_eval = type(features_amnesty_eval[key]).__name__
        if type_dataset_fix != type_amnesty_eval:
            print(f"Data type for feature '{key}' differs between datasets. dataset_fix: {type_dataset_fix}, amnesty_qa['eval']: {type_amnesty_eval}")
            all_types_match = False
    if all_types_match:
        print("The data types of all features in both datasets match.")
else:
    print("The Features of the datasets differ in their keys.")"""
"""
print (dataset_fix["question"])
print (dataset_fix["answer"])
print (dataset_fix["ground_truth"])
print (dataset_fix["contexts"])
"""



In [None]:
"""from deepeval.metrics import (
    ContextualPrecisionMetric,
    ContextualRecallMetric,
    ContextualRelevancyMetric
)

contextual_precision = ContextualPrecisionMetric()
contextual_recall = ContextualRecallMetric()
contextual_relevancy = ContextualRelevancyMetric()

from deepeval.test_case import LLMTestCase
from deepeval import evaluate
modified_items = []

# Step 2: Iterate over each item in dataset_fix
for item in dataset_fix:
    # Step 3: Create a new LLMTestCase instance with modified fields
    modified_item = LLMTestCase(
        input=item["question"],
        actual_output=item["answer"],
        expected_output=item["ground_truth"],
        retrieval_context=item["contexts"]
    )
    # Step 4: Append the modified item to the list
    modified_items.append(modified_item)
# Assuming dataset_fix supports item assignment



evaluate(
    test_cases=[modified_items],
    metrics=[contextual_precision, contextual_recall, contextual_relevancy]
)

"""

Run evaluation to gather the below metrics

In [None]:

from ragas.metrics import (
    answer_relevancy,
    faithfulness,
    context_recall,
    context_precision,
)


from ragas import evaluate

eval_llm = Ollama(model="llama3")
# Will reutrn a dataframe with the metrics
# Returns error for now because answers column is missing
# Error is misleading, fix dataset first and make it match the docs example dataset

result = evaluate(
    dataset_fix,
    metrics=[
        context_precision,
        faithfulness,
        answer_relevancy,
        context_recall,
    ],
    llm=eval_llm,
)

result



In [None]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.width', None)
result.to_pandas()



In [None]:
import pandas as pd
import plotly.express as px
import json
import os

class MetricsManager:
    def __init__(self, file_path='metrics.json'):
        self.file_path = file_path
        self.metrics = self.load_metrics()

    def load_metrics(self):
        if os.path.exists(self.file_path):
            with open(self.file_path, 'r') as file:
                return json.load(file)
        else:
            return {}

    def save_metrics(self):
        with open(self.file_path, 'w') as file:
            json.dump(self.metrics, file, indent=4)

    def update_metrics(self, trial_name, context_precision, faithfulness, answer_relevancy, context_recall):
        averages = {
            'context_precision': context_precision,
            'faithfulness': faithfulness,
            'answer_relevancy': answer_relevancy,
            'context_recall': context_recall
        }
        self.metrics[trial_name] = averages
        self.save_metrics()

    def render_table(self):
        if not self.metrics:
            print("No metrics available for plotting.")
            return

        trials, metrics, averages = [], [], []
        for trial_name, metrics_averages in self.metrics.items():
            for metric, average in metrics_averages.items():
                trials.append(trial_name)
                metrics.append(metric)
                averages.append(round(average, 4))

        Eval_Categories = pd.DataFrame({
            'Trial': trials,
            'Metric': metrics,
            'Average': averages
        })

        if Eval_Categories.empty:
            print("No metrics data to plot.")
            return

        fig = px.bar(
            Eval_Categories,
            x='Trial',
            y='Average',
            color='Metric',
            barmode='group',
            text='Average',
            category_orders={"Metric": ["context_precision", "faithfulness", "answer_relevancy", "context_recall"]},
            labels={
                "Average": "Average Score",
                "Metric": "Metric",
                "Trial": "Trial Name"
            }
        )

        fig.update_layout(
            width=1000,
            height=600,
            title="<b>Average Scores of Evaluation Metrics by Trial</b>",
            xaxis_title="Trial Name",
            yaxis_title="Average Score",
            font=dict(size=15)
        )

        fig.show()

MetricsManager()