In [12]:
import pandas
import os
import plotly.express as px

In [13]:
def get_json_files(folder):
    """Return the names of the ``.json`` entries in ``folder``, sorted by name.

    Args:
        folder: Path of the directory to scan (not recursive).

    Returns:
        A list of bare entry names (not joined with ``folder``) whose name
        ends with ``".json"``, in ascending lexicographic order.

    Raises:
        OSError: Propagated from ``os.listdir`` (e.g. the folder is missing).
    """
    # os.listdir yields entries in arbitrary order; sorting keeps the
    # positional index used to label runs in the charts stable across re-runs.
    return sorted(f for f in os.listdir(folder) if f.endswith(".json"))

In [14]:
def calculate_accuracy(df):
    """Return the fraction of rows where the cleaned response equals the label.

    Args:
        df: DataFrame with the columns ``chatbot_response_clean`` and ``label``.

    Returns:
        Number of rows with ``chatbot_response_clean == label`` divided by the
        total number of rows, as a float. An empty frame yields ``nan``
        instead of raising ``ZeroDivisionError``.

    Raises:
        KeyError: If a non-empty frame lacks one of the two columns.
    """
    total = df.shape[0]
    if total == 0:
        # No rows means accuracy is undefined; NaN keeps callers running.
        return float("nan")
    # Element-wise comparison; summing the boolean mask counts the matches.
    correct = (df['chatbot_response_clean'] == df['label']).sum()
    return int(correct) / total


## Analyze ChatGPT

In [26]:
# --- Gather the ChatGPT result files -------------------------------------
folder = "data/results/chat_gpt"
# One specific run file is left out of this per-run overview.
excluded_run = "2023-04-24 16:08:16.804316_simple_promt.json"
files = [name for name in get_json_files(folder) if name != excluded_run]

# --- Score every run -------------------------------------------------------
results_cg = []
for file in files:
    run = pandas.read_json(os.path.join(folder, file))
    score = calculate_accuracy(run)
    # The first row's question is stored alongside the run's accuracy.
    prompt = run.iloc[0]['chatbot_question']
    results_cg.append(dict(model="chat_gpt", question=prompt, accuracy=score))

# --- Summary frame ---------------------------------------------------------
df = pandas.DataFrame(results_cg)
# String versions of the question and of the row index, used for display.
df['question_label'] = df['question'].apply(str)
df['index_label'] = df.index.astype(str)

# --- Bar chart: one bar per run, keyed by its row index --------------------
fig = px.bar(
    df,
    x='index_label',
    y='accuracy',
    labels={'index_label': 'Index', 'accuracy': 'Accuracy'},
)
fig.update_layout(
    title='Question Accuracy CHAT-GPT',
    xaxis_title='Questions',
    yaxis_title='Accuracy',
    xaxis_tickangle=-45,
)
fig.show()

# --- Legend: map each bar index back to its question -----------------------
for index, label in df['question_label'].items():
    print(f"{index}: {label}")



0: [{'role': 'system', 'content': 'You are a helpful assistant that tries to tell if two products are the same. Two products are only the same if all attributes match! Only answer yes or no.'}, {'role': 'user', 'content': 'product 1: Brother HL-L6200DW Wireless High Speed Mono Laser 2-sided printing 250 sheet Network 46PPM, product 2: Brother HL-L6300DW Laser Printer - Monochrome - Duplex'}, {'role': 'assistant', 'content': 'The two'}, {'role': 'user', 'content': 'Please answer yes or no otherwise I will not be able to understand you.'}, {'role': 'user', 'content': 'product 1: Brother HL-L6200DW Wireless High Speed Mono Laser 2-sided printing 250 sheet Network 46PPM, product 2: Brother HL-L6300DW Laser Printer - Monochrome - Duplex'}, {'role': 'assistant', 'content': 'No.'}, {'role': 'user', 'content': 'Very good, thank you!'}, {'role': 'user', 'content': 'product 1: Corsair Vengeance LPX RAM 16GB 2400MHz DDR4 UDIMM 288 Pin Desktop Memory, product 2: 16GB Corsair Vengeance LPX schwarz 

## Analyze GPT4All

In [16]:
# Score every GPT4All result file and chart the accuracy per run.
folder = "data/results/gpt4all"
files = get_json_files(folder)

results_4a = []

for file in files:
    df = pandas.read_json(os.path.join(folder, file))
    accuracy = calculate_accuracy(df)
    # get the chatbot_question of the 0th row
    question = df.iloc[0]['chatbot_question']
    results_4a.append({
        # Tag the rows with the model analysed in this cell (the label was
        # previously copy-pasted from the ChatGPT cell).
        "model": "gpt4all",
        "question": question,
        "accuracy": accuracy
    })

# Build the summary frame: one row per result file
df = pandas.DataFrame(results_4a)
# Convert the 'question' column to a string representation for display
df['question_label'] = df['question'].apply(lambda x: str(x))
# Convert the index to a string representation for display
df['index_label'] = df.index.astype(str)

# Create a bar chart with one bar per run, keyed by its row index
fig = px.bar(df, x='index_label', y='accuracy', labels={'index_label': 'Index', 'accuracy': 'Accuracy'})

# Customize the chart
fig.update_layout(
    title='Question Accuracy GPT4All',
    xaxis_title='Questions',
    yaxis_title='Accuracy',
    xaxis_tickangle=-45,
)

# Show the chart
fig.show()

# Print each question with its index so the bars can be mapped back to them
for index, row in df.iterrows():
    print(f"{index}: {row['question_label']}")



0: Are these two products the same? product 1: TAG Heuer Monaco Chronograph Calibre 11 Automatic, product 2: TAG Heuer Men's Special Edition Heuer Monaco Watch
1: Are these two products the same? product 1: Brother HL-L6200DW Wireless High Speed Mono Laser 2-sided printing 250 sheet Network 46PPM, product 2: Brother HL-L6300DW Laser Printer - Monochrome - Duplex


## Analyze Aleph Alpha

In [17]:
# Score every Aleph Alpha result file and chart the accuracy per run.
folder = "data/results/aleph_alpha"
files = get_json_files(folder)

results_aa = []

for file in files:
    df = pandas.read_json(os.path.join(folder, file))
    accuracy = calculate_accuracy(df)
    # get the chatbot_question of the 0th row
    question = df.iloc[0]['chatbot_question']
    results_aa.append({
        # Tag the rows with the model analysed in this cell (the label was
        # previously copy-pasted from the ChatGPT cell).
        "model": "aleph_alpha",
        "question": question,
        "accuracy": accuracy
    })

# Build the summary frame: one row per result file
df = pandas.DataFrame(results_aa)
# Convert the 'question' column to a string representation for display
df['question_label'] = df['question'].apply(lambda x: str(x))
# Convert the index to a string representation for display
df['index_label'] = df.index.astype(str)

# Create a bar chart with one bar per run, keyed by its row index
fig = px.bar(df, x='index_label', y='accuracy', labels={'index_label': 'Index', 'accuracy': 'Accuracy'})

# Customize the chart
fig.update_layout(
    title='Question Accuracy aleph_alpha',
    xaxis_title='Questions',
    yaxis_title='Accuracy',
    xaxis_tickangle=-45,
)

# Show the chart
fig.show()

# Print each question with its index so the bars can be mapped back to them
for index, row in df.iterrows():
    print(f"{index}: {row['question_label']}")



0: Are these two products the same? product 1: TAG Heuer Monaco Chronograph Calibre 11 Automatic, product 2: TAG Heuer Men's Special Edition Heuer Monaco Watch


## Comparing the 3 Models on benchmark

In [24]:
# One hand-picked result file per model for the benchmark comparison.
files = [{
    "name": "gpt4all",
    "file": "data/results/gpt4all/2023-04-24 16:27:14.136349_simple_promt.json"
    },
    {
    "name": "chatgpt",
    "file": "data/results/chat_gpt/2023-04-24 17:20:03.458940_simple_promt.json"},
    {
    "name": "aleph_alpha",
    "file": "data/results/aleph_alpha/2023-04-24 22:42:34.718717_simple_promt.json"}
]

results = []

for file in files:
    df = pandas.read_json(file["file"])
    accuracy = calculate_accuracy(df)
    # get the chatbot_question of the 0th row
    question = df.iloc[0]['chatbot_question']
    results.append({
        "model": file["name"],
        "question": question,
        "accuracy": accuracy
    })

# Build the summary frame: one row per model
df = pandas.DataFrame(results)
# Convert the 'question' column to a string representation for display
df['question_label'] = df['question'].apply(lambda x: str(x))
# Convert the index to a string representation for display
df['index_label'] = df.index.astype(str)

# Create a bar chart with one bar per model
fig = px.bar(df, x='model', y='accuracy', labels={
             'model': 'Model Name', 'accuracy': 'Accuracy'})

# Customize the chart
fig.update_layout(
    title='Question Accuracy on Benchmark Questions',
    # The x-axis shows models here, not questions, so title it accordingly.
    xaxis_title='Model Name',
    yaxis_title='Accuracy',
    xaxis_tickangle=-45,
)

# Show the chart
fig.show()

# Print the first question of each model's run next to its row index
for index, row in df.iterrows():
    print(f"{index}: {row['question_label']}")

0: Are these two products the same? product 1: TAG Heuer Monaco Chronograph Calibre 11 Automatic, product 2: TAG Heuer Men's Special Edition Heuer Monaco Watch
1: [{'role': 'system', 'content': 'Are these two products the same?'}, {'role': 'user', 'content': "product 1: TAG Heuer Monaco Chronograph Calibre 11 Automatic, product 2: TAG Heuer Men's Special Edition Heuer Monaco Watch"}]
2: Are these two products the same? product 1: TAG Heuer Monaco Chronograph Calibre 11 Automatic, product 2: TAG Heuer Men's Special Edition Heuer Monaco Watch


## Model comparison on random sample

In [25]:
# One hand-picked result file per model for the random-sample comparison.
files = [{
    "name": "gpt4all",
    "file": "data/results/gpt4all/2023-04-24 17:37:23.585556_simple_promt.json"
    },
    {
    "name": "chatgpt",
    "file": "data/results/chat_gpt/2023-04-24 16:08:16.804316_simple_promt.json"},
]

results = []

for file in files:
    df = pandas.read_json(file["file"])
    accuracy = calculate_accuracy(df)
    # get the chatbot_question of the 0th row
    question = df.iloc[0]['chatbot_question']
    results.append({
        "model": file["name"],
        "question": question,
        "accuracy": accuracy
    })

# Build the summary frame: one row per model
df = pandas.DataFrame(results)
# Convert the 'question' column to a string representation for display
df['question_label'] = df['question'].apply(lambda x: str(x))
# Convert the index to a string representation for display
df['index_label'] = df.index.astype(str)

# Create a bar chart with one bar per model
fig = px.bar(df, x='model', y='accuracy', labels={
             'model': 'Model Name', 'accuracy': 'Accuracy'})

# Customize the chart
fig.update_layout(
    title='Question Accuracy on random sample',
    # The x-axis shows models here, not questions, so title it accordingly.
    xaxis_title='Model Name',
    yaxis_title='Accuracy',
    xaxis_tickangle=-45,
)

# Show the chart
fig.show()

# Print the first question of each model's run next to its row index
for index, row in df.iterrows():
    print(f"{index}: {row['question_label']}")

0: Are these two products the same? product 1: Brother HL-L6200DW Wireless High Speed Mono Laser 2-sided printing 250 sheet Network 46PPM, product 2: Brother HL-L6300DW Laser Printer - Monochrome - Duplex
1: [{'role': 'system', 'content': 'You are a helpful assistant that tries to tell if two products are the same. Two products are only the same if all attributes match! Only answer yes or no. 0 means no, 1 means yes.'}, {'role': 'user', 'content': 'product 1: Brother HL-L6200DW Wireless High Speed Mono Laser 2-sided printing 250 sheet Network 46PPM, product 2: Brother HL-L6300DW Laser Printer - Monochrome - Duplex'}]
