In [2]:
import pandas as pd
import altair as alt
from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi
import os
import numpy as np
from dotenv import load_dotenv
load_dotenv(override=True)

True

In [3]:
def read_mongo(collection, is_duplicates, temp=None):
    """Fetch a single document from a collection of the "data" database.

    The document is looked up by its ``is_duplicates`` and ``temperature``
    fields using ``find_one``.

    Args:
        collection: Name of the collection inside the "data" database.
        is_duplicates: Value the document's ``is_duplicates`` field must match.
        temp: Value the document's ``temperature`` field must match. When it
            is None no query is issued.

    Returns:
        The matching document, or None when ``temp`` is None, when nothing
        matches, or when the query raises (the error is printed).
    """
    # Nothing to look up without a temperature; avoid opening a client at all.
    if temp is None:
        return None

    uri = os.getenv("MONGODB_URI")
    client = MongoClient(uri, server_api=ServerApi('1'))
    try:
        return client["data"][collection].find_one(
            {"is_duplicates": is_duplicates, "temperature": temp}
        )
    except Exception as e:
        # TODO: Add error handling
        # Best-effort read: report the failure and signal "no document".
        print(e)
        return None
    finally:
        # Always release the client's connections, on success and on failure.
        client.close()

In [4]:
# Create 'family' and 'variant' columns
def process_model_name(model_name):
    """Split a model name into a ``(family, variant)`` pair.

    Names containing ':' are split on it: the first piece is the family and
    the last piece, with every 'b' removed, is parsed as a float variant.
    Names without ':' are returned unchanged as the family with variant 15.

    Raises:
        ValueError: If the last piece cannot be parsed as a float.
    """
    # No explicit size tag: fall back to the fixed default variant.
    if ':' not in model_name:
        return model_name, 15

    pieces = model_name.split(':')
    size_text = pieces[-1].replace('b', '')
    return pieces[0], float(size_text)

In [5]:
def bar_chart(group, metric):
    """Build a grouped bar chart of average score per temperature.

    Args:
        group: Data passed to ``alt.Chart``; the encodings reference the
            ``temperature``, ``dataset``, ``avg_score`` and ``model`` fields.
        metric: Used as the chart title.

    Returns:
        The Altair chart (400x300) with one bar per dataset at each
        temperature.
    """
    x_axis = alt.X('temperature:O', title='Temperature')
    # Scores are drawn on a fixed 0-1 axis.
    y_axis = alt.Y('avg_score:Q', title='Average Score', scale=alt.Scale(domain=[0,1]))
    dataset_color = alt.Color('dataset:N', title='Dataset', legend=alt.Legend(orient="top"))

    bars = alt.Chart(group).mark_bar()
    encoded = bars.encode(
        x=x_axis,
        xOffset='dataset:N',  # place the datasets' bars side by side
        y=y_axis,
        color=dataset_color,
        tooltip=['model', 'avg_score'],
    )
    return encoded.properties(width=400, height=300, title=metric)


In [6]:
# Documents returned by read_mongo, collected by the fetch loop below.
data = []

def float_range(start, stop, step):
    """Yield ``start, start + step, start + 2 * step, ...`` while below ``stop``.

    Each value is computed as ``start + i * step`` instead of by repeated
    addition, so rounding error does not accumulate (repeated addition of 0.1
    from 0 yields 0.9999999999999999 as an extra value below 1).

    Args:
        start: First value; yielded unchanged when it is below ``stop``.
        stop: Exclusive upper bound.
        step: Positive increment.

    Raises:
        ValueError: If ``step`` is not positive, which would otherwise never
            reach ``stop``. Raised when iteration starts.
    """
    if step <= 0:
        raise ValueError("step must be positive")
    index = 0
    value = start
    while value < stop:
        yield value
        index += 1
        value = start + index * step
# Fetch one "avg_result" document per (is_duplicates flag, temperature) pair.
is_duplicates = [False, True]
for boolean in is_duplicates:
    for temp in float_range(0, 2.5, 0.5):
        document = read_mongo("avg_result", boolean, temp)
        if document is None:
            # read_mongo returns None when nothing matched or the query failed;
            # skip it so the flattening step below does not index into None.
            print(f"No avg_result document for is_duplicates={boolean}, temperature={temp}")
            continue
        data.append(document)

# Flatten the data: one row per entry of each document's "result" list.
# A comprehension keeps its loop variables local, so the is_duplicates list
# above is not overwritten by a per-document boolean.
flat_data = [
    {
        "temperature": entry["temperature"],
        "is_duplicates": entry["is_duplicates"],
        "model": result["model"],
        "avg_score": result["avg_score"],
        "metric": result["metric"],
    }
    for entry in data
    for result in entry["result"]
]

# Convert to a pandas DataFrame; naming the columns keeps them present even
# when no document was fetched.
df = pd.DataFrame(
    flat_data,
    columns=["temperature", "is_duplicates", "model", "avg_score", "metric"],
)
df["dataset"] = df["is_duplicates"].apply(lambda x: "Duplicates" if x else "Variants")

# Display the flat data
print(df.head())

   temperature  is_duplicates   model  avg_score                     metric  \
0          0.0          False  gpt-4o   0.463491  Non-LLM String Similarity   
1          0.0          False  gpt-4o   0.343013                  BlueScore   
2          0.0          False  gpt-4o   0.491706                Rouge Score   
3          0.0          False  gpt-4o   0.853529    LLM Semantic Similarity   
4          0.5          False  gpt-4o   0.440254  Non-LLM String Similarity   

    dataset  
0  Variants  
1  Variants  
2  Variants  
3  Variants  
4  Variants  


In [7]:
# Metrics are charted in this fixed order, one bar chart per metric.
metric_order = ['BlueScore', 'Rouge Score', 'Non-LLM String Similarity', 'LLM Semantic Similarity']
all_average_scores = []
for metric in metric_order:
    # Keep only the rows for the current metric (copied so df stays untouched).
    is_current_metric = df['metric'] == metric
    group = df[is_current_metric].copy()

    chart = bar_chart(group, metric)
    chart.display()

In [8]:
def heatmap(df, dataset_name):
    """Build a temperature-by-metric heatmap coloured by average score.

    Args:
        df: Data passed to ``alt.Chart``; the encodings reference the
            ``temperature``, ``metric`` and ``avg_score`` fields.
        dataset_name: Used as the chart title.

    Returns:
        The Altair chart (300x300).
    """
    temperature_axis = alt.X('temperature:O', title='Temperature')  # ordinal
    metric_axis = alt.Y('metric:N', title='Metric')  # nominal
    score_color = alt.Color(
        'avg_score:Q',
        title='Average Score',
        scale=alt.Scale(scheme='viridis'),  # continuous colour scale
    )

    cells = alt.Chart(df).mark_rect().encode(
        x=temperature_axis,
        y=metric_axis,
        color=score_color,
        tooltip=['temperature:O', 'metric:N', 'avg_score:Q'],
    )
    return cells.properties(width=300, height=300, title=f"{dataset_name}")

In [12]:
# One heatmap per dataset label found in df["dataset"].
datasets = ["Duplicates", "Variants"]

for dataset in datasets:
    # Restrict to the rows of this dataset before plotting.
    in_dataset = df["dataset"] == dataset
    df_dataset = df[in_dataset].copy()
    chart = heatmap(df_dataset, dataset)
    chart.display()

In [10]:
# Mean avg_score over all rows of df for each (temperature, dataset) pair.
average_scores = (
    df.groupby(['temperature', 'dataset'])['avg_score']
    .mean()
    .reset_index()
)

# Encodings shared by the line layer and the point layer.
base = alt.Chart(average_scores).encode(
    x=alt.X('temperature:O', title='Temperature'),
    y=alt.Y('avg_score:Q', title='Consistency'),
    tooltip=['temperature:O', 'dataset:N', 'avg_score:Q'],
)

# Line layer: carries the colour and dash legends.
line_chart = base.mark_line().encode(
    color=alt.Color('dataset:N', title='Dataset'),
    strokeDash=alt.StrokeDash('dataset', legend=alt.Legend(title='Dataset')),
)

# Point layer: filled markers on top of the lines, with its own legend disabled.
dot_chart = base.mark_point(filled=True).encode(
    color=alt.Color('dataset:N', title='Dataset', legend=None),
)

chart = (line_chart + dot_chart).properties(
    width=600,  # Chart width
    height=400  # Chart height
)

chart

In [11]:
# Print the mean avg_score per (temperature, dataset) pair as plain text.
print(average_scores)

   temperature     dataset  avg_score
0          0.0  Duplicates   0.887521
1          0.0    Variants   0.537935
2          0.5  Duplicates   0.756706
3          0.5    Variants   0.527577
4          1.0  Duplicates   0.650826
5          1.0    Variants   0.505137
6          1.5  Duplicates   0.582673
7          1.5    Variants   0.455108
8          2.0  Duplicates   0.460510
9          2.0    Variants   0.337644
