In [388]:
import difflib
import os
from pathlib import Path
from typing import List, Tuple

In [389]:
from selenium import webdriver
from selenium.webdriver.common.by import By

# Browser setup: start a Selenium-controlled Chrome session. The module-level
# `browser` object is what save_and_open_difference_table uses to display the
# generated HTML page.
browser = webdriver.Chrome()
# Implicit wait applied to element lookups; per Selenium's documented API the
# argument is expressed in seconds.
browser.implicitly_wait(10)


In [390]:


def save_and_open_difference_table(
    baseline: str,
    permutations: List[Tuple[str, str]],
    file_path: str = "difference_table.html",
    driver=None,
) -> None:
    """Render diff tables of each permutation against ``baseline`` and open them.

    One ``difflib.HtmlDiff`` table is produced per ``(label, text)`` pair, with
    the permutation text on the "from" side (titled ``label``) and ``baseline``
    on the "to" side (titled ``"Baseline"``). All tables are concatenated into
    a single styled HTML page, written to ``file_path`` as UTF-8, and loaded in
    a Selenium-style browser through its ``get`` method.

    Args:
        baseline: Reference text every permutation is compared against.
        permutations: ``(label, text)`` pairs; an empty list yields a page
            without any table.
        file_path: Where the HTML page is written. Defaults to
            ``"difference_table.html"`` in the current working directory.
        driver: Object exposing ``get(url)``. When ``None`` (the default) the
            module-level ``browser`` is used.

    Raises:
        OSError: If ``file_path`` cannot be opened for writing.
        NameError: If ``driver`` is ``None`` and no global ``browser`` exists.
    """
    differ = difflib.HtmlDiff()

    # Each text is wrapped in a single-element list, so difflib compares it as
    # one line rather than splitting it into several rows.
    tables = [
        differ.make_table([text], [baseline], label, "Baseline")
        for label, text in permutations
    ]

    # NOTE(review): difflib emits the fromdesc/todesc labels inside
    # <th class="diff_header"> cells, so the `th.diff_header` rule below appears
    # to hide the labels together with the line-number column - confirm that
    # this is intended.
    page_head = """<!DOCTYPE html><html><head>
    <title>NooP+ Difference Table</title>
    <style>
    table.diff {
        width: 100%;
        table-layout: fixed;
        margin-bottom: 20px; /* Add space between tables */
        border-spacing: 0; /* Remove default spacing */
    }
    td, th {
        word-wrap: break-word;
        overflow-wrap: break-word;
        white-space: normal;
        font-size: 16px; /* Increase font size */
        padding: 8px; /* Add padding inside cells */
    }
    td.diff_next, th.diff_next {
        display: none; /* Hide columns with "t" */
    }
    td.diff_header, th.diff_header {
        display: none; /* Hide columns with "1" */
    }
    colgroup:nth-child(1),
    colgroup:nth-child(2),
    colgroup:nth-child(3),
    colgroup:nth-child(4) {
        display: none; /* Hide unnecessary colgroups */
    }

    /* Colors for diff classes */
    .diff_add {
        background-color: #d4edda; /* Light green background */
        color: #155724; /* Dark green text */
    }
    .diff_chg {
        background-color: #fff3cd; /* Light yellow background */
        color: #856404; /* Dark yellow (brownish) text */
    }
    .diff_sub {
        background-color: #f8d7da; /* Light red background */
        color: #721c24; /* Dark red text */
    }

    /* Optional: Add hover effect for better usability */
    td.diff_add:hover, td.diff_chg:hover, td.diff_sub:hover {
        filter: brightness(90%); /* Slightly darken on hover */
    }
</style>




    </head><body>
    """
    page = page_head + "".join(tables) + "</body></html>"

    # Persist the page so the browser can load it from disk.
    with open(file_path, "w", encoding="utf-8") as handle:
        handle.write(page)

    # Build a proper file URI: as_uri() percent-encodes special characters and
    # yields the file:///C:/... form on Windows, which plain "file://" + path
    # concatenation does not.
    page_uri = Path(os.path.abspath(file_path)).as_uri()

    # The page opens in the Selenium-controlled session, not the OS default
    # browser. `browser` is only looked up when no driver was supplied.
    target = browser if driver is None else driver
    target.get(page_uri)

In [391]:
from datasets import load_from_disk
import pandas as pd
# Identifiers from which the on-disk locations of the evaluated data are built.
subsets = ["naive", "addition", "lexicon", "syntax"]
dataset_name = "LFrancis/GSM8k-NoOp-Plus"
MODEL_NAME = "meta-llama/Llama-3.1-8B-Instruct"

# Column read from each frame when the question texts are compared.
question_column = "question"

# Evaluated baseline split, materialised as a DataFrame.
baseline_path = f"{dataset_name}_main_evaluated_{MODEL_NAME}"
baseline_df = pd.DataFrame(load_from_disk(baseline_path))

# One DataFrame per subset, appended in the same order as `subsets`.
datasets = []
for subset in subsets:
    path = f"{dataset_name}_main_{subset}_evaluated_{MODEL_NAME}"
    datasets.append(pd.DataFrame(load_from_disk(path)))

In [392]:
import random

# Draw one row position uniformly at random from the baseline frame.
random_index = random.randrange(len(baseline_df))

# Pair every subset label with the question found at that position in the
# corresponding frame, then fetch the baseline question at the same position.
rows = [
    (subset_label, frame.iloc[random_index][question_column])
    for subset_label, frame in zip(subsets, datasets)
]
baseline_row = baseline_df.iloc[random_index][question_column]

# Build the diff page and show it in the browser.
save_and_open_difference_table(baseline_row, rows)
