In [1]:
from ipywidgets import interact
import ipywidgets as widgets
import asyncio
import json
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import subprocess
from IPython.display import FileLink
import os
def display_images(original_path, generated_path):
    """Render the original and the generated image next to each other.

    Args:
        original_path: Path of the reference image, shown in the left panel.
        generated_path: Path of the generated image, shown in the right panel.

    Both files are read with ``mpimg.imread`` and drawn into a single
    10x5 figure that is shown immediately.
    """
    panels = (
        (original_path, 'Original'),
        (generated_path, 'Generated'),
    )

    # One canvas holding a 1-row, 2-column grid of panels.
    plt.figure(figsize=(10, 5))

    for slot, (image_path, heading) in enumerate(panels, start=1):
        plt.subplot(1, 2, slot)  # select the slot-th cell of the 1x2 grid
        plt.imshow(mpimg.imread(image_path))
        plt.axis('off')  # hide axis numbers and ticks; only the picture matters
        plt.title(heading)

    # Show the figure with both panels
    plt.show()

def load_data(filepath):
    """Read a JSON file and return the decoded Python object.

    Args:
        filepath: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file's content.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # JSON text is UTF-8; decode it explicitly instead of relying on the
    # platform's locale encoding, which breaks non-ASCII text on some systems.
    with open(filepath, 'r', encoding='utf-8') as f:
        return json.load(f)

def wait_for_change(func_button):
    """Return a future that resolves on the next click of ``func_button``.

    Args:
        func_button: A button-like widget exposing ``on_click(callback, remove=False)``
            and a ``description`` attribute.

    Returns:
        An ``asyncio.Future`` whose result is the clicked button's description.
        The click handler unregisters itself on the first click.
    """
    future = asyncio.Future()

    def evaluate(button):
        # Buttons don't support `unobserve`, so the handler is detached with
        # `remove=True`. Detach first so the binding is always released, even
        # when the future can no longer accept a result.
        func_button.on_click(evaluate, remove=True)
        # A future that was cancelled (or otherwise completed) in the meantime
        # would raise InvalidStateError on set_result; just ignore the click.
        if not future.done():
            future.set_result(button.description)

    func_button.on_click(evaluate)
    return future

In [2]:
# Input records to annotate, and the file where annotations are saved.
json_filepath = 'output_direct.json'
output_filepath = "eval_output_direct.json"

# Resume from previously saved annotations when that file exists; otherwise
# start from the raw input records.
list_file = load_data(
    output_filepath if os.path.exists(output_filepath) else json_filepath
)


In [4]:
# Shared Output widget: each annotation form is rendered inside it and cleared
# from it when the form is submitted.
out = widgets.Output()  # Create an Output widget to capture and display items


def _rating_widget(question, criteria, layout=None):
    """Build an unselected RadioButtons group for one rubric question.

    ``layout`` is only forwarded when given, so a question without a custom
    layout keeps the widget's default one.
    """
    settings = dict(options=criteria, value=None, description=question, disabled=False)
    if layout is not None:
        settings["layout"] = layout
    return widgets.RadioButtons(**settings)


def _leading_score(option_text):
    """Return the integer score that prefixes a rubric option such as '3 - ...'."""
    return int(option_text.split(" - ", 1)[0])


def _show_completion_message():
    """Tell the annotator that no unevaluated data point is left."""
    # Writing into `out` also lets a pending `clear_output(wait=True)` take
    # effect, so the last submitted form does not stay on screen.
    with out:
        print("All data points have been evaluated.")


def display_data_point(i, dic_data):
    """Show the annotation form for the first unevaluated record at or after ``i``.

    Args:
        i: Index of ``dic_data`` within the module-level ``list_file``.
        dic_data: Record to annotate. Reads the keys ``'figure_path'``,
            ``'output_figure_path'`` and ``'caption'``; a truthy ``'eval'``
            entry marks the record as already evaluated.

    Records that already have an evaluation are skipped. On submit, the answers
    are stored under ``dic_data['eval']``, the whole ``list_file`` is written to
    ``output_filepath``, and the next record is shown. When no record is left,
    a completion message is printed instead.
    """
    # Skip finished records (e.g. when resuming from a saved file). A loop is
    # used instead of recursion so that a long run of finished records cannot
    # exhaust the interpreter's recursion limit.
    while dic_data.get('eval', False):
        i += 1
        if i >= len(list_file):
            _show_completion_message()
            return
        dic_data = list_file[i]

    with out:
        figure_path = dic_data['figure_path']
        output_path = dic_data["output_figure_path"]
        caption = dic_data['caption']
        print(figure_path)
        caption_display = widgets.Label(value="Figure Caption:" + caption)
        display(caption_display)
        display_images(figure_path, output_path)
        custom_layout = widgets.Layout(width='70%')

        # --- Structural components -------------------------------------
        title1 = widgets.Label("Structural Components")
        # The yes/no question intentionally uses the default layout.
        plot_type_correct = _rating_widget(
            "Does the generated figure represent the correct type of plot as stated?",
            ["Yes", "No"],
        )
        axes_grids_criteria = ["1 - Axes are missing or misplaced.",
             "2 - Axes are present but not positioned accurately.",
             "3 - Axes are mostly accurate but with minor positioning or scale issues.",
             "4 - Axes are accurately placed with only negligible discrepancies.",
             "5 - Axes placement is accurate and indistinguishable from the original."]
        tick_marks_criteria = [
            "1 - Tick marks and grid lines are missing or incorrectly placed.",
            "2 - Some tick marks or grid lines are present but have significant inaccuracies.",
            "3 - Most tick marks and grid lines are correctly placed but some errors are noticeable.",
            "4 - Tick marks and grid lines are well-placed with very minor deviations.",
            "5 - Tick marks and grid lines are placed exactly as in the original."
        ]
        tick_marks_rating = _rating_widget(
            "Correctness of tick marks and grid lines Rating:",
            tick_marks_criteria,
            custom_layout,
        )
        axes_grids_rating = _rating_widget(
            "Presence and placement of axes Rating:",
            axes_grids_criteria,
            custom_layout,
        )

        text_elements_criteria = [
            "1 - Text elements like titles, axis labels, legend, and annotations are missing or completely different in style and position.",
            "2 - Text elements like titles, axis labels, legend, and annotations are present but style and position poorly match the original.",
            "3 - Text elements like titles, axis labels, legend, and annotations have a somewhat similar style and position but with notable differences.",
            "4 - Text elements like titles, axis labels, legend, and annotations style and position are closely matched to the original with minor deviations.",
            "5 - Text elements like titles, axis labels, legend, and annotations matche the original in both style and position perfectly."
        ]
        text_elements_rating = _rating_widget(
            "Rate the accuracy of text elements like titles, axis labels, legend, and annotations for style and position.",
            text_elements_criteria,
            custom_layout,
        )

        # --- Stylistic components --------------------------------------
        title2 = widgets.Label("Stylistic Components")
        color_match_criteria = [
            "1 - Colors used are completely different from the original.",
            "2 - Colors somewhat resemble those in the original, but the match is poor.",
            "3 - Colors are generally similar, with a few inaccuracies.",
            "4 - Color palette is very close to the original with negligible differences.",
            "5 - Color match is perfect, with indistinguishable differences from the original."
        ]
        color_rating = _rating_widget(
            "Rate the use of color palettes and their matching with the original figure.",
            color_match_criteria,
            custom_layout,
        )
        line_styles_criteria = [
            "1 - Line/bar/marker styles are inconsistent with no match to the original.",
            "2 - Line/bar/marker styles show an attempt at consistency, but there are significant mismatches.",
            "3 - Line/bar/marker styles are mostly consistent, with a few noticeable discrepancies.",
            "4 - Line/bar/marker styles match well with the original, with minor inconsistencies.",
            "5 - Line/bar/marker styles are consistent and match the original exactly."
        ]
        line_styles_rating = _rating_widget(
            "Rate if the line types, bar types (solid, dashed, dotted, etc.), marker styles are consistent with the original.",
            line_styles_criteria,
            custom_layout,
        )

        # --- Numerical value similarity --------------------------------
        title3 = widgets.Label("Numerical Value Similarity")
        numerical_accuracy_criteria = [
            "1 - Numerical values are not at all accurately represented; major discrepancies are visible.",
            "2 - Some elements are somewhat accurate, but there are significant visual differences.",
            "3 - Most numerical values appear to be visually similar, with some minor inaccuracies.",
            "4 - Numerical values are very closely represented, with very few and hard-to-notice differences.",
            "5 - Numerical values are visually indistinguishable from the original figure."
        ]
        numerical_accuracy_rating = _rating_widget(
            "Estimate the visual accuracy of numerical representations (e.g., bar heights, point locations) compared to the original.",
            numerical_accuracy_criteria,
            custom_layout,
        )

        # --- Practical utility -----------------------------------------
        title4 = widgets.Label("Practical Utility")
        figure_adaptability_criteria = [
            "1 - It would be extremely difficult; the figure requires major revisions to be usable.",
            "2 - It would be somewhat difficult; the figure needs several significant changes to be adaptable.",
            "3 - It would be moderately easy; the figure needs some adjustments to be practical for reuse.",
            "4 - It would be very easy; the figure requires only minor tweaks to adapt to new data.",
            "5 - It would be extremely easy; the figure can be used as-is or with minimal modifications."
        ]
        figure_adaptability_rating = _rating_widget(
            "Rate how helpful the figure is for easily recreating a similar style with minor modifications.",
            figure_adaptability_criteria,
            custom_layout,
        )

        button = widgets.Button(description="Submit",
                                disabled=False,
                                button_style="")
        # Feedback line under the button, used when the form is incomplete.
        status = widgets.Label(value="")

        questions = [
            plot_type_correct,
            axes_grids_rating,
            tick_marks_rating,
            text_elements_rating,
            color_rating,
            line_styles_rating,
            numerical_accuracy_rating,
            figure_adaptability_rating,
        ]

        Box = widgets.VBox([title1, plot_type_correct, axes_grids_rating, tick_marks_rating, text_elements_rating, title2, color_rating, line_styles_rating, title3, numerical_accuracy_rating, title4, figure_adaptability_rating, button, status])
        display(Box)

        def on_submit(btn):
            # An unanswered question has value None; parsing it would raise
            # inside the callback and the click would appear to do nothing.
            if any(question.value is None for question in questions):
                status.value = "Please answer every question before submitting."
                return
            status.value = ""

            # Key order is kept stable so saved files stay comparable.
            evaluation = {
                "axes_grids_rating": _leading_score(axes_grids_rating.value),
                "tick_marks_rating": _leading_score(tick_marks_rating.value),
                "plot_type_correct": plot_type_correct.value,
                "text_elements_rating": _leading_score(text_elements_rating.value),
                "color_rating": _leading_score(color_rating.value),
                "line_styles_rating": _leading_score(line_styles_rating.value),
                "numerical_accuracy_rating": _leading_score(numerical_accuracy_rating.value),
                "figure_adaptability_rating": _leading_score(figure_adaptability_rating.value),
            }
            dic_data["eval"] = evaluation
            list_file[i] = dic_data
            # Persist after every submission so progress survives a kernel restart.
            with open(output_filepath, "w", encoding="utf-8") as file:
                json.dump(list_file, file, indent=4)
            # wait=True defers the clear until new output arrives, avoiding flicker.
            out.clear_output(wait=True)
            if i + 1 < len(list_file):
                display_data_point(i + 1, list_file[i + 1])
            else:
                _show_completion_message()

        button.on_click(on_submit)


display(out)  # Display the Output widget initially

# Start the process by displaying the first data point. Guard against an
# empty record list, where indexing list_file[0] would raise IndexError.
if list_file:
    display_data_point(0, list_file[0])
else:
    print("No data points to evaluate.")

Output()

: 