# Opacity Study (update: April 19 by Shano)



In [2]:
import revisitpy as rvt
import numpy as np
import pandas as pd
import altair as alt
import vl_convert as vlc
import itertools
import revisitpy_server as rs
import json
import time
import anywidget
import vega



# Meta Data
# Descriptive information about the study (authors, title, date, version)
# that is passed to rvt.studyConfig further down in the notebook.
study_metadata = rvt.studyMetadata(
    authors=["Shano Liang"],
    organizations=["VIS Lab"],
    title='Opacity Judgment Study',
    description='',
    date='2025-04-03',
    version='1.2'
)


# UI Config
# Study-wide interface settings: contact address, logo asset, sidebar on,
# progress bar off. nextOnEnter=True is what lets participants advance with
# the Enter key (see the component-function section of this notebook).
ui_config = rvt.uiConfig(
  contactEmail="sliang1@wpi.edu",
  logoPath="./assets/revisitLogoSquare.svg",
  sidebar=True,
  withProgressBar=False,
  nextOnEnter=True
)

# Introduction
# Markdown component rendered from ./assets/introduction.md; it is placed
# ahead of the trial sequence when the final sequence is assembled.
introduction = rvt.component(type='markdown', path='./assets/introduction.md', component_name__= 'introduction')

# Snippet of the introduction component.
print(introduction)

{
    "correctAnswer": [],
    "path": "./assets/introduction.md",
    "response": [],
    "type": "markdown"
}


## Generate Curves

In [3]:
def generate_smooth_curve(num_points=100, seed=None, wave_combinations=None):
    """
    Generate a smooth pseudo-random curve as a sum of simple wave terms.

    Four frequency factors and four amplitude factors are always drawn, but
    only as many (amplitude, frequency) pairs are consumed as there are
    entries in ``wave_combinations`` (``zip`` stops at the shortest input).
    With the default ``[['sin', 'sinc']]`` a single pair drives one sine term
    and one sinc term.

    Parameters:
        num_points (int): Number of samples, evenly spaced on [0, 10].
        seed (int, optional): When given, NumPy's *global* random state is
            re-seeded with this value so the curve is reproducible.
        wave_combinations (list, optional): List of lists of wave names
            ('sin', 'cos', 'sinc', 'tanh', 'exp'). Each inner list shares one
            (amplitude, frequency) pair. Unknown names are ignored.

    Returns:
        tuple: ``(x, y)`` where both are 1-D arrays of length ``num_points``.
    """
    if seed is not None:
        np.random.seed(seed)  # Set seed for reproducibility

    # NOTE: the order of these draws defines the curve for a given seed;
    # do not reorder them or previously generated stimuli will change.
    freq_factors = np.random.randint(1, 3, size=4).tolist()
    amp_factors = np.random.uniform(0.5, 10, size=4).tolist()
    noise_level = np.random.uniform(0, 0.02)
    x_shift = np.random.uniform(0, 3)
    y_shift = np.random.uniform(0, 3)

    if wave_combinations is None:
        wave_combinations = [['sin', 'sinc']]

    # One term builder per supported wave name: (amplitude, frequency, shifted x) -> term
    wave_terms = {
        'sin': lambda a, f, t: a * np.sin(f * t),
        'cos': lambda a, f, t: a * np.cos(f * t),
        'sinc': lambda a, f, t: a * np.sinc(f * t),
        'tanh': lambda a, f, t: a * np.tanh(f * t),
        'exp': lambda a, f, t: a * np.exp(-0.5 * f * t),
    }

    x = np.linspace(0, 10, num_points)
    y = np.zeros_like(x)
    shifted_x = x + x_shift

    for a, f, waves in zip(amp_factors, freq_factors, wave_combinations):
        for w in waves:
            term = wave_terms.get(w)
            if term is not None:
                y += term(a, f, shifted_x)

    # Add small Gaussian noise and the vertical offset in a single step
    y += (np.random.normal(scale=noise_level, size=len(x)) + y_shift)
    return x, y

## Plot Altair Vis

We now want to generate the datasets that will go into our vega charts. We don't yet have to worry about rendering these, we'll just define the functions to generate the data.

In [4]:
def plot_altair_curve(seed=None, num_curves=3, opacity=0.3):
    """
    Build an Altair area chart of several smooth random curves.

    Each curve is generated with ``generate_smooth_curve`` (100 points) and
    drawn as an area shaded from the global minimum y-value up to the curve.
    The y-axis domain is fixed to the min/max across all curves. Only the
    shaded areas are drawn; no line layer or legend is added.

    Parameters:
        seed (int, optional): Base seed; curve ``i`` uses ``seed + i``. When
            ``None`` the curves are not reproducible.
        num_curves (int): Number of curves to draw (must be at least 1).
        opacity (float): Opacity applied to every shaded area.

    Returns:
        alt.Chart: A 400x300 chart titled "Curves".

    Raises:
        ValueError: If ``num_curves`` is less than 1.
    """
    if num_curves < 1:
        # pd.concat would fail on an empty list anyway; fail with a clear message.
        raise ValueError("num_curves must be at least 1.")

    if seed is not None:
        np.random.seed(seed)  # Set seed for reproducibility

    curves_data = []
    y_min_global = float('inf')
    y_max_global = float('-inf')

    for i in range(num_curves):
        curve_seed = seed + i if seed is not None else None
        x, y = generate_smooth_curve(num_points=100, seed=curve_seed)
        curves_data.append(pd.DataFrame({'X': x, 'Y': y, 'Curve': f'Curve {i+1}'}))
        y_min_global = min(y_min_global, np.min(y))
        y_max_global = max(y_max_global, np.max(y))

    # Every area is shaded from the global minimum y-value up to its curve,
    # so the baseline column can only be filled in once all curves are known.
    all_shaded = pd.concat([df.assign(Y0=y_min_global) for df in curves_data])

    y_scale = alt.Scale(domain=[y_min_global, y_max_global])  # Fit y-axis to the data

    shaded_chart = alt.Chart(all_shaded).mark_area(opacity=opacity).encode(
        x='X:Q',
        y=alt.Y('Y0:Q', scale=y_scale),
        y2='Y:Q',
        color=alt.Color('Curve:N', legend=None)  # Colour by curve, no legend
    )

    return shaded_chart.properties(
        width=400,
        height=300,
        title="Curves"
    )

# Use the function to print a test plot
plot_altair_curve(seed=42, num_curves=3, opacity=0.3)

# Generate Opacity Pairs for Flexible Data Generation 
March 25

In [5]:
def generate_opacity_pairs(base_opacity=0.5, steps_config=((0.01, 0.02), (0.03, 0.06), (0.1, 0.2)), min_val=0.0, max_val=1.0):
    """
    Build [base, comparison] opacity pairs at several step resolutions.

    For every ``(step_size, max_diff)`` entry the comparison opacity is placed
    at ``base_opacity +/- k * step_size`` for ``k = 1 .. max_diff / step_size``.
    Comparison values outside ``[min_val, max_val]`` are dropped and duplicate
    pairs are merged.

    Parameters:
        base_opacity (float): Reference opacity; always the first item of each pair.
        steps_config (iterable): ``(step_size, max_diff)`` tuples.
        min_val (float): Smallest allowed comparison opacity (inclusive).
        max_val (float): Largest allowed comparison opacity (inclusive).

    Returns:
        list: Sorted list of ``[base_opacity, comparison]`` lists.
    """
    pairs = set()
    for step_size, max_diff in steps_config:
        # Round the ratio before truncating: a plain int() turns ratios such
        # as 0.3 / 0.1 == 2.9999999999999996 into 2 and drops the last step.
        num_steps = int(round(max_diff / step_size, 9))
        for i in range(1, num_steps + 1):
            delta = round(i * step_size, 5)
            lower = round(base_opacity - delta, 5)
            upper = round(base_opacity + delta, 5)
            if min_val <= lower <= max_val:
                pairs.add((base_opacity, lower))
            if min_val <= upper <= max_val:
                pairs.add((base_opacity, upper))
    return [list(pair) for pair in sorted(pairs)]

# Side by Side

For this study, we need to show two copies of the same shaded-curve chart side by side, differing only in opacity. `plot_side_by_side` takes an opacity pair (or falls back to the first pair generated for the base opacity), optionally shuffles which opacity goes on the left using a deterministic seed derived from the pair itself, and returns a horizontally concatenated vega-altair chart that will be added as a component.

In [6]:
import random 

def get_shuffled_opacity(opacityGroup, rnd):
    """
    Draw the two opacities of ``opacityGroup`` in a random order.

    ``rnd`` is a ``random.Random`` instance supplied by the caller, so the
    resulting left/right assignment is reproducible for a given seed.

    Returns a ``(opacity_left, opacity_right)`` tuple.
    """
    opacity_left, opacity_right = rnd.sample(opacityGroup, k=2)
    return opacity_left, opacity_right

def plot_side_by_side(seed=None, num_curves=3, opacityGroup=None, base_opacity=0.5, shuffle=True):
    """
    Build two copies of the same curve chart, side by side, at two opacities.

    Parameters:
        seed (int, optional): Curve seed forwarded to ``plot_altair_curve``.
        num_curves (int): Number of curves in each chart.
        opacityGroup (list, optional): The two opacities to compare. When
            omitted, the first pair from ``generate_opacity_pairs(base_opacity)``
            is used.
        base_opacity (float): Only used to generate the fallback pair.
        shuffle (bool): When true, the left/right order is drawn from a
            ``random.Random`` seeded from the pair itself, so the same pair
            always lands in the same order.

    Returns:
        A horizontally concatenated Altair chart (left chart, right chart).

    Raises:
        ValueError: If no fallback opacity pair can be generated.
    """
    if opacityGroup is None:
        # Fall back to the first generated pair for this base opacity
        candidates = generate_opacity_pairs(base_opacity)
        if not candidates:
            raise ValueError("No valid opacity pairs generated.")
        opacityGroup = candidates[0]

    if not shuffle:
        # Keep the order exactly as given
        opacity_left, opacity_right = opacityGroup
    else:
        # Seed depends only on the pair, making the left/right placement deterministic
        shuffleSeed = opacityGroup[0]*10+opacityGroup[1]*10
        opacity_left, opacity_right = get_shuffled_opacity(opacityGroup, random.Random(shuffleSeed))

    panels = [
        plot_altair_curve(seed=seed, num_curves=num_curves, opacity=level)
        for level in (opacity_left, opacity_right)
    ]
    return alt.hconcat(*panels)

chart = plot_side_by_side(seed=42, num_curves=3, base_opacity=0.5)
chart

# Generate Vega Spec to combine Generated Data and Plots

Now that we have our function to create the side-by-side chart, we want a function that returns the correct vega spec when given the visualization type (currently only 'altairPlot'), the curve seed, the number of curves, and the opacity pair. We'll use the seed to generate the curves and the opacity pair to render the two charts. However, instead of returning the vega-altair chart, we convert the chart to its vega-lite specification, then convert that into the true vega specification.


In [7]:
def create_vega_spec(visType, seed, num_curves=3, opacityGroup=None, base_opacity=0.5):
    """
    Generate a Vega spec from the Altair chart by converting it to Vega-Lite and then to Vega.

    Parameters:
        visType (str): Visualization type; only 'altairPlot' is supported.
        seed (int): Curve seed forwarded to ``plot_side_by_side``.
        num_curves (int): Number of curves in each chart.
        opacityGroup (list, optional): The two opacities to compare. When
            omitted, the first pair from ``generate_opacity_pairs(base_opacity)``
            is used.
        base_opacity (float): Only used to generate the fallback pair.

    Returns:
        dict: The compiled Vega specification with ``autosize`` and
        ``padding`` overridden for display inside the study.

    Raises:
        ValueError: If ``visType`` is unsupported or no opacity pair can be generated.
    """
    if visType != 'altairPlot':
        raise ValueError("Unsupported visualization type. Use 'altairPlot'.")

    if opacityGroup is None:
        # If not provided, pick the first valid pair
        opacity_pairs = generate_opacity_pairs(base_opacity)
        if not opacity_pairs:
            raise ValueError("No valid opacity pairs generated.")
        opacityGroup = opacity_pairs[0]
    chart = plot_side_by_side(seed=seed, num_curves=num_curves, opacityGroup=opacityGroup)

    vega_lite_spec = chart.to_json()
    vega_spec = vlc.vegalite_to_vega(vega_lite_spec, vl_version="5.20")

    # Vega's autosize.resize is a boolean; emit a real JSON true rather than
    # the string "true".
    vega_spec["autosize"] = {
        "type": "fit-y",
        "resize": True,
        "contains": "content"
    }

    vega_spec["padding"] = {
        "left": 150,
        "right": 1000,
        "top": 20,
        "bottom": 20
    }

    return vega_spec
# We can print the test vega specification above to inspect its contents.
my_vega_spec = create_vega_spec(visType='altairPlot', seed=42, num_curves=3, opacityGroup=None)
#print(my_vega_spec)
json_spec = json.dumps(my_vega_spec, indent=2)
print(json_spec)


{
  "$schema": "https://vega.github.io/schema/vega/v5.json",
  "background": "white",
  "padding": {
    "left": 150,
    "right": 1000,
    "top": 20,
    "bottom": 20
  },
  "height": 300,
  "data": [
    {
      "name": "data-8722fe2a6e5ca3148ad8c65dae86672c",
      "format": {},
      "values": [
        {
          "Curve": "Curve 1",
          "X": 0,
          "Y": 6.524268345675902,
          "Y0": -6.072722065208106
        },
        {
          "Curve": "Curve 1",
          "X": 0.101010101010101,
          "Y": 5.703314364269875,
          "Y0": -6.072722065208106
        },
        {
          "Curve": "Curve 1",
          "X": 0.202020202020202,
          "Y": 4.7926077404261616,
          "Y0": -6.072722065208106
        },
        {
          "Curve": "Curve 1",
          "X": 0.30303030303030304,
          "Y": 3.823216924172329,
          "Y0": -6.072722065208106
        },
        {
          "Curve": "Curve 1",
          "X": 0.404040404040404,
          "Y": 2.8268

# Creating The Component Function & Interaction Signals for ReVISit Trials

The `component_function` is used to transform every component in a given sequence to any new component. If we have a sequence that is the correct _structure_, then we call the `component()` method on that sequence and pass in the desired `component_function`. Any `meta` attributes in the original components are passed in as arguments to the `component_function`. 

We'll create a component function which takes in the curve seed, the opacity pair, and the base opacity and returns the corresponding vega specification component. The opacity pair is shuffled with a deterministic seed derived from the pair itself, so the left/right assignment matches the one used when the chart is drawn. Participants answer through a "Left Image" / "Right Image" button response, and a hidden `seed_L_R` response records the curve seed, the shuffle seed, and the left and right opacities next to the correct answer. Finally, since we specified "nextOnEnter" as "True" in the "uiConfig", the user will also be able to proceed to the next component by pressing the "Enter" key.

In [8]:
#def xor_cipher(data: str, key: str) -> str:
#    """Encrypt or decrypt data using XOR and a repeating key."""
#    return ''.join(chr(ord(c) ^ ord(key[i % len(key)])) for i, c in enumerate(data))
# Researcher key
#secret_key = "revisitStudy" 
def component_function(seed=None, opacityGroup=None, base_opacity=0.5):
    """
    Build the reVISit vega component for one opacity-comparison trial.

    Parameters:
        seed (int, optional): Curve seed. When ``None`` no component is built
            and ``None`` is returned.
        opacityGroup (list, optional): The two opacities to compare. When
            omitted, the first pair from ``generate_opacity_pairs(base_opacity)``
            is used.
        base_opacity (float): Only used to generate the fallback pair.

    Returns:
        The ``rvt.component`` for the trial, named ``"<seed>-<left>,<right>"``,
        or ``None`` when ``seed`` is ``None``.

    Raises:
        ValueError: If no fallback opacity pair can be generated.
    """
    if seed is None:
        return None

    if opacityGroup is None:
        # Fall back to the first generated pair for this base opacity
        candidates = generate_opacity_pairs(base_opacity)
        if not candidates:
            raise ValueError("No valid opacity pairs generated.")
        opacityGroup = candidates[0]

    # Same pair-derived seed as plot_side_by_side, so the left/right order
    # computed here matches the order in which the charts are rendered.
    shuffleSeed = opacityGroup[0]*10 + opacityGroup[1]*10
    left_opacity, right_opacity = get_shuffled_opacity(opacityGroup, random.Random(shuffleSeed))

    # The more opaque side is the correct choice
    if left_opacity > right_opacity:
        correct_side = "Left Image"
    else:
        correct_side = "Right Image"

    choice_response = rvt.response(
        id='button_selected',
        prompt='Please select the figure with higher opacity level (higher opacity means less transparent):',
        type='buttons',
        options=["Left Image", "Right Image"]
    )
    choice_answer = rvt.answer(id='button_selected', answer=correct_side)

    # Hidden, optional response whose "answer" carries the trial's bookkeeping data
    bookkeeping_response = rvt.response(
        id='seed_L_R',
        prompt="seed_L_R (hidden)",
        type='shortText',
        hidden=True,
        required=False
    )
    bookkeeping_answer = rvt.answer(
        id='seed_L_R',
        answer={
            'seed': seed,
            'imageShuffleSeed': shuffleSeed,
            'opacity_left': left_opacity,
            'opacity_right': right_opacity
        }
    )

    spec = create_vega_spec(
        visType='altairPlot',
        seed=seed,
        num_curves=3,
        opacityGroup=opacityGroup
    )
    # Make sure the spec has a 'config' entry
    spec.setdefault('config', {})

    return rvt.component(
        type='vega',
        config=spec,
        component_name__=f'{seed}-{left_opacity},{right_opacity}',
        response=[choice_response, bookkeeping_response],
        correctAnswer=[choice_answer, bookkeeping_answer]
    )
    
# You can print the output of our component function with some test values.
comp_func = component_function(seed=42)
print(comp_func)
#print(component_function(seed=42, opacityGroup=[0.3, 0.6]))



{
    "config": {
        "$schema": "https://vega.github.io/schema/vega/v5.json",
        "background": "white",
        "padding": {
            "left": 150,
            "right": 1000,
            "top": 20,
            "bottom": 20
        },
        "height": 300,
        "data": [
            {
                "name": "data-8722fe2a6e5ca3148ad8c65dae86672c",
                "format": {},
                "values": [
                    {
                        "Curve": "Curve 1",
                        "X": 0,
                        "Y": 6.524268345675902,
                        "Y0": -6.072722065208106
                    },
                    {
                        "Curve": "Curve 1",
                        "X": 0.101010101010101,
                        "Y": 5.703314364269875,
                        "Y0": -6.072722065208106
                    },
                    {
                        "Curve": "Curve 1",
                        "X": 0.202020202020202,
          

# Permuting the Final Sequence 

Here we generate the opacity pairs for each base opacity (0.3, 0.6, and 0.9), shuffle them, and assign one curve seed to every group of five consecutive pairs. Then, we generate a fixed order sequence and begin the permutation over our factors, which is done in random order.

When we permute over these factors, the corresponding factors will be added to the `meta` attributes of each component. By the end of the permutation, we will have components that have 'opacityGroup', 'base_opacity', and 'seed' key-value pairs in their `meta` attribute. Before calling the `component` method, these are all "filler" or "placeholder" components with no real value aside from their metadata attributes. Once we call the `component` method, each component is passed through the inputted `component_function` which will take the existing metadata as arguments. Thus, by the end of this method chaining, each component will be the correct vega component.

After we have finished generating the sequence, we append the entire component block to a sequence containing only the introduction.

In [9]:

# Base opacities to test (can be changed later if needed)
base_opacities = [0.3, 0.6, 0.9]

# One factor entry per (base opacity, comparison opacity) pair
dataSet = [
    {'opacityGroup': pair, 'base_opacity': base}
    for base in base_opacities
    for pair in generate_opacity_pairs(base)
]

# NOTE(review): this shuffle is unseeded, so which pairs share a curve seed
# changes every time the notebook is run -- confirm that is intended.
random.shuffle(dataSet)

# Every block of five consecutive (shuffled) pairs shares one curve seed: 101, 102, ...
factors = []
for position, entry in enumerate(dataSet):
    entry['seed'] = 101 + position // 5
    factors.append(entry)

main_sequence = rvt.sequence(order='fixed')

# Attach the factors in random order, then turn every placeholder into a
# real vega component via component_function.
main_sequence.permute(
    factors=factors,
    order='random'
).component(component_function)

# The introduction comes first, followed by all trials
sequence = rvt.sequence(order='fixed',components=[introduction]) + main_sequence

study = rvt.studyConfig(
    schema='https://raw.githubusercontent.com/revisit-studies/study/v2.1.0/src/parser/StudyConfigSchema.json',
    uiConfig=ui_config,
    studyMetadata=study_metadata,
    sequence=sequence
)

# Prints the entire configuration file which is approximately 150,000 lines of JSON
print(study)


{
    "$schema": "https://raw.githubusercontent.com/revisit-studies/study/v2.1.0/src/parser/StudyConfigSchema.json",
    "components": {
        "introduction": {
            "correctAnswer": [],
            "path": "./assets/introduction.md",
            "response": [],
            "type": "markdown"
        },
        "107-0.93,0.9": {
            "config": {
                "$schema": "https://vega.github.io/schema/vega/v5.json",
                "background": "white",
                "padding": {
                    "left": 150,
                    "right": 1000,
                    "top": 20,
                    "bottom": 20
                },
                "height": 300,
                "data": [
                    {
                        "name": "data-540100945556de630edfc6edd6726e2b",
                        "format": {},
                        "values": [
                            {
                                "Curve": "Curve 1",
                                "X":

In [10]:
# Export the finished study configuration to config.json
final_study = study

# The study's string form is its JSON text; parse it into a Python dictionary
# so it can be written back out with consistent indentation.
final_study_json = str(final_study)
final_study_data = json.loads(final_study_json)

# Write the configuration next to the notebook
with open("config.json", "w", encoding="utf-8") as f:
    json.dump(final_study_data, f, indent=2)

print("✅ config.json generated!")

✅ config.json generated!


# Using `revisitpy_server` to Prepare Our Widget

The `revisitpy` package provides a widget in order to preview our study directly in a Jupyter notebook. We can interact with the study, check that vega signals work, and even create some introductory data ourselves. In order for the widget to work, a local copy of reVISit must be running on your computer. If you already have reVISit locally (colloquially our `study` repo), then all you need to do is navigate to your repository and run `yarn serve`. After this, the widget we create in this Jupyter notebook will be usable.

A simpler way to achieve the same goal, however, is to use the `revisitpy_server` Python package. This is a simple Python package which already has the most recent reVISit repository built and runs a server locally. After installing `revisitpy_server`, all that is required is the following:

In [11]:
process = rs.serve()

Server is running in the background at http://localhost:8080


# The Widget

Now that your server is running, we create the widget with the configuration file we created above. When calling the widget, we are assuming that the assets referenced in the configuration file are relative to this notebook. The widget then copies these static assets to the appropriate directory. Since we're currently using the `revisitpy_server` package, you'll see that they are copied into the assets directory of the `revisitpy_server` package inside the local virtual environment.

In [12]:
w = rvt.widget(study, server=True)

# In your own Jupyter notebook, calling `w` will now display the widget in a fully interactive manner.
w

Copying file from ./assets/introduction.md to d:\revisit\revisitpy-examples-main-02\revisitpy-examples-main-02\.venv\Lib\site-packages\revisitpy_server/static/__revisit-widget/assets/introduction.md
Copying file from ./assets/revisitLogoSquare.svg to d:\revisit\revisitpy-examples-main-02\revisitpy-examples-main-02\.venv\Lib\site-packages\revisitpy_server/static/__revisit-widget/assets/revisitLogoSquare.svg


Widget(config={'$schema': 'https://raw.githubusercontent.com/revisit-studies/study/v2.1.0/src/parser/StudyConf…

# Optional: Data Collection

Now that we have the widget running, we can check out some sample data that would be generated from a user. Start by going through a small portion of the study. Once you've gone through the desired number of components inside the widget, navigate to the analysis dashboard using the 'Analysis' tab in the upper left-hand corner. Here you'll see individual participants and the data that they've generated. 

From here, we can export this data back into our Jupyter notebook. Start by clicking the "Download as Tidy CSV" on the right-hand side above the table. Here you'll be shown a preview of the CSV file with some additional options to truncate the data. In the bottom right-hand corner, you'll see a button with the Python icon. Clicking on this button will send the Tidy CSV back to the Jupyter notebook. Once the button is clicked, we can preview the data like so:

In [13]:
w.get_df()

KeyError: 'rows'

# Optional: Terminate the server

Closing the notebook will automatically terminate the server. If you'd rather do this manually, you can do the following.

In [14]:
process.terminate()