In [14]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import ipywidgets as widgets
from IPython.display import display

# Directory that load_data() reads the result CSV files from.
results_path = '/home/dsemchin/results/'

# Parameter grids; each list supplies the options of one selection dropdown.
models = [
    'logistic',
    'transition',
    'diffusion',
    'rd',
    'ACP',
    'rd_flipped',
]
sample_sizes = [
    100,
    250,
    500,
    1000,
]
std_values = [
    0.0,
    0.1,
    0.2,
    0.3,
    0.5,
    0.7,
]

def load_data(model, sample_size, std):
    """Read the with-prior and no-prior result tables for one configuration.

    Both files live directly under ``results_path`` and are named
    ``{model}_n_{sample_size}_std_{std}_with_prior.csv`` and
    ``{model}_n_{sample_size}_std_{std}_no_prior.csv``.

    Args:
        model: Model name used as the file-name prefix.
        sample_size: Sample size embedded after ``_n_``.
        std: Standard deviation embedded after ``_std_``; it is formatted
            with the default f-string conversion (e.g. ``0.0`` -> ``0.0``).

    Returns:
        A ``(prior_data, no_prior_data)`` tuple of DataFrames.

    Raises:
        Whatever ``pandas.read_csv`` raises, e.g. ``FileNotFoundError`` when
        a file for the requested configuration does not exist.
    """
    # Build both paths up front; the with-prior file is read first.
    csv_paths = (
        os.path.join(results_path, f'{model}_n_{sample_size}_std_{std}_with_prior.csv'),
        os.path.join(results_path, f'{model}_n_{sample_size}_std_{std}_no_prior.csv'),
    )
    prior_data, no_prior_data = (pd.read_csv(csv_path) for csv_path in csv_paths)
    return prior_data, no_prior_data

def create_kendall_spearman_boxplots(prior_data, no_prior_data):
    """Draw side-by-side box plots of the Kendall and Spearman metrics.

    The left panel shows ``greedy_kendalltau`` and ``best_kendalltau``; the
    right panel shows ``greedy_spearmanr`` and ``best_spearmanr``. Within each
    panel the boxes are grouped by run type ("No Prior" / "With Prior") and
    coloured by metric. Both y-axes are fixed to ``[-1.1, 1.1]``.

    The input frames are not modified: the run-type label is attached to a
    column-subset copy rather than written into the caller's DataFrame.

    Args:
        prior_data: DataFrame of with-prior results containing the four
            metric columns named above.
        no_prior_data: DataFrame of no-prior results with the same columns.

    Raises:
        KeyError: If a required metric column is missing from either frame.
    """
    _, axes = plt.subplots(1, 2, figsize=(15, 6))

    # (axis, metric columns, panel title) for each of the two panels.
    panels = (
        (axes[0], ['greedy_kendalltau', 'best_kendalltau'], 'Kendall Tau Comparison'),
        (axes[1], ['greedy_spearmanr', 'best_spearmanr'], 'Spearman R Comparison'),
    )

    for ax, metrics, title in panels:
        # Selecting a column list and calling assign() both return new
        # frames, so the 'Type' label never leaks into the caller's data.
        combined = pd.concat([
            no_prior_data[metrics].assign(Type='No Prior'),
            prior_data[metrics].assign(Type='With Prior'),
        ])
        # Long format: one row per (Type, variable, value) for seaborn's hue.
        long_form = pd.melt(combined, id_vars='Type', value_vars=metrics)
        sns.boxplot(x='Type', y='value', hue='variable', data=long_form, ax=ax)
        ax.set_ylim(-1.1, 1.1)
        ax.set_title(title)

    plt.tight_layout()
    plt.show()

def create_num_iters_boxplot(prior_data, no_prior_data):
    """Draw a box plot comparing ``num_iters`` between the two run types.

    Args:
        prior_data: DataFrame of with-prior results with a ``num_iters``
            column.
        no_prior_data: DataFrame of no-prior results with a ``num_iters``
            column.

    Outlier points are not drawn (``showfliers=False``).
    """
    no_prior_iters = no_prior_data['num_iters']
    prior_iters = prior_data['num_iters']

    # Labels are laid out in the same order as the concatenated values:
    # all no-prior rows first, then all with-prior rows.
    run_labels = ['No Prior'] * len(no_prior_iters) + ['With Prior'] * len(prior_iters)
    stacked_iters = pd.concat([no_prior_iters, prior_iters])
    combined_iters = pd.DataFrame({'Iterations': stacked_iters, 'Type': run_labels})

    plt.figure(figsize=(5, 4))
    sns.boxplot(data=combined_iters, x='Type', y='Iterations', showfliers=False)
    plt.title('Number of Iterations Comparison')
    plt.show()

# trace plot of the average log-likelihood (its call in update_plots is currently commented out)
def _parse_loglike_traces(column):
    """Turn each ``loglike`` cell into a 1-D float array."""
    traces = []
    for log in column:
        # NOTE(review): eval() executes whatever expression the CSV cell
        # contains, so only use this on result files you generated yourself.
        # If the cells are plain list literals, ast.literal_eval would be a
        # safer drop-in -- confirm the stored format before switching.
        values = eval(log) if isinstance(log, str) else log
        # Force float so integer-valued traces can be NaN-padded, and wrap
        # scalars (e.g. a NaN from an empty cell) so they have a length.
        traces.append(np.atleast_1d(np.asarray(values, dtype=float)))
    return traces


def _mean_padded_trace(traces, length):
    """Average traces step by step after right-padding them with NaN."""
    if not traces:
        # No runs at all: return an all-NaN line so nothing is drawn.
        return np.full(length, np.nan)
    padded = [
        np.pad(trace, (0, length - len(trace)), constant_values=np.nan)
        for trace in traces
    ]
    # NaN padding is ignored, so each step averages only the runs that
    # reached it. Steps no run in this group reached come out as NaN.
    return np.nanmean(padded, axis=0)


def create_trace_plot(prior_data, no_prior_data):
    """Plot the average log-likelihood per step for both run types.

    Each cell of the ``loglike`` column may be a string, which is evaluated
    to obtain the sequence of values, or an already-parsed value. Traces of
    different lengths are right-padded with NaN to the longest trace across
    both frames and then averaged per step with the padding ignored.

    Args:
        prior_data: DataFrame of with-prior results with a ``loglike`` column.
        no_prior_data: DataFrame of no-prior results with a ``loglike``
            column.

    Empty frames are tolerated: a frame with no rows yields an all-NaN line
    (nothing drawn) instead of raising.
    """
    loglike_prior = _parse_loglike_traces(prior_data['loglike'])
    loglike_no_prior = _parse_loglike_traces(no_prior_data['loglike'])

    # Common x-extent for both lines; default=0 covers two empty frames.
    max_length = max(
        (len(trace) for trace in loglike_prior + loglike_no_prior),
        default=0,
    )

    avg_loglike_prior = _mean_padded_trace(loglike_prior, max_length)
    avg_loglike_no_prior = _mean_padded_trace(loglike_no_prior, max_length)

    plt.figure(figsize=(10, 4))
    plt.plot(avg_loglike_no_prior, label='No Prior', color='blue')
    plt.plot(avg_loglike_prior, label='With Prior', color='orange')
    plt.xlabel('Steps')
    plt.ylabel('Log-likelihood')
    plt.title('Average Log-likelihood Over Steps')
    plt.legend()
    plt.show()

# Selection controls: one dropdown per experiment parameter, plus the output
# widget that update_plots() renders into.
model_dropdown = widgets.Dropdown(
    description='Model:',
    options=models,
)
sample_size_dropdown = widgets.Dropdown(
    description='Sample Size:',
    options=sample_sizes,
)
std_dropdown = widgets.Dropdown(
    description='Std:',
    options=std_values,
)
output = widgets.Output()

def update_plots(change):
    """Redraw the plots for the configuration currently selected.

    Registered as the observer of all three dropdowns and also called once
    directly with ``None`` for the initial render.

    Args:
        change: Change notification passed by the observed widget; it is not
            inspected, the current dropdown values are read instead.
    """
    with output:
        # Drop the previous figures before drawing the new ones.
        output.clear_output()

        # Load both result tables for the selected (model, n, std) triple.
        prior_data, no_prior_data = load_data(
            model_dropdown.value,
            sample_size_dropdown.value,
            std_dropdown.value,
        )

        create_kendall_spearman_boxplots(prior_data, no_prior_data)
        create_num_iters_boxplot(prior_data, no_prior_data)
        # Trace plot call is currently disabled:
        # create_trace_plot(prior_data, no_prior_data)

# Redraw whenever any of the three selections changes.
for _control in (model_dropdown, sample_size_dropdown, std_dropdown):
    _control.observe(update_plots, names='value')

# Show the controls and the output area, then render the initial plots.
display(model_dropdown, sample_size_dropdown, std_dropdown, output)
update_plots(None)

Dropdown(description='Model:', options=('logistic', 'transition', 'diffusion', 'rd', 'ACP', 'rd_flipped'), val…

Dropdown(description='Sample Size:', options=(100, 250, 500, 1000), value=100)

Dropdown(description='Std:', options=(0.0, 0.1, 0.2, 0.3, 0.5, 0.7), value=0.0)

Output()