In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2
import os
# Change into the project directory before anything else runs; the relative
# ``*_setup.pkl`` paths used throughout this notebook are resolved against it.
# The location can be overridden with the CROWD_GEOLOC_ROOT environment variable;
# the default is the original hard-coded path, so existing behavior is unchanged.
PROJECT_ROOT = os.environ.get('CROWD_GEOLOC_ROOT', '/home/yanis/crowd_geoloc')
os.chdir(PROJECT_ROOT)
cwd = os.getcwd()


import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import norm, uniform
import cmdstanpy as cmd
import seaborn as sn
from src.crowdgeoloc.one_d import sample_tokyo_latitudes, generate_annotators, annotate, \
FunctionNormalAnnotatorPopulation, SimpleNormalAnnotatorPopulation
from src.crowdgeoloc.experiment import ActiveAnnotationContest, mean_location_norm_error, mean_sigma_norm_error,softmax_stable
from src.crowdgeoloc.fixed import OneShotDirect,OneShotBayesian, OneShotConservative, OneShotConservative2, OneShotIterative, \
                                OneShotMean, KShot
from scipy.interpolate import CubicSpline
import pandas as pd
import pickle
import random
import re
import time

In [None]:
# Notebook-level experiment size: number of points, number of annotators, and
# redundancy (the experiments below use n_points * redundancy as annotation budget).
n_points, n_annotators, redundancy = 1000, 50, 3

In [None]:
# Estimators compared by default, keyed by the label used in result records.
# OneShotMean, OneShotIterative and OneShotConservative are imported but not enabled here.
models = dict(
    direct=OneShotDirect(),
    conservative2=OneShotConservative2(),
)
models["10shot"] = KShot(greedyness=0.1)  # not a valid keyword name, so added separately

# Metric callables keyed by the name stored in the "metric" field of each record.
metrics = dict(
    mean_location_norm_error=mean_location_norm_error,
    mean_sigma_norm_error=mean_sigma_norm_error,
)

In [None]:
# Tokyo latitude sampling
def tok(n):
    """Sample ``n`` Tokyo latitudes and min-max rescale them.

    The smallest sampled value maps to 0 and the largest to 1. If every sample is
    identical the range is zero and the division yields NaN (NumPy emits a warning).

    Parameters
    ----------
    n : number of latitudes requested from ``sample_tokyo_latitudes``.

    Returns
    -------
    numpy.ndarray of rescaled latitudes.
    """
    latitudes = np.array(sample_tokyo_latitudes(n))
    lowest = np.min(latitudes)
    highest = np.max(latitudes)
    return (latitudes - lowest) / (highest - lowest)

In [None]:

# Run every model several times on one experiment and collect metric values and timings.

def compare(models, exp, metrics, repeats):
    """Evaluate each model ``repeats`` times on ``exp`` and score every run.

    Parameters
    ----------
    models : dict mapping a label to an object exposing ``run(exp)``.
    exp : experiment object exposing ``reset()``; it is reset before every run.
    metrics : dict mapping a metric name to a callable ``metric(exp, run_result)``.
    repeats : number of runs per model.

    Returns
    -------
    (records, timings)
        ``records`` is a list of dicts with keys "model", "iteration", "metric" and
        "value" (one per model, run and metric). ``timings`` holds, per model and in
        ``models`` order, the wall-clock seconds spent on all of its runs and metrics.
    """
    records = []
    timings = []
    for label, model in models.items():
        tic = time.time()
        for iteration in range(repeats):
            exp.reset()
            outcome = model.run(exp)
            for metric_label, metric_fn in metrics.items():
                records.append({
                    "model": label,
                    "iteration": iteration,
                    "metric": metric_label,
                    "value": metric_fn(exp, outcome),
                })
        timings.append(time.time() - tic)
    return records, timings
    

In [None]:
#parameters are defined as a list of this shape:

#param=[n_points,n_annotators,redundancy,sigma_distrib,point_distrib,model_name]

In [None]:
def load_experiment(path):
    """Return the object stored in the pickle file at ``path``.

    NOTE: unpickling can execute arbitrary code, so only load files you trust.
    """
    with open(path, 'rb') as handle:
        return pickle.load(handle)

In [None]:
# Load an experiment setup from a pickle file and return its fields as a list.
def load_experiment_setup(path):
    """Read a pickled setup dictionary and return its fields in a fixed order.

    The returned list is
    ``[nb_points, nb_annotators, redundancy, sigma_distrib, point_distrib,
    points, sigmas, random_seed]``; callers in this notebook index it positionally.
    A missing key raises ``KeyError``.

    NOTE: unpickling can execute arbitrary code, so only load files you trust.
    """
    with open(path, 'rb') as handle:
        setup = pickle.load(handle)

    field_order = (
        "nb_points",
        "nb_annotators",
        "redundancy",
        "sigma_distrib",
        "point_distrib",
        "points",
        "sigmas",
        "random_seed",
    )
    return [setup[field] for field in field_order]

In [None]:
# Run a single experiment (one set of parameters) and return the true positions, the true
# sigmas, and the positions and sigmas predicted by each model.
def runexp(setup_file):

    '''
    Run the direct, conservative2 and 10shot models once on an experiment built
    from the general parameters stored in ``setup_file``.

    Only the sizes, the distribution names and the random seed are read from the
    setup: points and annotators are sampled again here (after seeding NumPy with
    the stored seed); the stored "points" and "sigmas" are not used.

    setup_file: str, path of a pickled setup readable by load_experiment_setup.

    Returns (points, list_true_sig, list_point_pred, list_sigm_pred), where the
    two prediction lists hold one entry per model, in the order the models run.

    Raises ValueError if the stored sigma distribution is neither 'uniform' nor 'beta'.
    '''

    params=load_experiment_setup(setup_file) #open setup file
    np.random.seed(params[7])

    models = {"direct":OneShotDirect(),
              "conservative2":OneShotConservative2(),
              "10shot":KShot(greedyness=0.1)
             }

    n_points,n_annotators,redundancy=(params[0],params[1],params[2]) #choice of general parameters
    sig_distr=params[3] #choice of the sigma distrib
    if sig_distr=='uniform':
        annotator_population = SimpleNormalAnnotatorPopulation(uniform(scale=0.1))
    elif sig_distr=='beta':
        annotator_population = SimpleNormalAnnotatorPopulation()
    else:
        # Previously this fell through and failed later with an UnboundLocalError.
        raise ValueError(f"unknown sigma distribution: {sig_distr!r}")

    point_distr=params[4] #choice of point distrib

    # Any value other than 'uniform' selects the Tokyo latitude sampler.
    if point_distr=='uniform':
        point_distribution = uniform()
        points = point_distribution.rvs(n_points)
    else:
        points =  tok(n_points)

    ann_set = annotator_population.sample(n_annotators)
    list_true_sig=[annotator._sigma for annotator in ann_set.annotators]

    exp = ActiveAnnotationContest(points, ann_set, max_total_annotations=n_points*redundancy)
    list_sigm_pred=[]
    list_point_pred=[]
    for model_name, m in models.items():
        print(model_name)
        exp.reset()
        this_run = m.run(exp)
        list_sigm_pred.append(this_run["sigmas"])
        list_point_pred.append(this_run["locations"])
    return points,list_true_sig,list_point_pred,list_sigm_pred

In [None]:
# Run the three-model experiment on the 10000-point / 50-annotator / redundancy-3 setup.
# The returned tuple (points, true sigmas, predicted points, predicted sigmas) is the last
# expression of the cell, so it is displayed in full.
# NOTE(review): the setup-generation cell further down saves this configuration with
# point distribution 'tokyo', not 'uniform' -- confirm this file exists.
runexp('np_10000_na_50_rd_3_sd_uniform_pd_uniform_setup.pkl')

In [None]:

def necessary_budget(setup_file,error_value,max_redundancy=None):

    '''
    For each model, find the smallest redundancy (starting at 3) whose average
    mean_location_norm_error over 50 runs is not above ``error_value``, then show
    the result as a bar plot.

    setup_file: str, path of a pickled setup readable by load_experiment_setup.
        Its points, sigmas and random seed are used; the stored redundancy is ignored.
    error_value: float, error threshold to reach.
    max_redundancy: optional int. When given, the search for a model stops once this
        redundancy has been tried, even if the threshold was not reached. The default
        (None) keeps the original unbounded search, which never terminates if a model
        cannot reach the threshold.

    Returns None; prints the final average error of each model and draws the plot.

    Raises ValueError if the stored sigma distribution is neither 'uniform' nor 'beta'.
    '''

    params=load_experiment_setup(setup_file) #open setup file
    np.random.seed(params[7])

    models = {"mean": OneShotMean(),
          "direct":OneShotDirect(),
          "iterative":OneShotIterative(),
          "conservative":OneShotConservative(),
          "conservative2":OneShotConservative2(),
          "10shot":KShot(greedyness=0.1)
         }
    repeats=50

    n_points,n_annotators=(params[0],params[1]) #choice of general parameters
    sig_distr=params[3] #choice of the sigma distrib
    if sig_distr=='uniform':
        annotator_population = SimpleNormalAnnotatorPopulation(uniform(scale=0.1))
    elif sig_distr=='beta':
        annotator_population = SimpleNormalAnnotatorPopulation()
    else:
        # Previously this fell through and failed later with an UnboundLocalError.
        raise ValueError(f"unknown sigma distribution: {sig_distr!r}")

    points=params[5]
    sigmas = params[6]
    ann_set = annotator_population.sample(n_annotators)
    for k,elem in enumerate(ann_set.annotators): #we put the setup sigmas
        elem._sigma=sigmas[k]

    list_redond=[]
    for model_name, m in models.items():
        # Start above any threshold so that at least one redundancy value is evaluated.
        metrics_value=np.inf
        redundancy=2
        while metrics_value>error_value:
            if max_redundancy is not None and redundancy>=max_redundancy:
                print(f"{model_name}: threshold not reached within redundancy {max_redundancy}")
                break
            redundancy+=1
            exp = ActiveAnnotationContest(points, ann_set, max_total_annotations=n_points*redundancy)
            # Only the location error drives the search, so it is computed directly
            # instead of being picked out of a mixed result list by position.
            location_errors=[]
            for _ in range(repeats):
                exp.reset()
                this_run = m.run(exp)
                location_errors.append(mean_location_norm_error(exp, this_run))
            metrics_value=np.mean(location_errors)
        list_redond.append(redundancy)
        print(metrics_value)

    sn.set_style("whitegrid")  # white grid style for better readability
    sn.barplot(x=list(models.keys()), y=list_redond, palette="viridis")

    # Title and axis labels
    plt.title("Necessary budget to obtain error below threshold of " + str(error_value))
    plt.xlabel("Models")
    plt.ylabel("Necessary budget (redundancy)")

    # Show the figure
    plt.show()




In [None]:
# Find, for each model, the redundancy needed to bring the error below 0.01
# on the 1000-point / 50-annotator setup.
necessary_budget('np_1000_na_50_rd_3_sd_uniform_pd_uniform_setup.pkl',0.01)

In [None]:

def necessary_budget_continuous(setup_file,redundancies=range(2,15),repeats=30):

    '''
    For each model, measure the average mean_location_norm_error at every
    redundancy in ``redundancies`` and plot redundancy against log10(error),
    one curve per model.

    setup_file: str, path of a pickled setup readable by load_experiment_setup.
        Its points and sigmas are used; the stored redundancy is ignored and the
        stored random seed is not applied.
    redundancies: iterable of int, redundancy values to evaluate (default 2..14).
    repeats: int, number of runs averaged per model and redundancy (default 30).

    Returns None; prints each model name as it starts and shows the figure.

    Raises ValueError if the stored sigma distribution is neither 'uniform' nor 'beta'.
    '''

    params=load_experiment_setup(setup_file) #open setup file

    models = {"mean": OneShotMean(),
          "direct":OneShotDirect(),
          "iterative":OneShotIterative(),
          "conservative":OneShotConservative(),
          "conservative2":OneShotConservative2(),
          "10shot":KShot(greedyness=0.1)
         }
    redundancies=list(redundancies) # iterated once per model, so materialize it

    n_points,n_annotators=(params[0],params[1]) #choice of general parameters
    sig_distr=params[3] #choice of the sigma distrib
    if sig_distr=='uniform':
        annotator_population = SimpleNormalAnnotatorPopulation(uniform(scale=0.1))
    elif sig_distr=='beta':
        annotator_population = SimpleNormalAnnotatorPopulation()
    else:
        # Previously this fell through and failed later with an UnboundLocalError.
        raise ValueError(f"unknown sigma distribution: {sig_distr!r}")

    points=params[5]
    sigmas = params[6]
    ann_set = annotator_population.sample(n_annotators)
    for k,elem in enumerate(ann_set.annotators): #we put the setup sigmas
        elem._sigma=sigmas[k]

    model_curves=[]
    for model_name, m in models.items():
        print(model_name)
        error_list=[]
        for redundancy in redundancies:
            exp = ActiveAnnotationContest(points, ann_set, max_total_annotations=n_points*redundancy)
            # Only the location error is plotted, so it is computed directly instead
            # of being picked out of a mixed result list by position.
            location_errors=[]
            for _ in range(repeats):
                exp.reset()
                this_run = m.run(exp)
                location_errors.append(mean_location_norm_error(exp, this_run))
            error_list.append(np.mean(location_errors))
        model_curves.append((model_name,np.log10(np.array(error_list))))

    fig=plt.figure()
    ax = fig.add_subplot(111)
    for model_name,log_errors in model_curves:
        # x: log10 of the error, y: redundancy
        ax.plot(log_errors,redundancies,label=model_name)

    ax.set_xlabel("log error values")

    # Put the redundancy axis on the right-hand side
    ax.spines['left'].set_visible(False)
    ax.yaxis.set_ticks_position('right')
    ax.yaxis.set_label_position("right")
    ax.set_ylabel("redundancy")

    plt.legend()
    plt.show()
    






In [None]:
# Plot redundancy against log error for every model on the 1000-point / 50-annotator setup.
necessary_budget_continuous('np_1000_na_50_rd_3_sd_uniform_pd_uniform_setup.pkl')

In [None]:
# Reference note (a bare string, so it has no effect beyond being shown as cell output):
# the keys of the pickled setup dictionary read by load_experiment_setup.
# NOTE: save_experiment_setup stores a seed drawn with np.random.randint(0, 10000),
# not the literal 1234 shown below.
"""
"nb_points": list_param[0],
"nb_annotators": list_param[1],
"redundancy":list_param[2],
"sigma_distrib": list_param[3],
"point_distrib": list_param[4],
"points": points,
"sigmas": list_true_sig,
"random_seed": 1234"""

In [None]:
# Run a single experiment from an already-loaded setup and return the true positions, the
# true sigmas, and the positions and sigmas predicted by each model.

def run_exp_from_setup(setup,models):

    '''
    Run every model in ``models`` once on the experiment described by ``setup``.

    setup: list as returned by load_experiment_setup
        ([nb_points, nb_annotators, redundancy, sigma_distrib, point_distrib,
        points, sigmas, random_seed]).
    models: dict {name: model}; each model must expose run(exp).

    Returns (points, list_true_sig, list_point_pred, list_sigm_pred), where the
    two prediction lists hold one entry per model, in iteration order.

    The stored sigmas are written onto the sampled annotators, as the other
    setup-based functions of this notebook do. Previously they were only returned,
    so the reported "true" sigmas did not describe the annotators that produced
    the annotations.

    Raises ValueError if the stored sigma distribution is neither 'uniform' nor 'beta'.
    '''

    np.random.seed(setup[7]) #random seed set for determinism
    n_points,n_annotators,redundancy=(setup[0],setup[1],setup[2]) #choice of general parameters
    sig_distr=setup[3] #choice of the sigma distrib
    if sig_distr=='uniform':
        annotator_population = SimpleNormalAnnotatorPopulation(uniform(scale=0.1))
    elif sig_distr=='beta':
        annotator_population = SimpleNormalAnnotatorPopulation()
    else:
        # Previously this fell through and failed later with an UnboundLocalError.
        raise ValueError(f"unknown sigma distribution: {sig_distr!r}")

    points = setup[5]
    list_true_sig = setup[6]

    ann_set = annotator_population.sample(n_annotators)
    for k,elem in enumerate(ann_set.annotators): #we put the setup sigmas
        elem._sigma=list_true_sig[k]

    exp = ActiveAnnotationContest(points, ann_set, max_total_annotations=n_points*redundancy)

    list_sigm_pred=[]
    list_point_pred=[]

    for model_name, m in models.items():
        exp.reset()
        this_run = m.run(exp)
        list_sigm_pred.append(this_run["sigmas"])
        list_point_pred.append(this_run["locations"])
    return points,list_true_sig,list_point_pred,list_sigm_pred


In [None]:
# setup1 is the positional list [nb_points, nb_annotators, redundancy, sigma_distrib,
# point_distrib, points, sigmas, random_seed] of the 10000-point setup.
setup1=load_experiment_setup('np_10000_na_50_rd_3_sd_uniform_pd_uniform_setup.pkl') # load the experiment setup as a list

In [None]:
# Run the 10shot and direct models on the same setup.
# Each result is (points, true sigmas, predicted points per model, predicted sigmas per model).
test=run_exp_from_setup(setup1,{"10shot":KShot(greedyness=0.1)})
test2=run_exp_from_setup(setup1,{"direct":OneShotDirect()}) #run an experiment from setup
#save_experiment(setup1,test)
print(len(test))

# Index 0 holds the ground-truth points; this figure previously plotted index 2
# (the 10shot predictions) under the ground-truth title.
plt.figure()
plt.title("distribution of ground-truth points")
plt.hist(test[0])
plt.show()

plt.figure()
plt.title("distribution of predicted points (10shot)")
plt.hist(test[2])
plt.show()

plt.figure()
plt.title("distribution of predicted points (direct)")
plt.hist(test2[2])
plt.show()

In [None]:
# runexp expects the path of a setup file. The previous call passed `params`, which is
# not defined at notebook level, so the cell failed on a fresh kernel.
res=runexp('np_10000_na_50_rd_3_sd_uniform_pd_uniform_setup.pkl') #launch the experiment
#save_experiment(params,res) #save the experiment

print(res)

In [None]:
# Run all models on several setup files and plot their location errors side by side.
def run_and_plot(list_setup_file):
    """Compare all models on each setup file and plot the location-error distributions.

    For every file, the stored points and sigmas are loaded, every model is run
    30 times through ``compare``, and two figures are drawn with one panel per
    setup file: box plots and kernel density estimates of mean_location_norm_error.
    Panel titles are the integers found in the file name.

    Parameters
    ----------
    list_setup_file : list of str, paths of pickled setups.

    Returns
    -------
    list with, per setup file, the per-model durations returned by ``compare``.
    An empty input returns an empty list without plotting.
    """
    if not list_setup_file:
        return []

    metric_to_plot = "mean_location_norm_error"
    metrics = {"mean_location_norm_error":mean_location_norm_error,
               "mean_sigma_norm_error":mean_sigma_norm_error}

    list_df=[]
    list_duration=[]
    model_names=[]
    for element in list_setup_file:
        print("element:",element)
        # Fresh model instances for every setup file.
        models = {"mean": OneShotMean(),
                  "direct":OneShotDirect(),
                  "iterative":OneShotIterative(),
                  "conservative":OneShotConservative(),
                  "conservative2":OneShotConservative2(),
                  "10shot":KShot(greedyness=0.1)
                 }
        model_names = list(models)

        params=load_experiment_setup(element) #get the params for this setup file

        # Seed NumPy as well as `random`: the other experiment functions of this
        # notebook seed NumPy with the stored seed, and scipy.stats sampling draws
        # from NumPy's global generator.
        random.seed(params[7])
        np.random.seed(params[7])

        n_points,n_annotators,redundancy=(params[0],params[1],params[2])
        points = params[5]
        sigmas = params[6]

        # The sampled sigmas are placeholders; they are replaced by the stored ones.
        annotator_population = SimpleNormalAnnotatorPopulation(uniform(scale=0.1))
        ann_set = annotator_population.sample(n_annotators)
        for k,elem in enumerate(ann_set.annotators): #we put the setup sigmas
            elem._sigma=sigmas[k]

        exp = ActiveAnnotationContest(points, ann_set, max_total_annotations=n_points*redundancy)
        results,durations = compare(models, exp, metrics, 30)

        list_duration.append(durations)
        list_df.append(pd.DataFrame(results))

    n_setups = len(list_setup_file)
    # Panel title: tuple of the integers embedded in each file name.
    titles = [str(tuple(int(k) for k in re.findall(r'\d+',name))) for name in list_setup_file]

    # squeeze=False keeps a 2-D array of axes even for a single setup file;
    # without it `axs[i]` fails because a bare Axes object is returned.
    fig, axs = plt.subplots(1,n_setups, figsize=(10, 5),sharey=True,squeeze=False)
    axs = axs[0]
    for i, elem in enumerate(list_df):
        sn.boxplot(data=elem[elem["metric"]==metric_to_plot], x="model", y="value", ax=axs[i])
        axs[i].set_title(titles[i])
        axs[i].set(xlabel="Model", ylabel="Mean_location_norm_error")

    # Distribution plot: one density curve per model in every panel.
    fig2, axs2 = plt.subplots(1,n_setups, figsize=(15, 5),sharey=True,sharex=True,squeeze=False)
    axs2 = axs2[0]
    for ax in axs2:
        ax.grid()
    for i,elem in enumerate(list_df):
        le = elem[elem["metric"]==metric_to_plot]
        for n in model_names:
            le1 = le[le["model"]==n]
            sn.kdeplot(le1["value"],ax=axs2[i],label=n)
        axs2[i].set_title(titles[i])
        axs2[i].set(xlabel="mean_location_norm_error", ylabel="density")
        axs2[i].legend() # the curves are otherwise indistinguishable

    plt.show()
    return(list_duration)

In [None]:
# Compare the 1000-, 10000- and 100000-point setups, then plot, for each model,
# the time it took on each of the three setups.
dur=run_and_plot([
    'np_1000_na_50_rd_3_sd_uniform_pd_uniform_setup.pkl',
    'np_10000_na_50_rd_3_sd_uniform_pd_uniform_setup.pkl',
    'np_100000_na_50_rd_3_sd_uniform_pd_uniform_setup.pkl',
])
print(dur)
plt.figure()
# Labels follow the order of the models dictionary inside run_and_plot.
list_label=["mean","direct","iterative","conservative","conservative2","10shot"]
for k in range(len(dur[0])):
    # One line per model: its duration on the first, second and third setup.
    plt.plot([per_setup[k] for per_setup in dur[:3]],label=list_label[k])
plt.legend()
plt.show()

In [None]:
# Compare KShot models that differ only in their greedyness on one setup.
def greedyness_impact(setup_file, greedyness_values=(0.1, 0.2, 0.3, 0.4, 0.5)):
    """Box-plot the location error of KShot for several greedyness values.

    Each KShot model is run 100 times through ``compare`` on the points and
    sigmas stored in ``setup_file``.

    Parameters
    ----------
    setup_file : str, path of a pickled setup.
    greedyness_values : greedyness values to compare. The default is the set of
        values the models were actually built with. Labels are now derived from
        these values; the previous hard-coded labels ("greed=0.001" ... "greed=10")
        did not match them.

    Returns
    -------
    None; draws the box plot.
    """
    # Building the label from the value keeps the two from drifting apart.
    models = {f"greed={value}": KShot(greedyness=value) for value in greedyness_values}
    metrics = {"mean_location_norm_error":mean_location_norm_error,
               "mean_sigma_norm_error":mean_sigma_norm_error}

    params=load_experiment_setup(setup_file) #get the params from the setup file

    # Seed NumPy as well as `random`: the other experiment functions of this
    # notebook seed NumPy with the stored seed, and scipy.stats sampling draws
    # from NumPy's global generator.
    random.seed(params[7])
    np.random.seed(params[7])

    n_points,n_annotators,redundancy=(params[0],params[1],params[2])
    points = params[5]
    sigmas = params[6]

    # The sampled sigmas are placeholders; they are replaced by the stored ones.
    annotator_population = SimpleNormalAnnotatorPopulation(uniform(scale=0.1))
    ann_set = annotator_population.sample(n_annotators)
    for k,elem in enumerate(ann_set.annotators): #we put the setup sigmas
        elem._sigma=sigmas[k]

    exp = ActiveAnnotationContest(points, ann_set, max_total_annotations=n_points*redundancy)
    df=pd.DataFrame(compare(models, exp, metrics, 100)[0])

    # Box plot of the location error, one box per greedyness value.
    fig, axs = plt.subplots(1,1, figsize=(10, 5))
    sn.boxplot(data=df[df["metric"]=="mean_location_norm_error"], x="model", y="value", ax=axs)
    axs.set(xlabel="Model", ylabel="Mean_location_norm_error")

In [None]:
# Compare the KShot greedyness settings on the 10000-point / 50-annotator / redundancy-7 setup.
greedyness_impact("np_10000_na_50_rd_7_sd_uniform_pd_uniform_setup.pkl")

In [None]:
def entropy(probabilities):
    """Shannon entropy, in bits, of a discrete probability distribution.

    Arguments:
    probabilities -- iterable of probabilities, expected to sum to 1
                     (this is not checked)

    Returns:
    The sum of -p * log2(p) over the strictly positive entries; entries that are
    zero or negative are skipped. An empty input gives 0.0.
    """
    contributions = (-(p * np.log2(p)) for p in probabilities if p > 0)
    # Start from 0.0 so that an empty input still returns a float.
    return sum(contributions, 0.0)

In [None]:
# Plot how the entropy of softmax(greedyness * sigmas) varies with the greedyness.
def entropy_plot(setup_file):
    """Plot the entropy of ``softmax_stable(g * sigmas)`` against log10(g).

    The sigmas come from ``setup_file``; ``g`` takes the values
    0.001, 0.01, 0.1, 1, 5, 10, 20, 30 and 100.

    Parameters
    ----------
    setup_file : str, path of a pickled setup.

    Returns
    -------
    None; shows the figure.
    """
    params=load_experiment_setup(setup_file) #get the params from the setup file
    # Nothing below draws random numbers; the call is kept so that the state of the
    # global `random` generator after this function is the same as before.
    random.seed(params[7])
    sigmas = np.array(params[6])

    greedyness_list=np.array([0.001,0.01,0.1,1,5,10,20,30,100])
    list_entropy=[entropy(softmax_stable(k*sigmas)) for k in greedyness_list]

    plt.figure()
    plt.title("entropy of the distribution of points depending on greedyness")
    plt.plot(np.log10(greedyness_list),list_entropy)
    # The x values are log10(greedyness); the label previously said "greedyness".
    plt.xlabel("log10(greedyness)")
    plt.ylabel("entropy")

    plt.show()
    
       
    
    

In [None]:
# Entropy-versus-greedyness curve for the sigmas of the 10000-point / redundancy-7 setup.
entropy_plot("np_10000_na_50_rd_7_sd_uniform_pd_uniform_setup.pkl")

In [None]:
# Compare all models on the stored points and on freshly sampled Tokyo latitudes.
def impact_of_distrib(setup_file):
    """Box-plot the location error of every model for two point distributions.

    The models are run 50 times on the points stored in ``setup_file`` and
    50 times on ``tok(n_points)``, with the same annotators (stored sigmas).
    In the plot, model names from the Tokyo run carry the suffix "1".

    Parameters
    ----------
    setup_file : str, path of a pickled setup. Presumably one whose stored points
        are uniform, as the in-code labels suggest -- this is not checked.

    Returns
    -------
    None; draws the box plot.
    """
    models = {"mean": OneShotMean(),
              "direct":OneShotDirect(),
              "iterative":OneShotIterative(),
              "conservative2":OneShotConservative2(),
              "conservative":OneShotConservative(),
              "10shot":KShot(greedyness=0.1)
             }
    metrics = {"mean_location_norm_error":mean_location_norm_error,
               "mean_sigma_norm_error":mean_sigma_norm_error}

    params=load_experiment_setup(setup_file) #get the params from the setup file

    # Seed NumPy as well as `random`: the other experiment functions of this
    # notebook seed NumPy with the stored seed, and scipy.stats sampling draws
    # from NumPy's global generator.
    random.seed(params[7])
    np.random.seed(params[7])

    n_points,n_annotators,redundancy=(params[0],params[1],params[2])
    sigmas = params[6]

    # The sampled sigmas are placeholders; they are replaced by the stored ones.
    annotator_population = SimpleNormalAnnotatorPopulation(uniform(scale=0.1))
    ann_set = annotator_population.sample(n_annotators)
    for k,elem in enumerate(ann_set.annotators): #we put the setup sigmas
        elem._sigma=sigmas[k]

    #test1: points stored in the setup
    points = params[5]
    exp = ActiveAnnotationContest(points, ann_set, max_total_annotations=n_points*redundancy)
    df1=pd.DataFrame(compare(models, exp, metrics, 50)[0])

    #test2: tokyo
    points = tok(n_points)
    exp = ActiveAnnotationContest(points, ann_set, max_total_annotations=n_points*redundancy)
    df2=pd.DataFrame(compare(models, exp, metrics, 50)[0])
    # Vectorized suffix instead of a row-by-row .loc loop.
    df2["model"] = df2["model"] + "1"
    # ignore_index avoids duplicate row labels in the combined frame.
    df_concat = pd.concat([df1, df2], ignore_index=True)

    #create the box plot
    fig, axs = plt.subplots(1,1, figsize=(10, 5))
    sn.boxplot(data=df_concat[df_concat["metric"]=="mean_location_norm_error"], x="model", y="value", ax=axs)

    # NOTE(review): "a" looks like a placeholder title.
    axs.set_title("a")
    axs.set(xlabel="Model", ylabel="Mean_location_norm_error")

In [None]:
# Compare stored points against Tokyo latitudes on the 1000-point / 50-annotator setup.
impact_of_distrib('np_1000_na_50_rd_3_sd_uniform_pd_uniform_setup.pkl')

In [None]:
#parameters are defined as a list of this shape:

#param=[n_points,n_annotators,redundancy,sigma_distrib,point_distrib,model_name]

In [None]:
# saving experiments setup

import pickle

def save_experiment_setup(params):
    """Generate an experiment setup and pickle it in the current directory.

    Both NumPy and ``random`` are seeded with 1234 first, so the same ``params``
    always produce the same points, sigmas and stored seed.

    Parameters
    ----------
    params : sequence ``[n_points, n_annotators, redundancy, sigma_distrib,
        point_distrib]``. ``sigma_distrib`` must be 'uniform' or 'beta';
        ``point_distrib`` 'uniform' samples points with scipy's ``uniform()``,
        any other value uses the Tokyo latitude sampler ``tok``.

    Returns
    -------
    None. Writes
    ``np_{n_points}_na_{n_annotators}_rd_{redundancy}_sd_{sigma}_pd_{point}_setup.pkl``
    holding a dictionary with keys nb_points, nb_annotators, redundancy,
    sigma_distrib, point_distrib, points, sigmas and random_seed.

    Raises
    ------
    ValueError if ``sigma_distrib`` is neither 'uniform' nor 'beta'.
    """
    np.random.seed(1234) #we set a seed to generate each time the same points/sigmas
    random.seed(1234)
    n_points,n_annotators,redundancy=(params[0],params[1],params[2]) #choice of general parameters
    sig_distr=params[3] #choice of the sigma distrib
    if sig_distr=='uniform':
        annotator_population = SimpleNormalAnnotatorPopulation(uniform(scale=0.1))
    elif sig_distr=='beta':
        annotator_population = SimpleNormalAnnotatorPopulation()
    else:
        # Previously this fell through and failed later with an UnboundLocalError.
        raise ValueError(f"unknown sigma distribution: {sig_distr!r}")

    point_distr=params[4] #choice of point distrib

    # The order of the random draws (points, then annotators, then the stored seed)
    # is kept unchanged so that regenerated files are identical to existing ones.
    if point_distr=='uniform':
        point_distribution = uniform()
        points = point_distribution.rvs(n_points)
    else:
        points =  tok(n_points)

    ann_set = annotator_population.sample(n_annotators)
    list_true_sig=[annotator._sigma for annotator in ann_set.annotators]

    experiment_data = {
        "nb_points": params[0],
        "nb_annotators": params[1],
        "redundancy": params[2],
        "sigma_distrib": params[3],
        "point_distrib": params[4],
        "points": points,
        "sigmas": list_true_sig,
        # Seed that the experiment functions apply before running the models.
        "random_seed": np.random.randint(0,10000)
    }

    filename = f"np_{params[0]}_na_{params[1]}_rd_{params[2]}_sd_{params[3]}_pd_{params[4]}_setup.pkl"
    # Save the experiment data to a file using pickle
    with open(filename, "wb") as f:
        pickle.dump(experiment_data, f, protocol=5)



In [None]:
# Save one setup pickle (with its random seed) per configuration.
# Each entry is [n_points, n_annotators, redundancy, sigma_distrib, point_distrib].
# NOTE(review): the first two entries use the 'tokyo' point distribution, while other
# cells of this notebook load the matching '..._pd_uniform_setup.pkl' files -- confirm
# that those files exist or that 'tokyo' is intended here.
setup_configurations = [
    # 50 annotators, redundancy 3
    [1000,50,3,'uniform','tokyo'],
    [10000,50,3,'uniform','tokyo'],
    [100000,50,3,'uniform','uniform'],
    # 50 annotators, redundancy 5
    [1000,50,5,'uniform','uniform'],
    [10000,50,5,'uniform','uniform'],
    [100000,50,5,'uniform','uniform'],
    # 50 annotators, redundancy 7
    [1000,50,7,'uniform','uniform'],
    [10000,50,7,'uniform','uniform'],
    [100000,50,7,'uniform','uniform'],
    # 50 annotators, redundancy 15
    [1000,50,15,'uniform','uniform'],
    [10000,50,15,'uniform','uniform'],
    [100000,50,15,'uniform','uniform'],
    # 30 annotators, redundancy 3
    [1000,30,3,'uniform','uniform'],
    [10000,30,3,'uniform','uniform'],
    [100000,30,3,'uniform','uniform'],
    # 100 points, 30 annotators
    [100,30,3,'uniform','uniform'],
    [100,30,5,'uniform','uniform'],
    [100,30,7,'uniform','uniform'],
    # 100 annotators, redundancy 3
    [1000,100,3,'uniform','uniform'],
    [10000,100,3,'uniform','uniform'],
    [100000,100,3,'uniform','uniform'],
]
for configuration in setup_configurations:
    save_experiment_setup(configuration)

In [None]:
# saving an experiment: the setup parameters together with the results of one method

import pickle

def save_experiment(list_param,results):
    """Pickle an experiment's parameters together with one method's results.

    Parameters
    ----------
    list_param : sequence ``[n_points, n_annotators, redundancy, sigma_distrib,
        point_distrib, method_name]``.
    results : sequence ``(points, sigmas, predicted_points, predicted_sigmas)``;
        only the first entry of each of the two prediction sequences is stored.

    Returns
    -------
    None. Writes
    ``np_{n_points}_na_{n_annotators}_rd_{redundancy}_sd_{sigma}_pd_{point}.pkl``
    in the current directory. The method name is not part of the file name, so
    saving a second method for the same parameters overwrites the file.
    """
    # Results of the single method being saved.
    method_results = {
        "name": list_param[5],
        "points": results[2][0],
        "sigmas": results[3][0],
    }
    experiment_data = dict(
        nb_points=list_param[0],
        nb_annotators=list_param[1],
        redundancy=list_param[2],
        sigma_distrib=list_param[3],
        point_distrib=list_param[4],
        points=results[0],
        sigmas=results[1],
        method_results=method_results,
    )

    filename = f"np_{list_param[0]}_na_{list_param[1]}_rd_{list_param[2]}_sd_{list_param[3]}_pd_{list_param[4]}.pkl"
    with open(filename, "wb") as out_file:
        pickle.dump(experiment_data, out_file, protocol=5)



In [None]:
#file_path = os.path.join('..','..', "np_1000_na_50_rd_3_sd_uniform_pd_uniform_setup.pkl")

# Load two setups that differ only in the number of annotators (50 vs 10).
# This rebinds setup1, which an earlier cell assigned to the 10000-point setup.
# NOTE(review): no 10-annotator configuration appears in the setup-generation cell of
# this notebook -- confirm that the second file exists.
setup1=load_experiment_setup('np_1000_na_50_rd_3_sd_uniform_pd_uniform_setup.pkl')
setup2=load_experiment_setup('np_1000_na_10_rd_3_sd_uniform_pd_uniform_setup.pkl')
#print(setup1)

In [None]:
# Index 6 of a loaded setup holds the sigmas (index 5 holds the points), so these lines
# compare the first 20 sigmas of the two setups.
print(setup1[6][0:20]) # first 20 sigmas of the 50-annotator setup
print(setup2[6][0:20])