In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2


from crowdgeoloc.one_d import sample_tokyo_latitudes, generate_annotators, annotate, \
FunctionNormalAnnotatorPopulation, SimpleNormalAnnotatorPopulation
from crowdgeoloc.experiment import ActiveAnnotationContest, mean_location_norm_error, mean_sigma_norm_error, \
    softmax_stable, load_experiment, load_experiment_setup, save_experiment, save_experiment_setup
from crowdgeoloc.fixed import OneShotDirect,OneShotBayesian, OneShotConservative, OneShotConservative2, OneShotIterative, \
                                OneShotMean, KShot
from crowdgeoloc.fixed import OneShotDirect,OneShotBayesian, OneShotConservative, OneShotConservative2, OneShotIterative, \
                                OneShotMean, KShot
from crowdgeoloc.spatial import OneShotSpatialBayesian, KShotSpatial
import seaborn
from seaborn import color_palette
from scipy.interpolate import CubicSpline

import pandas as pd
import pickle
import random
import re
import time
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import norm, uniform
import cmdstanpy as cmd
import seaborn as sn
import os

## CONSTANT VARIANCE PROFILE

In [None]:
# Write the setup files used by the constant-variance experiments.
# The list layout appears to be [n_points, n_annotators, redundancy,
# sigma distribution, point distribution] -- TODO confirm against save_experiment_setup.
save_experiment_setup([100_000, 50, 3, "uniform", "uniform"])
save_experiment_setup([50_000, 50, 3, "uniform", "uniform"])

In [None]:

def tok(nb_points):
    '''
    Sample Tokyo latitudes and min-max rescale them to the interval [0, 1].

    nb_points: number of latitudes requested from sample_tokyo_latitudes.

    Returns a float array of the rescaled samples. An empty sample is returned
    unchanged, and a sample whose values are all identical is mapped to zeros
    (the plain formula would divide by zero in that case).
    '''
    latitudes = np.asarray(sample_tokyo_latitudes(nb_points), dtype=float)
    if latitudes.size == 0:
        # np.min / np.max raise on empty input; there is nothing to rescale
        return latitudes
    lowest = latitudes.min()
    span = latitudes.max() - lowest
    if span == 0:
        # degenerate sample (e.g. a single point): avoid 0/0 -> nan
        return np.zeros_like(latitudes)
    return (latitudes - lowest) / span


In [None]:

def compare(models, exp, metrics, repeats):
    '''
    Run every model `repeats` times on the same experiment and score each run.

    models:  dict mapping a model name to an object exposing run(exp)
    exp:     experiment object exposing reset(); it is reset before every run
    metrics: dict mapping a metric name to a callable metric(exp, run_output)
    repeats: number of runs per model

    Returns a tuple (results, duration_list):
      * results is a flat list with one dict per (model, iteration, metric),
        with keys "model", "iteration", "metric" and "value";
      * duration_list holds, per model and in the iteration order of `models`,
        the elapsed seconds for all its repeats (metric evaluation included).
    '''
    results = []
    duration_list = []
    for model_name, model in models.items():
        # perf_counter is monotonic, so the measured duration cannot be
        # distorted by wall-clock adjustments the way time.time() can
        started = time.perf_counter()
        for iteration in range(repeats):
            exp.reset()
            this_run = model.run(exp)
            for metric_name, metric in metrics.items():
                results.append({
                    "model": model_name,
                    "iteration": iteration,
                    "metric": metric_name,
                    "value": metric(exp, this_run),
                })
        duration_list.append(time.perf_counter() - started)
    return results, duration_list

In [None]:

def runexp(setup_file):
    '''
    Run each model once on the experiment described by a setup file.

    setup_file: str, name of the setup file handed to load_experiment_setup.
                The loaded params are read positionally: [0] number of points,
                [1] number of annotators, [2] redundancy, [3] sigma
                distribution ('uniform' or 'beta'), [4] point distribution
                ('uniform', anything else means Tokyo latitudes), [7] seed.

    Returns (points, list_true_sig, list_point_pred, list_sigm_pred):
      * points:          the sampled point locations
      * list_true_sig:   the `_sigma` of every sampled annotator
      * list_point_pred: the "locations" output of each model
      * list_sigm_pred:  the "sigmas" output of each model
    Both prediction lists follow the order direct, conservative2, 10shot.

    Raises ValueError if the sigma distribution is neither 'uniform' nor 'beta'.
    '''
    params = load_experiment_setup(setup_file)  # open setup file
    np.random.seed(params[7])

    models = {
        "direct": OneShotDirect(),
        "conservative2": OneShotConservative2(),
        "10shot": KShot(greediness=1.2),
    }

    n_points, n_annotators, redundancy = params[0], params[1], params[2]

    sig_distr = params[3]  # choice of the sigma distribution
    if sig_distr == 'uniform':
        annotator_population = SimpleNormalAnnotatorPopulation(uniform(scale=0.1))
    elif sig_distr == 'beta':
        # presumably the population's default sigma distribution is a beta -- TODO confirm
        annotator_population = SimpleNormalAnnotatorPopulation()
    else:
        # previously this fell through and failed later with an unbound local
        raise ValueError(
            f"unknown sigma distribution {sig_distr!r}; expected 'uniform' or 'beta'"
        )

    point_distr = params[4]  # choice of the point distribution
    if point_distr == 'uniform':
        points = uniform().rvs(n_points)
    else:
        points = tok(n_points)

    ann_set = annotator_population.sample(n_annotators)
    list_true_sig = [annotator._sigma for annotator in ann_set.annotators]

    exp = ActiveAnnotationContest(points, ann_set, max_total_annotations=n_points*redundancy)
    list_sigm_pred = []
    list_point_pred = []
    for model_name, m in models.items():
        print(model_name)
        exp.reset()
        this_run = m.run(exp)
        list_point_pred.append(this_run["locations"])
        list_sigm_pred.append(this_run["sigmas"])
    return points, list_true_sig, list_point_pred, list_sigm_pred

In [None]:

def run_and_plot(list_setup_file, repeats=100):
    '''
    Compare the models on several setups and draw one box plot per setup.

    list_setup_file: list of setup file names (str) handed to
                     load_experiment_setup. Points (params[5]) and annotator
                     sigmas (params[6]) are taken from each setup.
    repeats:         number of runs per model and per setup (default 100).

    For every setup a box plot of the "mean_location_norm_error" metric per
    model is drawn in its own subplot; the subplot title is the tuple of the
    integers found in the setup file name.

    Returns a list with, for each setup, the list of per-model durations
    reported by compare(). An empty input list returns [] without plotting.
    '''
    if len(list_setup_file) == 0:
        # plt.subplots cannot create a grid with zero columns
        return []

    metrics = {"mean_location_norm_error": mean_location_norm_error,
               "mean_sigma_norm_error": mean_sigma_norm_error}

    list_df = []
    list_duration = []
    name_list = []
    for element in list_setup_file:
        print("element:", element)
        models = {
            "mean": OneShotMean(),
            "direct": OneShotDirect(),
            "iterative": OneShotIterative(),
            "conserv": OneShotConservative(),
            "conserv2": OneShotConservative2(),
            "10shot": KShot(greediness=1.1),
        }
        name_list = list(models.keys())

        params = load_experiment_setup(element)

        # Seed NumPy as well as `random`: scipy.stats sampling draws from
        # NumPy's global generator, which random.seed() does not touch.
        random.seed(params[7])
        np.random.seed(params[7])

        n_points, n_annotators, redundancy = params[0], params[1], params[2]
        points = params[5]
        sigmas = params[6]

        # The sampled population only provides annotator objects; their
        # sigmas are overwritten with the ones stored in the setup.
        annotator_population = SimpleNormalAnnotatorPopulation(uniform(scale=0.1))
        ann_set = annotator_population.sample(n_annotators)
        for k, annotator in enumerate(ann_set.annotators):
            annotator._sigma = sigmas[k]

        exp = ActiveAnnotationContest(points, ann_set, max_total_annotations=n_points*redundancy)
        results, durations = compare(models, exp, metrics, repeats)

        list_duration.append(durations)
        list_df.append(pd.DataFrame(results))

    # squeeze=False keeps `axs` two-dimensional even for a single setup file,
    # so the indexing below works for any number of setups
    fig, axs = plt.subplots(1, len(list_setup_file), figsize=(10, 5), sharey=True, squeeze=False)
    for i, df in enumerate(list_df):
        ax = axs[0, i]
        sn.boxplot(data=df[df["metric"] == "mean_location_norm_error"], x="model", y="value", ax=ax)
        int_tuple = tuple(int(k) for k in re.findall(r'\d+', list_setup_file[i]))
        ax.set_title(str(int_tuple))
        ax.set(xlabel="Model", ylabel="Mean location norm error")
        ax.set_xticks(range(len(name_list)))
        ax.set_xticklabels(name_list, rotation=45, ha='right')

    return list_duration

In [None]:
# Example: run run_and_plot on three setup files whose names differ only in the number of points
run_and_plot([
    'np_1000_na_50_rd_3_sd_uniform_pd_uniform_setup.pkl',
    'np_10000_na_50_rd_3_sd_uniform_pd_uniform_setup.pkl',
    'np_100000_na_50_rd_3_sd_uniform_pd_uniform_setup.pkl',
])

In [None]:

def necessary_budget_continuous(setup_file, redundancies=range(2, 40), repeats=60):
    '''
    Plot, for every model, the redundancy needed to reach a given location error.

    setup_file:   str, name of the setup file handed to load_experiment_setup.
                  Points (params[5]) and annotator sigmas (params[6]) are taken
                  from the setup; the redundancy stored in the setup is ignored
                  because the function sweeps over `redundancies` instead.
    redundancies: iterable of redundancy levels to evaluate (default 2..39).
    repeats:      number of runs averaged per model and redundancy (default 60).

    For each model and redundancy the mean of mean_location_norm_error over
    all repeats is computed. The curves are drawn with the error on a
    logarithmic x axis and the redundancy on the (right-hand) y axis; the two
    10shot variants are drawn solid, all other models dashed.

    Raises ValueError if the sigma distribution (params[3]) is neither
    'uniform' nor 'beta'.
    '''
    params = load_experiment_setup(setup_file)  # open setup file

    sig_distr = params[3]  # choice of the sigma distribution
    if sig_distr == 'uniform':
        annotator_population = SimpleNormalAnnotatorPopulation(uniform(scale=0.1))
    elif sig_distr == 'beta':
        annotator_population = SimpleNormalAnnotatorPopulation()
    else:
        # previously this fell through and failed later with an unbound local
        raise ValueError(
            f"unknown sigma distribution {sig_distr!r}; expected 'uniform' or 'beta'"
        )

    models = {
        "mean": OneShotMean(),
        "direct": OneShotDirect(),
        "iterative": OneShotIterative(),
        "conservative": OneShotConservative(),
        "conservative2": OneShotConservative2(),
        "10shot-1.1": KShot(greediness=1.1),
        "10shot-1.4": KShot(greediness=1.4),
    }
    solid_models = ("10shot-1.1", "10shot-1.4")

    n_points, n_annotators = params[0], params[1]
    points = params[5]
    sigmas = params[6]

    ann_set = annotator_population.sample(n_annotators)
    for k, annotator in enumerate(ann_set.annotators):  # we put the setup sigmas
        annotator._sigma = sigmas[k]

    redundancy_levels = list(redundancies)
    model_curves = []
    for model_name, m in models.items():
        print(model_name)
        error_list = []
        for redundancy in redundancy_levels:
            exp = ActiveAnnotationContest(points, ann_set, max_total_annotations=n_points*redundancy)
            run_errors = []
            for _ in range(repeats):
                exp.reset()
                this_run = m.run(exp)
                run_errors.append(mean_location_norm_error(exp, this_run))
            # Average over ALL repeats. The earlier stride-2 slice dated from a
            # two-metric version and silently dropped every other run.
            error_list.append(np.mean(run_errors))
        model_curves.append((redundancy_levels, np.array(error_list)))

    fig, ax = plt.subplots()
    for model_name, (levels, errors) in zip(models, model_curves):
        linestyle = "solid" if model_name in solid_models else "dashed"
        ax.plot(errors, levels, label=model_name, linestyle=linestyle)

    ax.set_xlabel("error values")

    # inverted layout: the redundancy axis is shown on the right-hand side
    ax.spines['left'].set_visible(False)
    ax.yaxis.set_ticks_position('right')
    ax.yaxis.set_label_position("right")
    ax.set_ylabel("redundancy")

    ax.legend()
    ax.set_xscale('log')
    plt.show()
    


In [None]:

def greedyness_impact(setup_file, greediness_list, repeats=50):
    '''
    Plot the location error of KShot as a function of its greediness.

    setup_file:      str, name of the setup file handed to
                     load_experiment_setup. Points (params[5]), annotator
                     sigmas (params[6]) and the seed (params[7]) come from it.
    greediness_list: greediness values to test; one KShot model is built per
                     value (duplicate values collapse into a single model).
    repeats:         number of runs per greediness value (default 50).

    Draws a single box plot of "mean_location_norm_error" with one box per
    greediness value. Returns nothing.

    Raises ValueError if greediness_list is empty.
    '''
    models = {f"greed={greediness}": KShot(greediness=greediness)
              for greediness in greediness_list}
    if len(models) == 0:
        # an empty result frame has no "metric" column to filter on
        raise ValueError("greediness_list must contain at least one value")
    metrics = {"mean_location_norm_error": mean_location_norm_error}

    # tick labels show only the greediness value, without the "greed=" prefix
    tick_labels = [name[len("greed="):] for name in models]

    params = load_experiment_setup(setup_file)  # get the params from the setup file

    # Seed NumPy as well as `random`: scipy.stats sampling draws from
    # NumPy's global generator, which random.seed() does not touch.
    random.seed(params[7])
    np.random.seed(params[7])

    n_points, n_annotators, redundancy = params[0], params[1], params[2]
    points = params[5]
    sigmas = params[6]

    # The sampled population only provides annotator objects; their sigmas
    # are overwritten with the ones stored in the setup.
    annotator_population = SimpleNormalAnnotatorPopulation(uniform(scale=0.1))
    ann_set = annotator_population.sample(n_annotators)
    for k, annotator in enumerate(ann_set.annotators):
        annotator._sigma = sigmas[k]

    exp = ActiveAnnotationContest(points, ann_set, max_total_annotations=n_points*redundancy)
    df = pd.DataFrame(compare(models, exp, metrics, repeats)[0])

    # create the box plot
    fig, ax = plt.subplots(1, 1, figsize=(10, 5))
    sn.boxplot(
        data=df[df["metric"] == "mean_location_norm_error"],
        x="model",
        y="value",
        ax=ax,
        boxprops=dict(facecolor='none', edgecolor='black', linewidth=1.5),
    )
    ax.set(xlabel="greedyness", ylabel="Mean_location_norm_error")
    ax.set_xticks(range(len(tick_labels)))
    ax.set_xticklabels(tick_labels)

In [None]:

def impact_of_distrib(setup_file, repeats=30):
    '''
    Compare the models' location error on the setup's points vs Tokyo points.

    setup_file: str, name of the setup file handed to load_experiment_setup.
                Annotator sigmas (params[6]) and the seed (params[7]) come
                from it. The first experiment uses the setup's own points
                (params[5], assumed to be uniformly distributed -- TODO
                confirm the setup was created with a uniform point
                distribution); the second uses tok(n_points).
    repeats:    number of runs per model and per point distribution
                (default 30).

    Draws one box plot of "mean_location_norm_error" with the models on the
    setup's points on the left and the same models on Tokyo points on the
    right, separated by a dashed vertical line. Returns nothing.
    '''
    models = {
        "mean": OneShotMean(),
        "direct": OneShotDirect(),
        "iterative": OneShotIterative(),
        "conservative": OneShotConservative(),
        "conservative2": OneShotConservative2(),
        "10shot": KShot(greediness=1.1),
    }
    metrics = {"mean_location_norm_error": mean_location_norm_error,
               "mean_sigma_norm_error": mean_sigma_norm_error}

    params = load_experiment_setup(setup_file)

    # Seed NumPy as well as `random`: scipy.stats sampling draws from
    # NumPy's global generator, which random.seed() does not touch.
    random.seed(params[7])
    np.random.seed(params[7])

    n_points, n_annotators, redundancy = params[0], params[1], params[2]
    sigmas = params[6]

    # The sampled population only provides annotator objects; their sigmas
    # are overwritten with the ones stored in the setup.
    annotator_population = SimpleNormalAnnotatorPopulation(uniform(scale=0.1))
    ann_set = annotator_population.sample(n_annotators)
    for k, annotator in enumerate(ann_set.annotators):
        annotator._sigma = sigmas[k]

    # test1: points stored in the setup (uniform)
    points = params[5]
    exp = ActiveAnnotationContest(points, ann_set, max_total_annotations=n_points*redundancy)
    df_uniform = pd.DataFrame(compare(models, exp, metrics, repeats)[0])

    # test2: tokyo
    points = tok(n_points)
    exp = ActiveAnnotationContest(points, ann_set, max_total_annotations=n_points*redundancy)
    df_tokyo = pd.DataFrame(compare(models, exp, metrics, repeats)[0])

    # Suffix the Tokyo model names so both runs get separate boxes.
    tokyo_suffix = "1"
    df_tokyo["model"] = df_tokyo["model"] + tokyo_suffix
    df_concat = pd.concat([df_uniform, df_tokyo])
    location_errors = df_concat[df_concat["metric"] == "mean_location_norm_error"]

    # Derive order, labels and colours from `models` so that they always match
    # the number of boxes (the hard-coded 10-entry lists missed "10shot").
    model_names = list(models)
    model_order = model_names + [name + tokyo_suffix for name in model_names]
    short_names = {"iterative": "iter", "conservative": "cons", "conservative2": "cons2"}
    new_labels = [short_names.get(name, name) for name in model_names] * 2
    new_colors = sn.color_palette("flare", n_colors=len(model_order))
    # colours are assigned by increasing mean of the plotted metric
    mean_vals = location_errors.groupby("model")["value"].mean().sort_values()

    # create the box plot
    fig, axs = plt.subplots(1, 1, figsize=(10, 5), sharey=True)
    sn.boxplot(
        data=location_errors,
        x="model",
        y="value",
        order=model_order,
        ax=axs,
        palette=dict(zip(mean_vals.index, new_colors)),
    )

    axs.set_xticks(range(len(new_labels)))
    axs.set_xticklabels(new_labels)

    # Add text
    axs.text(0.2, 0.9, 'Uniform', transform=axs.transAxes, fontsize=14, verticalalignment='top')
    axs.text(0.7, 0.9, 'Tokyo', transform=axs.transAxes, fontsize=14, verticalalignment='top')

    # dashed separator between the last uniform box and the first Tokyo box
    x_middle = len(model_order) / 2
    axs.axvline(x=x_middle-0.5, color='r', linestyle='--')

    axs.set(xlabel="Model", ylabel="Mean location norm error")