In [62]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn

In [63]:
def getfiles(start="PVR",items='120',design='v30',schemes=['fixed_express'],mis=0):
    """List the files in ``Results/`` whose names match the given filters.

    A file name is kept when all of the following hold:
      * it begins with ``start``;
      * it contains the substrings ``items`` and ``design``;
      * it contains at least one of the substrings in ``schemes``;
      * whether it ends with ``'mis.csv'`` equals ``mis`` (0/False keeps the
        names that do not end that way, 1/True keeps those that do).

    Returns:
        (where, listoffiles): the directory string ``'Results/'`` and the
        matching bare file names, in ``os.listdir`` order.
    """
    where = 'Results/'

    def _matches(fname):
        # Cheap prefix test first, then the substring filters.
        if not fname.startswith(start):
            return False
        has_scheme = any(tag in fname for tag in schemes)
        if items not in fname or design not in fname:
            return False
        if not has_scheme:
            return False
        return fname.endswith('mis.csv') == mis

    listoffiles = [fname for fname in os.listdir(where) if _matches(fname)]
    return where, listoffiles

In [64]:
def getcolormap():
    """Return a fresh dict mapping each scheme label to its plot colour name."""
    return dict(
        # Thompson-sampling style labels
        TS='Navy',
        TSe4='Aqua',
        TSe1='Red',
        TSe2='Maroon',
        TSe3='Aqua',
        TSthres='Blue',
        TSdouble='Navy',
        # greedy labels
        greedy='Green',
        greedy3='Green',
        greedythres='Green',
        # fixed designs and the uncert / mismin labels
        fixed_express='Fuchsia',
        fixed_sparse='Purple',
        uncert='Red',
        mismin='Maroon',
        # labels carrying an "apprx" / "approx" suffix, and win* labels
        TSe4apprx='Fuchsia',
        TSapprox='Fuchsia',
        TSe3apprx='Purple',
        TSthresapprx='Purple',
        TSapproxthres='Purple',
        misminapprox='Green',
        winmismin='Red',
        TSregthres='Teal',
        newTSthres='Blue',
        newTSregthres='Teal',
        greedynightmare='Lime',
        winapprox='Green',
        winapproxthres='Lime',
        # TSe<x>d<y> labels
        TSe1d2='Red',
        TSe1d4='Maroon',
        TSe4d0='Green',
        TSe2d0='Green',
        TSe2d2='Purple',
        TSe4d2='Purple',
        TSe2d4='Aqua',
    )

In [65]:
def getkmap():
    """Return a fresh dict mapping scheme label -> 0/1 flag.

    calculateStop uses the flag to decide whether the bhist file name for a
    label carries a ``k<k>`` suffix (1) or not (0).
    """
    # Labels in their original insertion order.
    labels = (
        'TS', 'TSe4', 'TSe1', 'TSe2', 'TSthres', 'TSdouble', 'greedy', 'greedythres',
        'fixed_express', 'fixed_sparse', 'uncert', 'mismin',
        'TSe4apprx', 'TSthresapprx', 'misminapprox', 'TSregthres',
        'newTSthres', 'newTSregthres', 'TSe3apprx', 'TSe3', 'greedy3',
    )
    # Labels whose flag is 1; every other label gets 0.
    flagged = {
        'TSthres', 'TSdouble', 'greedythres', 'uncert', 'mismin',
        'TSthresapprx', 'misminapprox', 'TSregthres', 'newTSthres', 'newTSregthres',
    }
    return {name: (1 if name in flagged else 0) for name in labels}

In [66]:
def getlabelmap():
    """Return a fresh dict mapping each scheme label to its legend text.

    Every entry containing TeX markup is written as a raw string so that
    ``\\epsilon`` / ``\\delta`` / ``\\frac`` are never parsed as Python escape
    sequences (non-raw ``'\\e'`` and ``'\\d'`` are invalid escapes and emit a
    warning on current Python versions). The resulting strings are unchanged.
    """
    # NOTE(review): several values below ('dotted', 'solid', 'dashdot') look like
    # linestyle names rather than legend text -- confirm the intended labels.
    labelmap = {
        'TS': 'TS',
        'TSe4': r'$\epsilon$-$\delta$-diffuse TS ($\epsilon=\frac{1}{4}$,$\delta=\frac{1}{4}$)',
        'TSe1': 'dashdot',
        'TSe2': 'dotted',
        'TSe3': 'dotted',
        'TSthres': 'dotted',
        'TSdouble': 'dotted',
        'greedy': r'$\epsilon$-greedy',
        'greedy3': 'dotted',
        'greedythres': r'$\epsilon$-greedy with thresholding',
        'fixed_express': 'Express',
        'fixed_sparse': 'dotted',
        'uncert': 'max-uncert',
        'mismin': 'max-misclass',
        'TSapprox': 'TSapprox',
        'TSapproxthres': 'TSapproxthres',
        'TSe4apprx': 'dotted',
        'TSe3apprx': 'dotted',
        'TSthresapprx': 'dotted',
        'misminapprox': 'misminapprox',
        'TSregthres': 'dotted',
        'newTSthres': r'$\epsilon$-$\delta$-diffuse TS with thresholding',
        'newTSregthres': 'TS with thresholding',
        'greedynightmare': 'dotted',
        'winapprox': 'WinApprox',
        'winapproxthres': 'WinApprox with thresholding',
        'TSe1d2': 'dotted',
        'TSe1d4': 'solid',
        'winmismin': 'winmismin',
        'TSe4d0': r'$\epsilon$-$\delta$-diffuse TS ($\epsilon=\frac{1}{4}$,$\delta=0$)',
        'TSe2d0': r'$\epsilon$-$\delta$-diffuse TS ($\epsilon=\frac{1}{2}$,$\delta=0$)',
        'TSe2d2': r'$\epsilon$-$\delta$-diffuse TS ($\epsilon=\frac{1}{2}$,$\delta=\frac{1}{2}$)',
        'TSe4d2': r'$\epsilon$-$\delta$-diffuse TS ($\epsilon=\frac{1}{4}$,$\delta=\frac{1}{2}$)',
        'TSe2d4': r'$\epsilon$-$\delta$-diffuse TS ($\epsilon=\frac{1}{2}$,$\delta=\frac{1}{4}$)',
    }
    return labelmap

In [67]:
def _first_stop(pvr, iternum, thres, burn_in=60, fallback=500):
    """Earliest RespNum (> burn_in) of iteration `iternum` whose PVR is below `thres`.

    Returns `fallback` when no row of that iteration meets the condition.
    """
    crossed = (pvr['Iter'] == iternum) & (pvr['PVR'] < thres) & (pvr['RespNum'] > burn_in)
    candidates = pvr.loc[crossed, 'RespNum']
    if candidates.empty:
        return fallback
    return int(candidates.min())


def calculateStop(filename="PVR-TSe4-120v20k10.csv",thresv=[.02],k=10,table=0):
    """Compute stopping points (and top-k hit rates) from a PVR results file.

    For every iteration (1 .. max of the bhist ``Iter`` column) the stopping
    point is the smallest ``RespNum`` above 60 at which ``PVR`` drops below the
    threshold; if that never happens it is 500. Recorded values are capped at 500.

    Args:
        filename: name of a CSV inside ``Results/`` with ``Iter``, ``PVR`` and
            ``RespNum`` columns. The scheme label is the second ``-``-separated
            field of the name.
        thresv: thresholds. With ``table`` falsy, a scalar or a sequence of
            thresholds. With ``table`` truthy, a single threshold given either
            as a scalar or as a one-element sequence.
        k: size of the "top k" item set; also used in the bhist file name for
            labels flagged in ``getkmap()``.
        table: when truthy, return per-iteration stopping points instead of
            means, and skip the hit-rate computation.

    Returns:
        table truthy: 1-D float array, one stopping point per iteration.
        table falsy: ``(mean_stop, mean_hit_rate)``, each a 1-D array with one
            entry per threshold, averaged over iterations.

    Raises:
        KeyError: if the label is not in ``getkmap()`` or a required column is
            missing (previously a missing column was silently turned into 500).
        ValueError: if ``table`` is truthy and ``thresv`` holds more than one value.
        IndexError: if bhist has no row for an (iteration, stopping point) pair.
    """
    results_dir = 'Results/'
    max_resp = 500  # fallback / cap for the stopping point

    pvr = pd.read_csv(results_dir + filename)
    label = filename.split('-')[1]

    # The '120v20' item-count/design tag is hard-coded in the bhist file names.
    if getkmap()[label]:
        bhist_name = 'bhist-' + label + '-120v20k' + str(k) + '.csv'
    else:
        bhist_name = 'bhist-' + label + '-120v20.csv'
    bhist = pd.read_csv(results_dir + bhist_name)
    iters = int(np.max(bhist['Iter']))

    if table:
        # Table mode works on one threshold. A one-element sequence is unwrapped;
        # comparing the PVR column against a list used to raise inside a broad
        # `except` and silently produce 500 for every iteration.
        thres = thresv
        if np.ndim(thres) > 0:
            flat = np.ravel(thres)
            if flat.size != 1:
                raise ValueError(
                    'table mode expects a single threshold, got %d values' % flat.size)
            thres = flat[0]
        return np.array(
            [min(_first_stop(pvr, iternum, thres, fallback=max_resp), max_resp)
             for iternum in range(1, iters + 1)],
            dtype=float)

    # The item file is only needed for the hit rate, so it is read here rather
    # than up front. Item count and file are fixed at 120 (the former '300'/'40'
    # branches could never run because `items` was the int 120).
    nitems = 120
    itemfile = 'HB_120items.csv'
    itemcol = ['Item_' + str(i) for i in range(1, nitems + 1)]
    # Columns from index 2 onward are averaged per column
    # (presumably the first two columns are identifiers -- verify against the file).
    utilsall = pd.read_csv(itemfile).values[:, 2:]
    realmean = np.mean(utilsall, 0)
    topk = np.argsort(realmean)[::-1][:k]
    topk_set = set(topk)

    thresholds = np.atleast_1d(thresv)  # also accepts a bare scalar
    stop = np.zeros((len(thresholds), iters))
    hr = np.zeros((len(thresholds), iters))
    for i, thres in enumerate(thresholds):
        for iternum in range(1, iters + 1):
            st = _first_stop(pvr, iternum, thres, fallback=max_resp)
            stop[i, iternum - 1] = min(st, max_resp)
            # NOTE(review): the bhist row is looked up at the uncapped `st`, as
            # before; confirm whether the capped value was intended.
            at_stop = (bhist['Iter'] == iternum) & (bhist['RespNum'] == st)
            estimates = bhist.loc[at_stop, itemcol].values[0]
            cur = np.argsort(estimates)[::-1][:k]
            hr[i, iternum - 1] = len(set(cur) & topk_set) / k
    return stop.mean(1), hr.mean(1)

        

In [68]:
def plotstop(name='stoppingtimes.pdf'):
    """Plot mean stopping respondent against threshold, one line per scheme.

    Uses the 'v20' result files found by ``getfiles`` for a fixed list of
    scheme tags, evaluates ``calculateStop`` on nine thresholds running from
    .05 down to .01, and writes the figure to ``plots/<name>`` before closing it.
    """
    scheme_tags = [
        'TS-', 'greedy-', 'TSe4-',
        'mismin-', 'uncert-', 'greedythres-', 'newTSthres-', 'newTSregthres-',
        'fixed_express-',
    ]
    thresholds = np.linspace(.05, .01, 9)
    _, result_files = getfiles(design='v20', schemes=scheme_tags)
    palette = getcolormap()

    for result_file in result_files:
        scheme = result_file.split('-')[1]
        # The mean hit rate is returned as well but is not drawn here.
        mean_stop, _mean_hr = calculateStop(filename=result_file, thresv=thresholds)
        plt.plot(thresholds, mean_stop, label=scheme, color=palette[scheme])

    plt.xlabel('Threshold')
    plt.ylabel('Stopping Resp')
    plt.title('Stopping Time based on threshold')
    plt.legend(loc='upper right', fontsize='small')

    plt.savefig('plots/' + name, dpi=100)
    plt.close()

In [12]:
# Draw the stopping-time-vs-threshold figure with the default name (plots/stoppingtimes.pdf).
plotstop()


In [75]:
def histstop(name='stophis.pdf',thres=.05,schemes=[],top=.02,bins=6):
    """Save overlaid, normalised histograms of per-iteration stopping points.

    One histogram is drawn per 'v20' result file matched by ``getfiles`` for
    the given scheme tags, coloured via ``getcolormap`` and labelled via
    ``getlabelmap``. The figure is written to ``plots/<name>`` and closed.

    Args:
        name: output file name inside ``plots/``.
        thres: single threshold passed to ``calculateStop(..., table=1)``.
        schemes: substrings selecting the result files (see ``getfiles``);
            the list is only read, never modified.
        top: upper y-axis limit.
        bins: ``bins`` argument forwarded to ``plt.hist``.
    """
    _, listoffiles = getfiles(design='v20', schemes=schemes)
    colormap = getcolormap()
    labelmap = getlabelmap()

    for file in listoffiles:
        label = file.split('-')[1]
        stop = calculateStop(filename=file, thresv=thres, table=1)
        # `density=True` replaces the `normed=True` keyword, which newer
        # Matplotlib releases no longer accept.
        plt.hist(stop, bins=bins, label=labelmap[label], color=colormap[label],
                 alpha=.8, edgecolor='black', linewidth=1.2, density=True)

    # Axis limits do not depend on the file, so they are set once after drawing.
    plt.ylim([0, top])
    plt.xlim([0, 500])

    plt.xlabel('Stopping Resp')
    plt.legend(loc='upper left', fontsize='small')

    plt.savefig('plots/' + name, dpi=100)
    plt.close()

In [70]:
# Scheme file-name tags in three groups. plotstop and tablestop define the same
# three lists locally, so these module-level copies are not what those functions read.
schemes1=['TS-','greedy-','TSe4-']
schemes2=['mismin-','uncert-','greedythres-','newTSthres-','newTSregthres-']
schemes3=['fixed_express-']

In [76]:
# Stopping-point histograms at threshold .02 (10 bins), one PDF per pair of schemes.
# The commented-out calls below are the threshold .05 variants with a y-limit of .025.
histstop(name='stophistTSgr02.pdf',thres=.02,schemes=['TS-','greedy-'],bins=10)
histstop(name='stophisted02.pdf',thres=.02,schemes=['newTSthres-','TSe4-'],bins=10)
histstop(name='stophistprob02.pdf',thres=.02,schemes=['mismin-','uncert-'],bins=10)
#histstop(name='stophistTSgr05.pdf',thres=.05,schemes=['TS-','greedy-'],top=.025)
#histstop(name='stophisted05.pdf',thres=.05,schemes=['newTSthres-','TSe4-'],top=.025)
#histstop(name='stophistprob05.pdf',thres=.05,schemes=['mismin-','uncert-'],top=.025)

In [13]:
def tablestop(thresv=[.05]):
    """Build a 2-row summary table for a single threshold.

    For every 'v20' result file matched by ``getfiles`` for a fixed list of
    scheme tags, ``calculateStop`` is evaluated at the threshold. Row 0 holds
    the mean stopping point and row 1 the mean hit rate; columns are named by
    the scheme label taken from the file name.

    Args:
        thresv: exactly one threshold, as a scalar or a one-element sequence.

    Returns:
        pandas.DataFrame of shape (2, number of matched files).

    Raises:
        ValueError: if ``thresv`` does not hold exactly one value (each table
            cell can store only one number).
    """
    thresholds = np.atleast_1d(thresv)
    if thresholds.size != 1:
        raise ValueError(
            'tablestop expects exactly one threshold, got %d' % thresholds.size)

    scheme_tags = ['TS-', 'greedy-', 'TSe4-',
                   'mismin-', 'uncert-', 'greedythres-', 'newTSthres-', 'newTSregthres-',
                   'fixed_express-']
    _, listoffiles = getfiles(design='v20', schemes=scheme_tags)

    temptable = np.zeros((2, len(listoffiles)))
    cols = []
    for ind, file in enumerate(listoffiles):
        meanstop, meanhr = calculateStop(filename=file, thresv=thresholds)
        cols.append(file.split('-')[1])
        # calculateStop returns length-1 arrays here; take the element explicitly
        # instead of relying on implicit size-1-array-to-scalar conversion.
        temptable[0, ind] = np.ravel(meanstop)[0]
        temptable[1, ind] = np.ravel(meanhr)[0]
    return pd.DataFrame(temptable, columns=cols)



In [14]:
# Summary table at the default threshold (.05): row 0 = mean stopping point, row 1 = mean hit rate.
tablestop()

Unnamed: 0,fixed_express,greedy,greedythres,mismin,newTSregthres,newTSthres,TS,TSe4,uncert
0,317.8,235.4,183.2,134.2,98.4,135.6,120.6,145.6,124.2
1,0.854,0.88,0.852,0.848,0.795,0.857,0.846,0.866,0.837


In [15]:
# Same summary table at threshold .02.
tablestop(thresv=[.02])

Unnamed: 0,fixed_express,greedy,greedythres,mismin,newTSregthres,newTSthres,TS,TSe4,uncert
0,497.6,389.2,288.0,226.8,140.2,222.8,205.6,247.2,219.8
1,0.893,0.918,0.902,0.911,0.822,0.911,0.907,0.912,0.909
