## Running Example

In [1]:
import crowded.make as mk
import crowded.method as cm
import crowded.simulate as cs
import pandas as pd
from pycm import *

#### Simulate the tasks

In [2]:
# Number of tasks to simulate; first argument of Tasks.create() in the next cell.
total_tasks = 524
# Second argument of Tasks.create(); presumably the proportion of hard tasks
# (with 0.0, every task in the preview below is labelled easy_task) - TODO confirm.
p_hard_tasks = 0.0
# Passed to the cs.Tasks constructor; presumably the number of distinct
# answer labels a task can have - TODO confirm against crowded.simulate.
number_of_valid_answers = 3 

In [3]:
# Build the task generator first, then draw the simulated task table from it.
task_generator = cs.Tasks(number_of_valid_answers)
df_tasks = task_generator.create(total_tasks, p_hard_tasks)
df_tasks.head()

Unnamed: 0_level_0,task_id,true_answers,label_task,prob_task
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
task_PwGvSaUebMKm,task_PwGvSaUebMKm,galley,easy_task,0.52
task_7PNGwKRPQzrH,task_7PNGwKRPQzrH,feature,easy_task,0.71
task_GnhSH4XvywdX,task_GnhSH4XvywdX,galley,easy_task,0.83
task_8pgpkZHMnt8i,task_8pgpkZHMnt8i,galley,easy_task,0.61
task_QHpbyXrCF8gf,task_QHpbyXrCF8gf,galley,easy_task,0.74


In [4]:
# Distinct ground-truth labels that occur in the simulated tasks.
valid_answers = df_tasks.loc[:, 'true_answers'].unique()
print(valid_answers)



#### Simulate the workers

In [5]:
# Number of simulated workers requested from Workers.create() in the next cell.
total_workers = 32

In [6]:
# Instantiate the worker generator, then draw the pool of simulated workers.
worker_generator = cs.Workers()
workers = worker_generator.create(total_workers)
workers.head()

Unnamed: 0_level_0,prob_worker,worker_id
id,Unnamed: 1_level_1,Unnamed: 2_level_1
BXTYFNeWk56s,0.958197,BXTYFNeWk56s
cY8LY4s5zSNt,0.910832,cY8LY4s5zSNt
hXXHb5VWjmMw,0.887818,hXXHb5VWjmMw
7ycLTDeApgvA,0.956558,7ycLTDeApgvA
zc9JFr7gSUsy,0.876866,zc9JFr7gSUsy


#### Split the tasks

In [7]:
# Second argument of mk.tasks_split(); presumably the fraction of tasks
# reserved for the training stage - TODO confirm against crowded.make.
percentage_to_train = 0.3

In [8]:
# tasks_split yields a pair: the training share and the remaining tasks.
task_partitions = mk.tasks_split(df_tasks, percentage_to_train)
tasks_train, tasks_rest = task_partitions

#### Assigning workers to tasks

In [9]:
# Third argument of cs.AssignTasks, reused for every assignment in this notebook;
# the preview below shows three worker rows for task_dJyGi3gbUNCA.
workers_per_task = 3

In [10]:
# Pair every training task with workers drawn from the full worker pool.
train_assignment = cs.AssignTasks(tasks_train, workers, workers_per_task)
df_tw = train_assignment.create()
df_tw.head()

Unnamed: 0,task_id,worker_id,true_answers,label_task,prob_task,prob_worker
0,task_dJyGi3gbUNCA,C5MhxTzVDspC,feature,easy_task,0.77,0.86024
1,task_dJyGi3gbUNCA,4cY7czUtbrgH,feature,easy_task,0.77,0.934127
2,task_dJyGi3gbUNCA,wMeg9LYuu6S6,feature,easy_task,0.77,0.797711
3,task_iCQSpxCjmrcj,eKTx3N9WDgNh,galley,easy_task,0.92,0.919626
4,task_iCQSpxCjmrcj,wMeg9LYuu6S6,galley,easy_task,0.92,0.797711


#### Compute the probability to assess the tasks

In [11]:
# Simulate the workers' answers on the training tasks.
cp = cm.ComputeProbability(df_tw['prob_task'], df_tw['prob_worker'], valid_answers)
# Call predict() exactly once and reuse the result: if predict() samples randomly
# (NOTE(review): confirm in crowded.method), two separate calls could disagree and
# the 'performance' column would no longer describe the stored 'worker_answers'.
predictions_train = cp.predict()
df_tw['worker_answers'] = cm.WorkerAnswer(df_tw['true_answers'], predictions_train, valid_answers).match()
df_tw['performance'] = predictions_train
df_tw.head()

Unnamed: 0,task_id,worker_id,true_answers,label_task,prob_task,prob_worker,worker_answers,performance
0,task_dJyGi3gbUNCA,C5MhxTzVDspC,feature,easy_task,0.77,0.86024,feature,1
1,task_dJyGi3gbUNCA,4cY7czUtbrgH,feature,easy_task,0.77,0.934127,feature,1
2,task_dJyGi3gbUNCA,wMeg9LYuu6S6,feature,easy_task,0.77,0.797711,feature,1
3,task_iCQSpxCjmrcj,eKTx3N9WDgNh,galley,easy_task,0.92,0.919626,galley,1
4,task_iCQSpxCjmrcj,wMeg9LYuu6S6,galley,easy_task,0.92,0.797711,galley,1


#### Assess the performance and get the good workers

In [12]:
# Build the performance evaluator from the answered training assignments
# (df_tw carries the worker_answers and performance columns at this point).
perf = cm.Performance(df_tw)
# Result has the same columns as `workers` (prob_worker, worker_id). In the recorded
# outputs prob_worker differs from the initial value for some workers
# (e.g. 4cY7czUtbrgH: 0.934127 -> 0.989063), so it is presumably re-estimated from
# the training stage - TODO confirm in crowded.method.
trained_workers = perf.trained_workers()
trained_workers.head()

Unnamed: 0_level_0,prob_worker,worker_id
id,Unnamed: 1_level_1,Unnamed: 2_level_1
3hhdur8c6mgE,0.940222,3hhdur8c6mgE
okgctt4SxXB9,0.991034,okgctt4SxXB9
msxBBYBNbKNC,0.952057,msxBBYBNbKNC
ca6ivbyszVEv,0.953135,ca6ivbyszVEv
4cY7czUtbrgH,0.989063,4cY7czUtbrgH


#### Assign the trained workers to the rest of the tasks

In [13]:
# Pair the tasks held out of training with the trained workers only.
rest_assignment = cs.AssignTasks(tasks_rest, trained_workers, workers_per_task)
df_tw_2 = rest_assignment.create()
df_tw_2.head()

Unnamed: 0,task_id,worker_id,true_answers,label_task,prob_task,prob_worker
0,task_2572ceeyEuuK,MxLZS4CqSp9d,feature,easy_task,0.68,0.990189
1,task_2572ceeyEuuK,7ycLTDeApgvA,feature,easy_task,0.68,0.991252
2,task_2572ceeyEuuK,NVrSgwSN3u7m,feature,easy_task,0.68,0.9855
3,task_2GxL4r9qFyyd,6qrhoVsiGxU4,feature,easy_task,0.55,0.997735
4,task_2GxL4r9qFyyd,ca6ivbyszVEv,feature,easy_task,0.55,0.953135


#### Compute the probability for the rest of the tasks

In [14]:
# Simulate the trained workers' answers on the remaining tasks.
cp2 = cm.ComputeProbability(df_tw_2['prob_task'], df_tw_2['prob_worker'], valid_answers)
# Call predict() exactly once and reuse the result: if predict() samples randomly
# (NOTE(review): confirm in crowded.method), two separate calls could disagree and
# the 'performance' column would no longer describe the stored 'worker_answers'.
predictions_rest = cp2.predict()
df_tw_2['worker_answers'] = cm.WorkerAnswer(df_tw_2['true_answers'], predictions_rest, valid_answers).match()
df_tw_2['performance'] = predictions_rest
df_tw_2.head()

Unnamed: 0,task_id,worker_id,true_answers,label_task,prob_task,prob_worker,worker_answers,performance
0,task_2572ceeyEuuK,MxLZS4CqSp9d,feature,easy_task,0.68,0.990189,feature,1
1,task_2572ceeyEuuK,7ycLTDeApgvA,feature,easy_task,0.68,0.991252,feature,1
2,task_2572ceeyEuuK,NVrSgwSN3u7m,feature,easy_task,0.68,0.9855,feature,1
3,task_2GxL4r9qFyyd,6qrhoVsiGxU4,feature,easy_task,0.55,0.997735,feature,1
4,task_2GxL4r9qFyyd,ca6ivbyszVEv,feature,easy_task,0.55,0.953135,feature,1


#### Merge the data and get the overall accuracy

In [15]:
# Stack the training-stage and second-stage answers into one frame.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat is the
# replacement and, like append, keeps each frame's original index by default.
df = pd.concat([df_tw, df_tw_2])

In [16]:
# Confusion matrix of ground truth vs. individual worker answers over both stages.
actual_labels = df['true_answers'].tolist()
answered_labels = df['worker_answers'].tolist()
matrix = ConfusionMatrix(actual_labels, answered_labels)

In [17]:
# matrix.matrix() prints the table itself and returns None: the recorded output shows
# the table before the accuracy line followed by a literal "None". Keep the call out
# of the format string so the accuracy is reported first and no "None" is printed.
print('Accuracy of the simulated experiment considering 2 Stages: {}%'.format(round(matrix.Overall_ACC*100,2)))
# NOTE(review): newer pycm releases appear to expose this as print_matrix(); confirm
# against the installed pycm version.
matrix.matrix()

Actual
feature          459      5        1        
galley           2        565      3        

Accuracy of the simulated experiment considering 2 Stages: 99.11%
None


#### Compare with NO stage

In [18]:
# Baseline without a training stage: all tasks go to the untrained worker pool.
df_tw1 = cs.AssignTasks(df_tasks, workers, workers_per_task).create()
cp1 = cm.ComputeProbability(df_tw1['prob_task'], df_tw1['prob_worker'], valid_answers)
# Call predict() exactly once and reuse the result: if predict() samples randomly
# (NOTE(review): confirm in crowded.method), two separate calls could disagree and
# the 'performance' column would no longer describe the stored 'worker_answers'.
predictions_no_stage = cp1.predict()
df_tw1['worker_answers'] = cm.WorkerAnswer(df_tw1['true_answers'], predictions_no_stage, valid_answers).match()
df_tw1['performance'] = predictions_no_stage

In [19]:
# Confusion matrix of ground truth vs. individual worker answers for the baseline run.
actual_labels_no_stage = df_tw1['true_answers'].tolist()
answered_labels_no_stage = df_tw1['worker_answers'].tolist()
matrix1 = ConfusionMatrix(actual_labels_no_stage, answered_labels_no_stage)

In [20]:
# matrix1.matrix() prints the table itself and returns None: the recorded output shows
# the table before the accuracy line followed by a literal "None". Keep the call out
# of the format string so the accuracy is reported first and no "None" is printed.
print('Accuracy of the simulated experiment considering NO Stages: {}%'.format(round(matrix1.Overall_ACC*100,2)))
# NOTE(review): newer pycm releases appear to expose this as print_matrix(); confirm
# against the installed pycm version.
matrix1.matrix()

Actual
feature          446      9        10       
galley           14       547      9        

Accuracy of the simulated experiment considering NO Stages: 96.5%
None


##### The accuracy of the 1-stage (no training stage) algorithm is lower than that of the 2-stage algorithm: 96.5% vs. 99.11% in this run.