In [1]:
import pandas as pd
import numpy as np
import peewee
from playhouse import postgres_ext
from data_model import *
import json
import random
from pprint import pprint

# Please create a Postgres Database by running this command : 
``` 
creatdb -U postgres window_study
```

## After running the above command and making sure the database is created, you need to run the following commands line by line. 

In [2]:
pg_db = PostgresqlExtDatabase('window_study', user='postgres', host='localhost', port=5432)



## First we create the Task Types. 
for more details look at the file `data_model.py`

In [3]:
# CREATING THE TASK TYPES FOR WINDOW STUDY V2
pg_db.connect()
# RUN ONCE
pg_db.create_tables([TaskType, Task])


In [4]:
# Putting data for Window Study 2
requesters = ['req1', 'req2', 'req3', 'req4']
task_type_1 = TaskType.create(task_type_id=1,
                              task_keywords=['sentiment analysis','text categorization', 'climate change'],
                              requester=random.sample(requesters, 1)[0],
                              task_title="Sentiment Analysis - Global warming / climate change",
                              expected_pay = 0.02
                             )

task_type_2 = TaskType.create(task_type_id=2,
                              task_keywords=['word comparison', 'semantic similarity'],
                              requester=random.sample(requesters, 1)[0],
                              task_title="Similarity judgment of word combinations",
                              expected_pay = 0.03
                             )

task_type_3 = TaskType.create(task_type_id=3,
                              task_keywords=['sentence agreement', 'sentence comparison'],
                              requester=random.sample(requesters, 1)[0],
                              task_title="Agreement between short and long sentences",
                              expected_pay = 0.03
                             )
task_type_4 = TaskType.create(task_type_id=4,
                              task_keywords=['image annotation', 'image rating'],
                              requester=random.sample(requesters, 1)[0],
                              task_title="How beautiful is this image (animals)",
                              expected_pay = 0.04
                             )

task_type_5 = TaskType.create(task_type_id=5,
                              task_keywords=['image categorization', 'sentiment analysis'],
                              requester=random.sample(requesters, 1)[0],
                              task_title="Image sentiment polarity classification",
                              expected_pay = 0.02                              
                             )

task_type_6 = TaskType.create(task_type_id=6,
                              task_keywords=[ 'image categorization','pattern recognition', 'image annotation'],
                              requester=random.sample(requesters, 1)[0],
                              task_title="Image categorization: dress patterns",
                              expected_pay = 0.05
                             )

task_type_7 = TaskType.create(task_type_id=7,
                              task_keywords=['transcription', 'image categorization','handwriting'],
                              requester=random.sample(requesters, 1)[0],
                              task_title="Transcriptions of names from handwriting",
                              expected_pay = 0.025
                             )

task_type_8 = TaskType.create(task_type_id=8,
                              task_keywords=['semantic analysis', 'semantic similarity','comparison'],
                              requester=random.sample(requesters, 1)[0],
                              task_title="'All oranges are lemons,' a.k.a. Semantic relationships between two concepts",
                              expected_pay = 0.03
                             )

task_type_9 = TaskType.create(task_type_id=9,
                              task_keywords=['sentiment analysis', 'emotion detection','resolutions'],
                              requester=random.sample(requesters, 1)[0],
                              task_title="2015 New Year’s resolutions",
                              expected_pay = 0.05
                             )
task_type_10 = TaskType.create(task_type_id=10,
                               task_keywords=['sentiment analysis','emotion detection','self-driving cars'],
                              requester=random.sample(requesters, 1)[0],
                              task_title="Twitter sentiment analysis: Self-driving cars",
                              expected_pay = 0.02
                              )



In [None]:
# CREATING THE TASK TYPES. 
pg_db.connect()
# RUN ONCE
pg_db.create_tables([TaskType, Task])
requesters = ['req1', 'req2', 'req3', 'req4']
task_type_1 = TaskType.create(task_type_id=1,
                              task_keywords=['sentiment analysis', 'emotion detection'],
                              requester=random.sample(requesters, 1)[0],
                              task_title="Judge emotions about nuclear energy from Twitter")

task_type_2 = TaskType.create(task_type_id=2,
                              task_keywords=['semantic analysis', 'semantic similarity', 'sentence comparison'],
                              requester=random.sample(requesters, 1)[0],
                              task_title="Decide whether two English sentences are related")

task_type_3 = TaskType.create(task_type_id=3,
                              task_keywords=['data validation', 'semantic analysis'],
                              requester=random.sample(requesters, 1)[0],
                              task_title="Sentence plausibility")
task_type_4 = TaskType.create(task_type_id=4,
                              task_keywords=['data enrichment', 'data collection'],
                              requester=random.sample(requesters, 1)[0],
                              task_title="Smartphone app functionality")
task_type_5 = TaskType.create(task_type_id=5,
                              task_keywords=['survey, academic, university research, quick', 'data enrichment'],
                              requester=random.sample(requesters, 1)[0],
                              task_title="Academy Awards demographics")
task_type_6 = TaskType.create(task_type_id=6,
                              task_keywords=[ 'image categorization','data collection', 'image annotation'],
                              requester=random.sample(requesters, 1)[0],
                              task_title="Image categorization: dress patterns")
task_type_7 = TaskType.create(task_type_id=7,
                              task_keywords=['transcription', 'image categorization'],
                              requester=random.sample(requesters, 1)[0],
                              task_title="Transcriptions of names from handwriting")
task_type_8 = TaskType.create(task_type_id=8,
                              task_keywords=['semantic analysis', 'semantic similarity'],
                              requester=random.sample(requesters, 1)[0],
                              task_title="'All oranges are lemons,' a.k.a. Semantic relationships between two concepts")
task_type_9 = TaskType.create(task_type_id=9,
                              task_keywords=['sentiment analysis', 'emotion detection'],
                              requester=random.sample(requesters, 1)[0],
                              task_title="2015 New Year’s resolutions")
task_type_10 = TaskType.create(task_type_id=10,
                               task_keywords=['data collection', 'data categorization'],
                              requester=random.sample(requesters, 1)[0],
                              task_title="The data behind data scientists")




## Now we read each `csv` file and push the content of it into the database. 

In [2]:
task1 = pd.read_csv('data/task1.csv',encoding= 'latin')
task1.sentiment.unique().tolist()

['Negative',
 'Neutral / author is just sharing information',
 'Positive',
 'Tweet NOT related to nuclear energy']

### The content of the tasks sometime only contains some part of the actual task. 

__I have created some JSON structure for the tasks. The content of each microtask is represented based on the type, for example, for task 1 which is a tweet sentiment, the JSON have a `text` and `choice` objects. the `text` object itself has an `Array` of objects containing information we want to show to the worker For example :
`` {'content': t.tweet_text , 'label': 'tweet text'} `` means the `content` key has the value of tweet text and `label` is the label for that value.__
You can translate this information into HTML format. 

In [7]:
tasks = []
for i,t in task1.iterrows():
    x = {
        'text': [{'content': t.tweet_text , 'label': 'tweet text'}],
        'choice' : [{'label': 'sentiment', 'options': task1.sentiment.unique().tolist()}]        
        }
    tasks.append(x)


In [9]:
task_type_1 = TaskType.select().where(TaskType.task_type_id == 1).get()

In [10]:
ID = 1

In [11]:
for t in tasks : 
    Task.create(id=ID,task_type=task_type_1,content=t)
    ID += 1

In [4]:
task2 = pd.read_csv('data/task2.csv',encoding= 'latin')

In [13]:
task2.if_sentence_a_is_true_then.value_counts()

Sentence B cannot be said to be true or false    306
Sentence B is true                               136
Sentence B is false                              113
Name: if_sentence_a_is_true_then, dtype: int64

In [14]:
tasks = []
for i,t in task2.iterrows():
    x = {
        'text': [{'content': t.sentenceA , 'label': 'Sentence A'},{'content': t.sentenceB , 'label': 'Sentence B'}],
        'choice' : [{'label': 'if sentence A is true then', 'options': task2.if_sentence_a_is_true_then.unique().tolist()}]        
        }
    tasks.append(x)


task_type = TaskType.select().where(TaskType.task_type_id == 2).get()

ID += 1
for t in tasks : 
    Task.create(id=ID,task_type=task_type,content=t)
    ID += 1

### TASK 3

In [6]:
task3 = pd.read_csv('data/task3.csv',encoding= 'latin')
task3.head()

Unnamed: 0,_unit_id,_golden,_unit_state,_trusted_judgments,_last_judgment_at,compatibility,compatibility:variance,orig__golden,compatibility_gold,plausibility_gold,plausibility_gold_reason,sentence,word1,word2
0,649597653,True,golden,21,,1.29,0.765,True,,1\n2,It is very hard to think of a real-life situat...,"This is not a coat, it is a system",coat,system
1,649597654,True,golden,22,,1.14,0.457,True,,1\n2,It is very hard to think of a real-life situat...,"This is not a cat, it is courses",cat,courses
2,649597655,True,golden,29,,1.17,0.647,True,,1\n2,It is very hard to think of a real-life situat...,"This is not broccoli, it is an influence",broccoli,influence
3,649597656,True,golden,23,,1.26,0.735,True,,1\n2,It is very hard to think of a real-life situat...,"This is not a trout, it is a unit",trout,unit
4,649597657,True,golden,30,,1.53,1.231,True,,1\n2,It is very hard to think of a real-life situat...,"This is not an oven, it is a king",oven,king


In [16]:
tasks = []
for i,t in task3.iterrows():
    x = {
        'text': [{'content': t.sentence , 'label': 'Sentence'}],
        'choice' : [{'label': 'Plausibility', 'options': ['1','2','3','4','5']}]        
        }
    tasks.append(x)


task_type = TaskType.select().where(TaskType.task_type_id == 3).get()

ID += 1
for t in tasks : 
    Task.create(id=ID,task_type=task_type,content=t)
    ID += 1

### TASK 4

In [17]:
task4 = pd.read_csv('data/task4.csv',encoding= 'latin')
task4.head()

Unnamed: 0,_unit_id,_golden,_canary,_unit_state,_trusted_judgments,_last_judgment_at,choose_the_corresponding_functionalities,choose_the_corresponding_functionalities:confidence,choose_the_corresponding_functionalities_gold,description,name
0,485643003,False,,finalized,3,6/20/14 9:05,ANDROID_TOOL,0.6718,,#1 ZIP application and first Android archiver ...,AndroZipã¢ File Manager
1,485643004,False,,finalized,3,6/19/14 23:45,KEYBOARD,1.0,,Over 10 million users enjoy ai.type! \n*** Tri...,A.I.type Keyboard Free
2,485643005,False,,finalized,3,6/20/14 14:36,KEYBOARD,1.0,,Over 10 million users LOVE ai.type! \n** Plus ...,ai.type Keyboard Plus
3,485643006,False,,finalized,3,6/20/14 17:25,GAME,0.6667,,"With MILLIONS of worldwide downloads, we think...",Math Workout
4,485643007,False,,finalized,3,6/19/14 21:58,NONE,0.6637,,Do you have problems turning off your alarm on...,Alarm Clock Xtreme


In [18]:
tasks = []
for i,t in task4.iterrows():
    x = {
        'text': [{'content': t.name , 'label': 'Name of App'},{'content': t.description , 'label': 'Description'}],
        'choice' : [{'label': 'Funcionalities', 'options': task4.choose_the_corresponding_functionalities.unique().tolist(),'multiple':True}]        
        }
    tasks.append(x)


task_type = TaskType.select().where(TaskType.task_type_id == 4).get()

ID += 1
for t in tasks : 
    Task.create(id=ID,task_type=task_type,content=t)
    ID += 1

### TASK 5

In [19]:
task5 = pd.read_csv('data/task5.csv',encoding= 'latin')
task5.head()

Unnamed: 0,_unit_id,_golden,_unit_state,_trusted_judgments,_last_judgment_at,birthplace,birthplace:confidence,date_of_birth,date_of_birth:confidence,race_ethnicity,...,award,biourl,birthplace_gold,date_of_birth_gold,movie,person,race_ethnicity_gold,religion_gold,sexual_orientation_gold,year_of_award_gold
0,670454353,False,finalized,3,2/10/15 3:45,"Chisinau, Moldova",1.0,30-Sep-1895,1.0,White,...,Best Director,http://www.nndb.com/people/320/000043191/,,,Two Arabian Knights,Lewis Milestone,,,,
1,670454354,False,finalized,3,2/10/15 2:03,"Glasgow, Scotland",1.0,2-Feb-1886,1.0,White,...,Best Director,http://www.nndb.com/people/626/000042500/,,,The Divine Lady,Frank Lloyd,,,,
2,670454355,False,finalized,3,2/10/15 2:05,"Chisinau, Moldova",1.0,30-Sep-1895,1.0,White,...,Best Director,http://www.nndb.com/people/320/000043191/,,,All Quiet on the Western Front,Lewis Milestone,,,,
3,670454356,False,finalized,3,2/10/15 2:04,"Chicago, Il",1.0,23-Feb-1899,1.0,White,...,Best Director,http://www.nndb.com/people/544/000041421/,,,Skippy,Norman Taurog,,,,
4,670454357,False,finalized,3,2/10/15 1:48,"Salt Lake City, Ut",1.0,23-Apr-1894,1.0,White,...,Best Director,http://www.nndb.com/people/292/000044160/,,,Bad Girl,Frank Borzage,,,,


In [20]:
tasks = []
for i,t in task5.iterrows():
    x = {
        'text' : [{'content': t.person , 'label': 'Name of Person'},
                   {'content': t.movie , 'label': 'Movie'},
                   {'content': t.biourl , 'label': 'Biography URL'},
                   {'content': t.award , 'label': 'Awarded'}
                  ],
        'choice' : [{'label': 'Ethinicity', 'options': ['American Indian or Alaska Native','Asian','Black or African American','White','Native Hawaiian or Other Pacific Islander']},
                   {'label': 'Age', 'options': list(range(20,100))},
                   ]
        }
    tasks.append(x)


task_type = TaskType.select().where(TaskType.task_type_id == 5).get()

ID += 1
for t in tasks : 
    Task.create(id=ID,task_type=task_type,content=t)
    ID += 1

### TASK 6

In [21]:
task6 = pd.read_csv('data/task6.csv',encoding= 'latin')
task6.head()

Unnamed: 0,_unit_id,category,category:confidence,image_url
0,851505458,ikat,0.3487,http://s3-eu-west-1.amazonaws.com/we-attribute...
1,851505459,plain,1.0,http://s3-eu-west-1.amazonaws.com/we-attribute...
2,851505460,polka dot,0.6709,http://s3-eu-west-1.amazonaws.com/we-attribute...
3,851505461,plain,1.0,http://s3-eu-west-1.amazonaws.com/we-attribute...
4,851505462,geometry,0.7035,http://s3-eu-west-1.amazonaws.com/we-attribute...


In [22]:
task6.category.unique().tolist()

['ikat',
 'plain',
 'polka dot',
 'geometry',
 'floral',
 'squares',
 'scales',
 'animal',
 'OTHER',
 'stripes',
 'tribal',
 'houndstooth',
 'cartoon',
 'chevron',
 'stars',
 'letter_numb',
 'skull']

In [23]:
tasks = []
for i,t in task6.iterrows():
    x = {
        'image' : [
                   {'label': "Image" , 'path': t.image_url}
                  ],
        'choice' : [{'label': 'Category', 'options': task6.category.unique().tolist()},                   
                   ]
        }
    tasks.append(x)


task_type = TaskType.select().where(TaskType.task_type_id == 6).get()

ID += 1
for t in tasks : 
    Task.create(id=ID,task_type=task_type,content=t)
    ID += 1

### TASK 7

In [24]:
task7 = pd.read_csv('data/task7.csv',encoding= 'latin')
task7.head()

Unnamed: 0,_unit_id,image_url,transcription,first_or_last
0,952459271,http://crl.checkbacksoon.nl/dls/dss/d2m/15/fir...,HIMELIN,first
1,952459272,http://crl.checkbacksoon.nl/dls/dss/d2m/15/fir...,ROBIN,first
2,952459273,http://crl.checkbacksoon.nl/dls/dss/d2m/15/fir...,YOANN,first
3,952459274,http://crl.checkbacksoon.nl/dls/dss/d2m/15/fir...,MARTIN,first
4,952459275,http://crl.checkbacksoon.nl/dls/dss/d2m/15/fir...,CLEMENT,first


In [25]:
tasks = []
for i,t in task7.iterrows():
    x = {
        'image' : [
                   {'label': "Image" , 'path': t.image_url}
                  ],
        'input' : [{'label': 'Name'}]
        }
    tasks.append(x)


task_type = TaskType.select().where(TaskType.task_type_id == 7).get()

ID += 1
for t in tasks : 
    Task.create(id=ID,task_type=task_type,content=t)
    ID += 1

### TASK 8

In [26]:
task8 = pd.read_csv('data/task8.csv',encoding= 'latin')
task8.head()

Unnamed: 0,_unit_id,_golden,_unit_state,_trusted_judgments,_last_judgment_at,image_broken,image_broken:confidence,true_or_false,true_or_false:confidence,image_broken_gold,image_url1,image_url2,name_1,name_2,true_or_false_gold
0,613817588,True,golden,570,,,1,False,1.0,,http://homes.cs.washington.edu/~sunyuyin/Crowd...,http://homes.cs.washington.edu/~sunyuyin/Crowd...,jonathan apple,root,False
1,613817589,True,golden,641,,,1,False,1.0,,http://homes.cs.washington.edu/~sunyuyin/Crowd...,http://homes.cs.washington.edu/~sunyuyin/Crowd...,citrus fruit,asian pear,False
2,613817590,True,golden,586,,,1,False,1.0,,http://homes.cs.washington.edu/~sunyuyin/Crowd...,http://homes.cs.washington.edu/~sunyuyin/Crowd...,gala apple,sandwhich,False
3,613817591,False,finalized,8,11/30/14 14:11,,1,False,1.0,,http://homes.cs.washington.edu/~sunyuyin/Crowd...,http://homes.cs.washington.edu/~sunyuyin/Crowd...,garden tomato,allium,
4,613817592,False,finalized,8,11/30/14 4:33,,1,False,1.0,,http://homes.cs.washington.edu/~sunyuyin/Crowd...,http://homes.cs.washington.edu/~sunyuyin/Crowd...,seed,bok choy,


In [27]:
tasks = []
for i,t in task8.iterrows():
    x = {
        'text' : [
                   {'content': "All {0}s are {1}s".format(t.name_1,t.name_2) , 'label': 'Claim'}
                  ],
        'choice' : [{'label': 'True or False', 'options': ['True','False']},]
        }
    tasks.append(x)


task_type = TaskType.select().where(TaskType.task_type_id == 8).get()

ID += 1
for t in tasks : 
    Task.create(id=ID,task_type=task_type,content=t)
    ID += 1

### TASK 9

In [28]:
task9 = pd.read_csv('data/task9.csv',encoding= 'latin')
task9.head()

Unnamed: 0,other_topic,resolution_topics,gender,name,Resolution_Category,retweet_count,text,tweet_coord,tweet_created,tweet_date,tweet_id,tweet_location,tweet_state,user_timezone,tweet_region
0,"Read moore books, read less facebook.",Eat healthier,female,Dena_Marina,Health & Fitness,0.0,"#NewYearsResolution :: Read more books, No scr...",,12/31/14 10:48,12/31/14,5.50363e+17,Southern California,CA,Pacific Time (US & Canada),West
1,,Humor about Personal Growth and Interests Reso...,female,ninjagirl325,Humor,1.0,#NewYearsResolution Finally master @ZJ10 's pa...,,12/31/14 10:47,12/31/14,5.50363e+17,New Jersey,NJ,Central Time (US & Canada),Northeast
2,,Be More Confident,male,RickyDelReyy,Personal Growth,0.0,#NewYearsResolution to stop being so damn perf...,,12/31/14 10:46,12/31/14,5.50362e+17,Hollywood,CA,Eastern Time (US & Canada),West
3,Help More\nspread pet therapy|helping other,Other,male,CalmareNJ,Philanthropic,0.0,My #NewYearsResolution is to help my disabled ...,,12/31/14 10:45,12/31/14,5.50362e+17,Metro NYC,NY,,Northeast
4,,Be more positive,female,welovatoyoudemi,Personal Growth,0.0,#NewYearsResolution #2015Goals #2015bucketlist...,,12/31/14 10:44,12/31/14,5.50362e+17,"Pittsburgh, Pennsylvania",PA,Eastern Time (US & Canada),Northeast


In [29]:
task9.Resolution_Category.unique().tolist()

['Health & Fitness',
 'Humor',
 'Personal Growth',
 'Philanthropic',
 'Recreation & Leisure',
 'Family/Friends/Relationships',
 'Career',
 'Finance',
 'Education/Training',
 'Time Management/Organization']

In [30]:
tasks = []
for i,t in task9.iterrows():
    x = {
        'text' : [
                   {'content': t.text , 'label': 'Tweet Text'}                   
                  ],
        'choice' : [{'label': 'Resolution Category', 'options': task9.Resolution_Category.unique().tolist()},]
        }
    tasks.append(x)


task_type = TaskType.select().where(TaskType.task_type_id == 9).get()

ID += 1
for t in tasks : 
    Task.create(id=ID,task_type=task_type,content=t)
    ID += 1

### TASK 10

In [31]:
task10 = pd.read_csv('data/task10.csv',encoding= 'latin')
task10.head()

Unnamed: 0,post_yn,cloud_software_required,database_software_required,statistic_software_required,programming_language_required,linkedin_url
0,yes,Hive,SQL,R,Python,https://www.linkedin.com/jobs2/view/26909460?t...
1,yes,,SQL,,Python,https://www.linkedin.com/jobs2/view/18721409?t...
2,yes,NoSQL,SQL,,Python,https://www.linkedin.com/jobs2/view/13715592?t...
3,yes,NoSQL,SQL,SPSS,Python,https://www.linkedin.com/jobs2/view/13529837?t...
4,yes,Pig,,R,Python,https://www.linkedin.com/jobs2/view/38267683?t...


In [32]:
task10 = task10.fillna('other')

In [33]:
tasks = []
for i,t in task10.iterrows():
    x = {
        'text' : [
                   {'content': t.linkedin_url , 'label': 'LinkedIn Post'}                   
                  ],
        'choice' : [
            {'label': 'Programming Language', 'options': task10.programming_language_required.unique().tolist()},
            {'label': 'Statistic Software', 'options': task10.statistic_software_required.unique().tolist()},
            {'label': 'Database Software', 'options': task10.database_software_required.unique().tolist()}
        ]
        }
    tasks.append(x)


task_type = TaskType.select().where(TaskType.task_type_id == 10).get()

ID += 1
for t in tasks : 
    Task.create(id=ID,task_type=task_type,content=t)
    ID += 1

In [29]:
features = []
for i in TaskType.select():
    print(i.task_keywords)
    features.extend(i.task_keywords)
features = list(set(features))

['sentiment analysis', 'text categorization', 'climate change']
['word comparison', 'semantic similarity']
['sentence agreement', 'sentence comparison']
['image annotation', 'image rating']
['image categorization', 'sentiment analysis']
['image categorization', 'pattern recognition', 'image annotation']
['transcription', 'image categorization', 'handwriting']
['semantic analysis', 'semantic similarity', 'comparison']
['sentiment analysis', 'emotion detection', 'resolutions']
['sentiment analysis', 'emotion detection', 'self-driving cars']


In [30]:
features

['sentence comparison',
 'handwriting',
 'comparison',
 'image rating',
 'resolutions',
 'self-driving cars',
 'emotion detection',
 'image annotation',
 'image categorization',
 'pattern recognition',
 'sentence agreement',
 'word comparison',
 'transcription',
 'semantic analysis',
 'sentiment analysis',
 'text categorization',
 'semantic similarity',
 'climate change']

In [31]:
workers = pd.read_csv('data/20190301-User Profiles.csv',encoding='latin')

In [32]:
workers.Keywords[0]

'image categorization|image rating|pattern recognition|text categorization|word comparison|climate change|self-driving cars'

In [33]:
worker_keywords = pd.DataFrame(0,columns=features,index=workers.WorkerId)

In [35]:
for i,wk in workers.loc[:,['Keywords','WorkerId']].iterrows():
    for k in wk.Keywords.split('|'):
        if k in features:
            worker_keywords.loc[wk.WorkerId,k] = 1
        

In [37]:
worker_keywords.shape

(179, 18)

In [38]:
import os

In [97]:
for worker in worker_keywords.index:
    print(worker)
    with open("./data/assignments/random/{0}.tasks".format(worker),'w') as f:
        f.write("#{0}".format(worker))
        f.write("\n")

A1N9T0CFEI3LIW
A1YI1H05AQ3ING
A35P8YSNTQ7OTP
A5MZPF0UJ2TXJ
AIT1599C1IXTK
A223UBAD8S1SIB
ACP3L9X63GBRK
A29IDVL9EX3U7S
A2J9NY11VXRRT8
ACG0A16FI0MOJ
ADC4VXNQJ9FEW
A2WJT2WZ7A3C71
A48WNR6C4CI3J
A22AOKG0HK2DKP
A32QJF67JMJFKX
A1YKHG0IPO7YTR
AXY2PQVGC1GLX
A34442XB8HN7C0


In [39]:
for worker in worker_keywords.index:
    print(worker)
    tasks = [str(t.id) for t in Task.select().order_by(fn.Random()).limit(12)]
    with open("./data/assignments/random/{0}.tasks".format(worker),'w') as f:
        f.write("#{0}".format(worker))
        f.write("\n")
        f.write("{")
        f.write(",".join(tasks[0:3]))
        f.write("}")
        f.write("\n")
        f.write("{")
        f.write(",".join(tasks[3:6]))
        f.write("}")
        f.write("\n")
        f.write("{")
        f.write(",".join(tasks[6:]))
        f.write("}")

A317RA15E0SZ4P
A2QPMJ4GADLUW2
A3D6IPJ4RBCNVY
A2PAA0J2BTUYAG
A3F28XLLJ8ZUEM
AS4OUUE7SS7L9
A2LHD2SLJDEZGV
A1PLZ8RM4NW43J
A11M80TFYCE94G
A2KD3MCKKRADFB
A8DWTH31EYL13
AGQPTDG6ZF3T3
A2JJS93VRUF32W
A1J53RCE5Y2KA4
A272ZA1I98A68E
A3E5RC4DCJNK8L
AJJN3YJZ4H61K
A182J6QRCJP228
A2A5WYVO1PRB1Z
A304VR1GWSJUV
A100Y4C73CCD92
A2V4CKDV6TKXQ0
A3BK6RENTXQTOK
A1160COTUR26JZ
A110VR946DJCOG
A3MLKJMB7CCUFF
A2E91V21RE2ZDZ
A2N10QFJ8OCU2T
AIO8DX5OGCGN1
A3JEM05G181BNU
A2VPAI1545GXWC
AG303MJ0V2ROR
A1O4W7F05T6HY0
A8GD33ZJ1SAA5
A2M5I4KGKF9J7Q
AE1MZIYF1J4IK
A3GJPHFUCNBO8J
A7B1KAIRPG8KY
A1EX655HLB0NS4
A1M7UN3FG00PVC
ADOPEZ9I2RASP
A2BOYLRH81N3IL
AQD21K1SZ2052
A4AY5EKJZMTES
A1DE5AS2O48RT5
ADJNEKYVF2F8Q
A2A4GYJ44EYZ86
A3PSIT0A7OU2FL
A2UX1I209LML50
ADSGMSB2WVN7P
A26LBAAIP5J93S
A1KU59GDGBCYIM
A31ERF1G5FXUH0
A3AQDOB3RTJS2O
A28W7USM54QQKW
A3NNMPGQT7DSXS
A70TBKPGWQZO4
AK3GZE8MPQL4Y
AFC9Q2YBBESIP
A14SRK8J2NDFOR
A3GVIO0THJXK51
A34QVV6QUHYMST
A25XEDSN51Y0O
A23AP978617EJS
A3VNLVMTXFM126
ATSLS2GQ6BQ6A
A35X2QXIS7N1W1
A917IAL11JX3B
A

In [40]:
task_type_features = pd.DataFrame(0,columns=features,index=range(1,11))
for t in TaskType.select():
    i = t.task_type_id
    for d in t.task_keywords:
        task_type_features.loc[i,d] = 1


In [41]:
task_type_features

Unnamed: 0,sentence comparison,handwriting,comparison,image rating,resolutions,self-driving cars,emotion detection,image annotation,image categorization,pattern recognition,sentence agreement,word comparison,transcription,semantic analysis,sentiment analysis,text categorization,semantic similarity,climate change
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1
2,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0
3,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
4,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0
5,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0
6,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0
7,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0
8,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0
9,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0
10,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0


In [42]:
from scipy.spatial import distance
from pprint import pprint

In [14]:
[str(t.id) for t in Task.select().where(Task.task_type.in_([1,10])).order_by(fn.Random()).limit(12)]

['157920',
 '157904',
 '157847',
 '158586',
 '158140',
 '158291',
 '157902',
 '157882',
 '158482',
 '77',
 '157954',
 '110']

In [15]:
tasks = [str(t.id) for t in Task.select().where(Task.task_type.in_(x_tasks)).order_by(fn.Random()).limit(12)]
tasks

NameError: name 'x_tasks' is not defined

In [16]:
for w in sorted(worker_keywords.index):
    print(w)
    x = sorted({(t,TaskType.select().where(TaskType.task_type_id==t).get().requester):distance.cosine(worker_keywords.loc[w,:].values,task_type_features.loc[t,:].values) for t in range(1,11)}.items(),key=lambda x : x[1])
    x_good = [xx for xx in x if xx[1] < 0.7]
    pprint(x_good)
    x_tasks = [i[0][0] for i in x_good]
    pprint(x_tasks)
    tasks = [str(t.id) for t in Task.select().where(Task.task_type.in_(x_tasks)).order_by(fn.Random()).limit(12)]
    with open("./data/assignments/ct/{0}.tasks".format(w),'w') as f:
        f.write("#{0}".format(w))
        f.write("\n")
        f.write("{")
        f.write(",".join(tasks[0:3]))
        f.write("}")
        f.write("\n")
        f.write("{")
        f.write(",".join(tasks[3:6]))
        f.write("}")
        f.write("\n")
        f.write("{")
        f.write(",".join(tasks[6:]))
        f.write("}")
    

A094612415T5EHTYKXTQ
[((2, 'req4'), 0.42264973081037427)]
[2]
A122RVIUXTKC9I
[((1, 'req2'), 0.42264973081037427),
 ((9, 'req2'), 0.42264973081037427),
 ((6, 'req1'), 0.5285954792089684)]
[1, 9, 6]
A17D6BK59S31BM
[((1, 'req2'), 0.5285954792089683),
 ((3, 'req1'), 0.5285954792089683),
 ((9, 'req2'), 0.5285954792089683),
 ((10, 'req4'), 0.5285954792089683),
 ((2, 'req4'), 0.6150998205402495),
 ((6, 'req1'), 0.6150998205402495)]
[1, 3, 9, 10, 2, 6]
A18YG2P9W6F1MM
[((6, 'req1'), 0.38762756430420553), ((10, 'req4'), 0.5)]
[6, 10]
A191V8LNTTLHSA
[((4, 'req3'), 0.2928932188134524),
 ((10, 'req4'), 0.2928932188134524),
 ((6, 'req1'), 0.42264973081037427)]
[4, 10, 6]
A19HCBT1EH8484
[((2, 'req4'), 0.5370899501137243),
 ((6, 'req1'), 0.5370899501137243),
 ((1, 'req2'), 0.6220355269907728),
 ((3, 'req1'), 0.6220355269907728),
 ((4, 'req3'), 0.6220355269907728),
 ((7, 'req1'), 0.6220355269907728),
 ((8, 'req3'), 0.6220355269907728),
 ((9, 'req2'), 0.6220355269907728),
 ((10, 'req4'), 0.6220355269907

A2UY2IUBR3I2QC
[((6, 'req1'), 0.5285954792089684)]
[6]
A2VH2167ASI7CQ
[((1, 'req2'), 0.5),
 ((7, 'req1'), 0.5),
 ((9, 'req2'), 0.5),
 ((6, 'req1'), 0.5917517095361371)]
[1, 7, 9, 6]
A2VPHDKX329A8I
[((6, 'req1'), 0.4777670321329065),
 ((3, 'req1'), 0.5735985672887791),
 ((4, 'req3'), 0.5735985672887791),
 ((7, 'req1'), 0.5735985672887791),
 ((8, 'req3'), 0.5735985672887791),
 ((10, 'req4'), 0.5735985672887791),
 ((2, 'req4'), 0.6518446880886044)]
[6, 3, 4, 7, 8, 10, 2]
A2ZPPP0XVV8C0W
[((3, 'req1'), 0.5917517095361371),
 ((7, 'req1'), 0.5917517095361371),
 ((10, 'req4'), 0.5917517095361371),
 ((6, 'req1'), 0.6666666666666667)]
[3, 7, 10, 6]
A316WVL6W5GKHA
[((2, 'req4'), 0.5370899501137243),
 ((6, 'req1'), 0.5370899501137243),
 ((1, 'req2'), 0.6220355269907728),
 ((3, 'req1'), 0.6220355269907728),
 ((4, 'req3'), 0.6220355269907728),
 ((7, 'req1'), 0.6220355269907728),
 ((8, 'req3'), 0.6220355269907728),
 ((9, 'req2'), 0.6220355269907728),
 ((10, 'req4'), 0.6220355269907728)]
[2, 6, 1, 3, 

[((3, 'req1'), 0.5), ((7, 'req1'), 0.5)]
[3, 7]
AEQ4OHM042RLA
[((2, 'req4'), 0.5370899501137243),
 ((6, 'req1'), 0.5370899501137243),
 ((1, 'req2'), 0.6220355269907728),
 ((3, 'req1'), 0.6220355269907728),
 ((4, 'req3'), 0.6220355269907728),
 ((7, 'req1'), 0.6220355269907728),
 ((8, 'req3'), 0.6220355269907728),
 ((9, 'req2'), 0.6220355269907728),
 ((10, 'req4'), 0.6220355269907728)]
[2, 6, 1, 3, 4, 7, 8, 9, 10]
AF09UGIC6B81Y
[((3, 'req1'), 0.42264973081037427)]
[3]
AHFGWXN0A2PJM
[((6, 'req1'), 0.4522774424948338),
 ((7, 'req1'), 0.552786404500042),
 ((10, 'req4'), 0.552786404500042),
 ((2, 'req4'), 0.6348516283298893)]
[6, 7, 10, 2]
ALDHDBRTUI1NI
[((6, 'req1'), 0.3453463292920228),
 ((7, 'req1'), 0.4654775161751512),
 ((10, 'req4'), 0.4654775161751512)]
[6, 7, 10]
AMSD0JV1W7V9F
[((7, 'req1'), 0.5917517095361371),
 ((10, 'req4'), 0.5917517095361371),
 ((6, 'req1'), 0.6666666666666667)]
[7, 10, 6]
AQ4PB1PVI9U7V
[((2, 'req4'), 0.5370899501137243),
 ((6, 'req1'), 0.5370899501137243),
 ((1

In [43]:
from mmr import mmr


In [None]:
def generate_normalized_sim(worker_id):
    task_worker_dist_matrix = []
    for i in range(1,11):
        task_worker_dist_matrix.append(sim_rel(worker_id,i))
    

In [81]:
TaskType.get_by_id(1).expected_pay

0.02

In [82]:
def sim_rel(worker_id,task_id):
    if worker_id in worker_keywords.index:
        return 1-distance.cosine(worker_keywords.loc[worker_id,:].values,task_type_features.loc[task_id,:].values)
    else:
        return 1

def sim_payment(tid_1,tid_2):
    return (TaskType.get_by_id(tid_1).expected_pay - TaskType.get_by_id(tid_2).expected_pay)**2

In [87]:
mmr(3,list(range(1,11)),"A317RA15E0SZ4P",0.1,0.9,sim_rel,sim_payment)

([1, 5, 10], [(1, 0.1), (5, 0.1), (10, 0.1)])

In [73]:
print(worker_keywords.loc["A317RA15E0SZ4P",:])
for i in range(1,11):    
    print(TaskType.select().where(TaskType.task_type_id==i).get().task_keywords,sim_rel("A317RA15E0SZ4P",i))

sentence comparison     0
handwriting             0
comparison              0
image rating            1
resolutions             0
self-driving cars       1
emotion detection       0
image annotation        0
image categorization    1
pattern recognition     1
sentence agreement      0
word comparison         1
transcription           0
semantic analysis       0
sentiment analysis      0
text categorization     1
semantic similarity     0
climate change          1
Name: A317RA15E0SZ4P, dtype: int64
['sentiment analysis', 'text categorization', 'climate change'] 0.4364357804719847
['word comparison', 'semantic similarity'] 0.2672612419124243
['sentence agreement', 'sentence comparison'] 0.0
['image annotation', 'image rating'] 0.2672612419124243
['image categorization', 'sentiment analysis'] 0.2672612419124243
['image categorization', 'pattern recognition', 'image annotation'] 0.4364357804719847
['transcription', 'image categorization', 'handwriting'] 0.21821789023599236
['semantic analy

In [18]:
# MMR
for w in sorted(worker_keywords.index):
    print(w)
    x = sorted({(t,TaskType.select().where(TaskType.task_type_id==t).get().requester):distance.cosine(worker_keywords.loc[w,:].values,task_type_features.loc[t,:].values) for t in range(1,11)}.items(),key=lambda x : x[1])
    x_good = [xx for xx in x if xx[1] < 2]
    pprint(x_good)
    x_tasks = [i[0][0] for i in x_good]
    pprint(x_tasks)
    task_1 = [str(t.id) for t in Task.select().where(Task.task_type.in_(x_tasks)).order_by(fn.Random()).limit(12)]
    with open("./data/assignments/mmr/{0}.tasks".format(w),'w') as f:
        f.write("#{0}".format(w))
        f.write("\n")
        f.write("{")
        f.write(",".join(task_1[0:3]))
        f.write("}")
        f.write("\n")
        f.write("{")
        f.write(",".join(task_1[3:6]))
        f.write("}")
        f.write("\n")
        f.write("{")
        f.write(",".join(task_1[6:]))
        f.write("}")
    

A094612415T5EHTYKXTQ
[((2, 'req4'), 0.42264973081037427),
 ((1, 'req2'), 1.0),
 ((3, 'req1'), 1.0),
 ((4, 'req3'), 1.0),
 ((5, 'req3'), 1.0),
 ((6, 'req1'), 1.0),
 ((7, 'req1'), 1.0),
 ((8, 'req3'), 1.0),
 ((9, 'req2'), 1.0),
 ((10, 'req4'), 1.0)]
[2, 1, 3, 4, 5, 6, 7, 8, 9, 10]
A122RVIUXTKC9I
[((1, 'req2'), 0.42264973081037427),
 ((9, 'req2'), 0.42264973081037427),
 ((6, 'req1'), 0.5285954792089684),
 ((7, 'req1'), 0.7113248654051871),
 ((8, 'req3'), 0.7113248654051871),
 ((10, 'req4'), 0.7113248654051871),
 ((2, 'req4'), 0.7642977396044842),
 ((3, 'req1'), 1.0),
 ((4, 'req3'), 1.0),
 ((5, 'req3'), 1.0)]
[1, 9, 6, 7, 8, 10, 2, 3, 4, 5]
A17D6BK59S31BM
[((1, 'req2'), 0.5285954792089683),
 ((3, 'req1'), 0.5285954792089683),
 ((9, 'req2'), 0.5285954792089683),
 ((10, 'req4'), 0.5285954792089683),
 ((2, 'req4'), 0.6150998205402495),
 ((6, 'req1'), 0.6150998205402495),
 ((4, 'req3'), 0.7642977396044841),
 ((7, 'req1'), 0.7642977396044841),
 ((8, 'req3'), 0.7642977396044841),
 ((5, 'req3'), 

A21JTY4NPNVRPB
[((4, 'req3'), 0.5285954792089683),
 ((7, 'req1'), 0.5285954792089683),
 ((10, 'req4'), 0.5285954792089683),
 ((6, 'req1'), 0.6150998205402495),
 ((3, 'req1'), 0.7642977396044841),
 ((5, 'req3'), 0.7642977396044841),
 ((2, 'req4'), 0.8075499102701247),
 ((1, 'req2'), 1.0),
 ((8, 'req3'), 1.0),
 ((9, 'req2'), 1.0)]
[4, 7, 10, 6, 3, 5, 2, 1, 8, 9]
A27B0BV6MNTEPZ
[((8, 'req3'), 0.18350341907227408),
 ((2, 'req4'), 0.33333333333333337),
 ((3, 'req1'), 0.5917517095361371),
 ((7, 'req1'), 0.5917517095361371),
 ((6, 'req1'), 0.6666666666666667),
 ((1, 'req2'), 1.0),
 ((4, 'req3'), 1.0),
 ((5, 'req3'), 1.0),
 ((9, 'req2'), 1.0),
 ((10, 'req4'), 1.0)]
[8, 2, 3, 7, 6, 1, 4, 5, 9, 10]
A27K3UK3TETLIC
[((6, 'req1'), 0.42264973081037427),
 ((4, 'req3'), 0.6464466094067263),
 ((7, 'req1'), 0.6464466094067263),
 ((10, 'req4'), 0.6464466094067263),
 ((1, 'req2'), 1.0),
 ((2, 'req4'), 1.0),
 ((3, 'req1'), 1.0),
 ((5, 'req3'), 1.0),
 ((8, 'req3'), 1.0),
 ((9, 'req2'), 1.0)]
[6, 4, 7, 10, 1

A34AYCNA4VHI3B
[((1, 'req2'), 0.683772233983162),
 ((7, 'req1'), 0.683772233983162),
 ((9, 'req2'), 0.683772233983162),
 ((10, 'req4'), 0.683772233983162),
 ((2, 'req4'), 0.7418011102528388),
 ((6, 'req1'), 0.7418011102528388),
 ((3, 'req1'), 1.0),
 ((4, 'req3'), 1.0),
 ((5, 'req3'), 1.0),
 ((8, 'req3'), 1.0)]
[1, 7, 9, 10, 2, 6, 3, 4, 5, 8]
A34Q9JGU37OQJM
[((1, 'req2'), 0.5917517095361371),
 ((7, 'req1'), 0.5917517095361371),
 ((9, 'req2'), 0.5917517095361371),
 ((6, 'req1'), 0.6666666666666667),
 ((2, 'req4'), 1.0),
 ((3, 'req1'), 1.0),
 ((4, 'req3'), 1.0),
 ((5, 'req3'), 1.0),
 ((8, 'req3'), 1.0),
 ((10, 'req4'), 1.0)]
[1, 7, 9, 6, 2, 3, 4, 5, 8, 10]
A35P8YSNTQ7OTP
[((5, 'req3'), 0.2928932188134524),
 ((1, 'req2'), 1.0),
 ((2, 'req4'), 1.0),
 ((3, 'req1'), 1.0),
 ((4, 'req3'), 1.0),
 ((6, 'req1'), 1.0),
 ((7, 'req1'), 1.0),
 ((8, 'req3'), 1.0),
 ((9, 'req2'), 1.0),
 ((10, 'req4'), 1.0)]
[5, 1, 2, 3, 4, 6, 7, 8, 9, 10]
A37Q6RIUC4FASM
[((3, 'req1'), 0.5285954792089683),
 ((7, 'req1'),

A3RNP0T2RR422G
[((10, 'req4'), 0.42264973081037427),
 ((6, 'req1'), 0.5285954792089684),
 ((1, 'req2'), 0.7113248654051871),
 ((3, 'req1'), 0.7113248654051871),
 ((4, 'req3'), 0.7113248654051871),
 ((9, 'req2'), 0.7113248654051871),
 ((2, 'req4'), 1.0),
 ((5, 'req3'), 1.0),
 ((7, 'req1'), 1.0),
 ((8, 'req3'), 1.0)]
[10, 6, 1, 3, 4, 9, 2, 5, 7, 8]
A3TEVNU2YYO1VH
[((7, 'req1'), 0.3675444679663241),
 ((6, 'req1'), 0.4836022205056777),
 ((1, 'req2'), 0.683772233983162),
 ((3, 'req1'), 0.683772233983162),
 ((9, 'req2'), 0.683772233983162),
 ((2, 'req4'), 1.0),
 ((4, 'req3'), 1.0),
 ((5, 'req3'), 1.0),
 ((8, 'req3'), 1.0),
 ((10, 'req4'), 1.0)]
[7, 6, 1, 3, 9, 2, 4, 5, 8, 10]
A3VMU5OZOBNJLN
[((1, 'req2'), 0.5285954792089683),
 ((7, 'req1'), 0.5285954792089683),
 ((9, 'req2'), 0.5285954792089683),
 ((3, 'req1'), 0.7642977396044841),
 ((4, 'req3'), 0.7642977396044841),
 ((5, 'req3'), 0.7642977396044841),
 ((8, 'req3'), 0.7642977396044841),
 ((10, 'req4'), 0.7642977396044841),
 ((2, 'req4'), 0.

[((6, 'req1'), 0.29289321881345254),
 ((10, 'req4'), 0.42264973081037427),
 ((1, 'req2'), 0.7113248654051871),
 ((4, 'req3'), 0.7113248654051871),
 ((7, 'req1'), 0.7113248654051871),
 ((8, 'req3'), 0.7113248654051871),
 ((9, 'req2'), 0.7113248654051871),
 ((2, 'req4'), 0.7642977396044842),
 ((3, 'req1'), 1.0),
 ((5, 'req3'), 1.0)]
[6, 10, 1, 4, 7, 8, 9, 2, 3, 5]
AYIFHDQSXQJ6B
[((2, 'req4'), 0.4777670321329065),
 ((1, 'req2'), 0.5735985672887791),
 ((3, 'req1'), 0.5735985672887791),
 ((7, 'req1'), 0.5735985672887791),
 ((8, 'req3'), 0.5735985672887791),
 ((9, 'req2'), 0.5735985672887791),
 ((6, 'req1'), 0.6518446880886044),
 ((4, 'req3'), 0.7867992836443896),
 ((5, 'req3'), 0.7867992836443896),
 ((10, 'req4'), 0.7867992836443896)]
[2, 1, 3, 7, 8, 9, 6, 4, 5, 10]


In [27]:

for w in sorted(worker_keywords.index):
    print(w)
    x = sorted({(t,TaskType.select().where(TaskType.task_type_id==t).get().requester):distance.cosine(worker_keywords.loc[w,:].values,task_type_features.loc[t,:].values) for t in range(1,11)}.items(),key=lambda x : x[1])
    x_good = [xx for xx in x if xx[1] < 0.4]
    if len(x_good) < 1:
        x_good = [xx for xx in x if xx[1] < 0.6]
    if len(x_good) < 1:
        x_good = [xx for xx in x if xx[1] < 0.7]
    if len(x_good) < 1:
        x_good = [xx for xx in x if xx[1] < 0.8]
    if len(x_good) < 1:
        x_good = [xx for xx in x if xx[1] < 1.1]

    pprint(x_good)
    x_tasks = [i[0][0] for i in x_good]
    pprint(x_tasks)
    tasks = []
    if len(x_tasks) > 2:
        for i in range(3): 
            tasks.extend([str(t.id) for t in Task.select().where(Task.task_type == x_tasks[i]).order_by(fn.Random()).limit(4)])
    elif 1 < len(x_tasks) < 3: 
        tasks.extend([str(t.id) for t in Task.select().where(Task.task_type == x_tasks[0]).order_by(fn.Random()).limit(4)])
        tasks.extend([str(t.id) for t in Task.select().where(Task.task_type == x_tasks[1]).order_by(fn.Random()).limit(4)])
        tasks.extend([str(t.id) for t in Task.select().where(Task.task_type == x_tasks[0]).order_by(fn.Random()).limit(4)])
    else:
        tasks = [str(t.id) for t in Task.select().where(Task.task_type == x_tasks[0]).order_by(fn.Random()).limit(12)]
    pprint(tasks)
    with open("./data/assignments/ours/{0}.tasks".format(w),'w') as f:
        f.write("#{0}".format(w))
        f.write("\n")
        f.write("{")
        f.write(",".join(tasks[0:3]))
        f.write("}")
        f.write("\n")
        f.write("{")
        f.write(",".join(tasks[3:6]))
        f.write("}")
        f.write("\n")
        f.write("{")
        f.write(",".join(tasks[6:]))
        f.write("}")
    

A094612415T5EHTYKXTQ
[((2, 'req4'), 0.42264973081037427)]
[2]
['673',
 '213',
 '463',
 '278',
 '292',
 '366',
 '392',
 '737',
 '245',
 '576',
 '491',
 '460']
A122RVIUXTKC9I
[((1, 'req2'), 0.42264973081037427),
 ((9, 'req2'), 0.42264973081037427),
 ((6, 'req1'), 0.5285954792089684)]
[1, 9, 6]
['48',
 '83',
 '82',
 '115',
 '154315',
 '154928',
 '155371',
 '157001',
 '4293',
 '18722',
 '5306',
 '11634']
A17D6BK59S31BM
[((1, 'req2'), 0.5285954792089683),
 ((3, 'req1'), 0.5285954792089683),
 ((9, 'req2'), 0.5285954792089683),
 ((10, 'req4'), 0.5285954792089683)]
[1, 3, 9, 10]
['145',
 '100',
 '90',
 '60',
 '889',
 '796',
 '797',
 '908',
 '152928',
 '153805',
 '156529',
 '156664']
A18YG2P9W6F1MM
[((6, 'req1'), 0.38762756430420553)]
[6]
['10224',
 '9046',
 '9356',
 '10702',
 '4184',
 '3768',
 '9660',
 '17052',
 '8418',
 '17331',
 '17982',
 '8267']
A191V8LNTTLHSA
[((4, 'req3'), 0.2928932188134524), ((10, 'req4'), 0.2928932188134524)]
[4, 10]
['2260',
 '2310',
 '1837',
 '1736',
 '158291',
 '158

['50708',
 '36895',
 '80596',
 '20071',
 '158192',
 '158466',
 '157925',
 '158362',
 '5015',
 '12724',
 '13554',
 '5590']
A2UP5X3XSYJNTD
[((1, 'req2'), 0.42264973081037427),
 ((9, 'req2'), 0.42264973081037427),
 ((6, 'req1'), 0.5285954792089684)]
[1, 9, 6]
['183',
 '65',
 '43',
 '134',
 '156487',
 '154731',
 '154400',
 '156922',
 '19108',
 '17746',
 '9546',
 '16068']
A2UQUDK22T6PED
[((10, 'req4'), 0.3675444679663241)]
[10]
['158104',
 '158462',
 '158358',
 '158448',
 '158326',
 '158352',
 '158047',
 '158256',
 '157782',
 '157976',
 '157951',
 '158156']
A2UY2IUBR3I2QC
[((6, 'req1'), 0.5285954792089684)]
[6]
['12599',
 '16447',
 '4270',
 '9420',
 '13511',
 '14379',
 '7083',
 '3960',
 '13859',
 '12178',
 '18356',
 '9979']
A2VH2167ASI7CQ
[((1, 'req2'), 0.5),
 ((7, 'req1'), 0.5),
 ((9, 'req2'), 0.5),
 ((6, 'req1'), 0.5917517095361371)]
[1, 7, 9, 6]
['75',
 '57',
 '51',
 '182',
 '98829',
 '108630',
 '125922',
 '131619',
 '156390',
 '154185',
 '154979',
 '153104']
A2VPHDKX329A8I
[((6, 'req1')

[((2, 'req4'), 0.5370899501137243), ((6, 'req1'), 0.5370899501137243)]
[2, 6]
['415',
 '638',
 '431',
 '342',
 '16450',
 '8502',
 '16367',
 '11090',
 '546',
 '651',
 '239',
 '212']
A3VQVV99604GHU
[((4, 'req3'), 0.2928932188134524), ((10, 'req4'), 0.2928932188134524)]
[4, 10]
['2477',
 '2622',
 '2223',
 '2855',
 '158062',
 '158094',
 '157973',
 '158629',
 '2870',
 '2451',
 '2850',
 '2595']
A3W3IE7JDI5V5X
[((1, 'req2'), 0.5),
 ((7, 'req1'), 0.5),
 ((9, 'req2'), 0.5),
 ((6, 'req1'), 0.5917517095361371)]
[1, 7, 9, 6]
['33',
 '133',
 '151',
 '40',
 '103628',
 '59954',
 '142829',
 '63125',
 '156772',
 '155297',
 '154539',
 '154909']
A5024WK5CE1BT
[((2, 'req4'), 0.5370899501137243), ((6, 'req1'), 0.5370899501137243)]
[2, 6]
['327',
 '304',
 '574',
 '319',
 '11728',
 '9262',
 '9384',
 '15071',
 '399',
 '461',
 '719',
 '513']
A5V3ZMQI0PU3F
[((2, 'req4'), 0.5196155385847385), ((6, 'req1'), 0.5196155385847385)]
[2, 6]
['216',
 '584',
 '290',
 '602',
 '7054',
 '14813',
 '5550',
 '10603',
 '210',
 