# Imports

In [1]:
from bson.json_util import dumps
import pymongo
import json

# Reading the necessary Keys

In [2]:
# Load the MongoDB connection string from the local key file
file_path = '../keys.json'
target_key = 'mongo_path'
with open(file_path, 'r', encoding='utf-8') as file:
    data = json.load(file)
cluster_path = data.get(target_key)
if cluster_path is None:
    # Fail fast: pymongo.MongoClient(None) would fall back to its default
    # host instead of the intended cluster, hiding the configuration error.
    raise KeyError(f"Key '{target_key}' not found in {file_path}")
print("Cluster Path found")


Cluster Path found


# MongoDB Initializations

In [3]:
# Open a client for the MongoDB cluster configured in cluster_path
cluster = pymongo.MongoClient(cluster_path)

# Select the database that holds the experiment data
db = cluster.get_database("DialoKEY")

# Collections used below; everything stored in them can be read and exported
# with regular MongoDB operations
collection = db.get_collection("logs_04")  # contains ALL logs from the experiment
collection_cleaned = db.get_collection("logs_cleaned_3")  # contains only the cleaned logs

# Fetch a single document from the full log collection
document = collection.find_one()

# Read the data

In [4]:
# Exclude some known uuids that did not do the experiment well
excluded_uuids = ["3cec1211d1414f32a8ab9b405f9e4296", 'ca5e6287541d44ba9c9128016657d593',
                  '2dc6b3ff2392476ea9c895f03fc312a7']

# A participant with more than this many "Okay" user flags is treated as possible fraud
MAX_OKAY_FLAGS = 5

# Search for all uuids with the correct code (that passed the attention check)
all_documents = []
uuid_list = [document['uuid'][0]
             for document in collection.find({"final_code": {"$regex": "-PP"}})]

# Find all answers with the uuids that have the correct code
for count, uuid in enumerate(uuid_list):
    if uuid in excluded_uuids:
        continue
    print(f"STATUS: {(count + 1) / len(uuid_list) * 100} %")

    # Fetch the participant's documents once. A cursor is consumed by iteration,
    # so the result is materialised into a list to allow two passes over it.
    participant_documents = list(collection.find({"uuid": {"$regex": uuid}}))

    # First pass: count the documents whose "User Flag" contains 'Okay'
    okay_count = sum(
        1 for document in participant_documents
        if "User Flag" in document and 'Okay' in document["User Flag"]
    )
    if okay_count > MAX_OKAY_FLAGS:
        # Possible fraud: remember the uuid AND skip this participant's answers,
        # so they reach neither the cleaned collection nor the export below.
        excluded_uuids.append(uuid)
        continue

    # Second pass: keep every document that is a regular answer
    for document in participant_documents:
        # Skip the attention check itself and the message carrying the final code
        if document.get('Attention Check'):
            continue
        if 'final_code' in document:
            continue

        # Upsert the cleaned document into the separate collection that only holds cleaned logs
        collection_cleaned.update_many({'_id': document['_id']}, {"$set": document}, upsert=True)
        all_documents.append(document)

# Write the results in a json file
with open('./output_files/collection.json', 'w') as file:
    # Join with commas so that no trailing comma precedes the closing bracket
    # (a trailing comma makes the file invalid JSON).
    file.write('[')
    file.write(','.join(dumps(document) for document in all_documents))
    file.write(']')




STATUS: 0.7575757575757576 %
STATUS: 1.5151515151515151 %
STATUS: 2.272727272727273 %
STATUS: 3.0303030303030303 %
STATUS: 3.787878787878788 %
STATUS: 4.545454545454546 %
STATUS: 5.303030303030303 %
STATUS: 6.0606060606060606 %
STATUS: 7.575757575757576 %
STATUS: 8.333333333333332 %
STATUS: 9.090909090909092 %
STATUS: 9.848484848484848 %
STATUS: 10.606060606060606 %
STATUS: 11.363636363636363 %
STATUS: 12.121212121212121 %
STATUS: 12.878787878787879 %
STATUS: 13.636363636363635 %
STATUS: 14.393939393939394 %
STATUS: 15.151515151515152 %
STATUS: 15.909090909090908 %
STATUS: 16.666666666666664 %
STATUS: 17.424242424242426 %
STATUS: 18.181818181818183 %
STATUS: 18.939393939393938 %
STATUS: 19.696969696969695 %
STATUS: 20.454545454545457 %
STATUS: 21.21212121212121 %
STATUS: 21.96969696969697 %
STATUS: 22.727272727272727 %
STATUS: 23.484848484848484 %
STATUS: 24.242424242424242 %
STATUS: 25.0 %
STATUS: 25.757575757575758 %
STATUS: 26.515151515151516 %
STATUS: 27.27272727272727 %
STATUS: 28

# Printing overall stats

In [5]:
def _has_worker_id(document):
    """Return True when the document carries a non-empty ``worker_id`` field."""
    return "worker_id" in document and len(document['worker_id']) > 0


def _is_mturk_worker_id(worker_id):
    """Return True for ids that are 14 characters long and contain a digit (-> Mturk participant)."""
    return len(worker_id) == 14 and any(char.isdigit() for char in worker_id)


all_documents_including_false = []
all_uuids_including_false = []
all_workerids_including_false = []
all_uuids_including_wrong_code = []

cursor4 = collection.find()
cursor5 = collection.find({"final_code": {"$not": {"$regex": "-PP"}}})

# Collect every document (except "Process" entries) with its uuid and, if present, worker id
for document in cursor4:
    if "Process" in document:
        continue
    all_uuids_including_false.append(document['uuid'][0])
    all_documents_including_false.append(document)
    if _has_worker_id(document):
        all_workerids_including_false.append(document['worker_id'][0])

unique_worker_ids = set(all_workerids_including_false)
only_mturk = [worker_id for worker_id in unique_worker_ids if _is_mturk_worker_id(worker_id)]

unique_uuids = set(all_uuids_including_false)
for document in cursor5:
    # The $not filter also matches documents without a final_code field -> skip those
    if "final_code" not in document:
        continue
    all_uuids_including_wrong_code.append(document['uuid'][0])
unique_uuids_wrong_code = set(all_uuids_including_wrong_code)
all_workerids_only_false = []

# Collect the worker ids of all participants that submitted a wrong code
for uuid in unique_uuids_wrong_code:
    for document in collection.find({"uuid": {"$regex": uuid}}):
        if _has_worker_id(document):
            all_workerids_only_false.append(document['worker_id'][0])

unique_false_worker_ids = set(all_workerids_only_false)
only_false_mturk = [worker_id for worker_id in unique_false_worker_ids
                    if _is_mturk_worker_id(worker_id)]

# Write the results in a json file
with open('./output_files/collection_including_false.json', 'w') as file:
    # Join with commas so that no trailing comma precedes the closing bracket
    # (a trailing comma makes the file invalid JSON).
    file.write('[')
    file.write(','.join(dumps(document) for document in all_documents_including_false))
    file.write(']')

print(f"Participants overall: {len(unique_uuids)}")
print(f"Participants from mturk: {len(only_mturk)}")
print(f"Individual Documents: {len(all_documents_including_false)}")
print(f"False codes from mturk: {len(only_false_mturk)}")
print(f"False codes from other users: {len(unique_uuids_wrong_code) - len(only_false_mturk)}")
print(f"Not finished experiments: {len(unique_uuids) - len(uuid_list) - len(unique_uuids_wrong_code)}")
print(f"Participants that only clicked okay: {len(excluded_uuids)}")
print(f"Participant's (cleaned db): {len(uuid_list)}")
print(f"Individual documents (cleaned db): {len(all_documents)}")



Participants overall: 185
Participants from mturk: 61
Individual Documents: 3123
False codes from mturk: 3
False codes from other users: 3
Not finished experiments: 47
Participants that only clicked okay: 3
Participant's (cleaned db): 132
Individual documents (cleaned db): 2455
