In [35]:
from collections import Counter
from dotenv import dotenv_values
import json
import os
from music_symbol import MusicSymbol
from pathlib import Path
from response_metrics import ResponseMetrics
import sys

# Importing from parent directory: append the parent of the working directory
# to sys.path so that the `scripts` package can be imported below.
# NOTE: os.path.abspath('') resolves to the current working directory, so
# despite its name `curr_file` holds a directory path, not a file path.
curr_file = Path(os.path.abspath('')).resolve()
sys.path.append(str(curr_file.parent))
from scripts.backup_data import main as load_data

In [36]:
# Prefix (under ../backups/) of the snapshot files to analyse.
# Set to None to pull from the database live
backup_prefix = "testing/20230504-145732"
# backup_prefix = None


def _read_json(path):
    """Return the parsed contents of the JSON file at `path`."""
    with open(path, "r") as handle:
        return json.load(handle)


if backup_prefix is not None:
    # Offline mode: read the three snapshot files that share the prefix.
    print("Loading from backups")
    users_data = _read_json(f"../backups/{backup_prefix}_users_backup.json")
    sheets_data = _read_json(f"../backups/{backup_prefix}_sheets_backup.json")
    measures_data = _read_json(f"../backups/{backup_prefix}_measures_backup.json")
else:
    # Live mode: the endpoint comes from the project's .env file.
    config = dotenv_values("../.env")
    ENDPOINT = config["ENDPOINT"]

    print("Grabbing data from DB")
    users_data, sheets_data, measures_data = load_data(ENDPOINT, "../backups/live")

Loading from backups


In [37]:
# Report how many records of each kind were loaded.
summary_lines = [
    f"# Users: {len(users_data)}",
    f"# Sheets: {len(sheets_data)}",
    f"# Measures: {len(measures_data)}",
]
print("\n".join(summary_lines))

# Users: 43
# Sheets: 3
# Measures: 48


In [38]:
# Total number of responses submitted across every measure.
responses = sum(len(measure["responses"]) for measure in measures_data)

print(f"# Responses: {responses}")

# Responses: 183


In [39]:
# Histogram: number of responses on a measure -> how many measures have that many.
# Counter keeps keys in first-seen order, so converting it to a plain dict
# prints the same mapping a hand-built dict would.
response_count = dict(Counter(len(measure["responses"]) for measure in measures_data))

print(response_count)

{3: 10, 4: 37, 5: 1}


In [40]:
def _build_symbols(raw_symbols, slots=4):
    """Turn raw symbol dicts into MusicSymbol objects, None-padded to `slots` entries."""
    symbols = [MusicSymbol(**raw) for raw in raw_symbols]
    # A non-positive multiplier yields an empty list, so lists that already
    # have `slots` or more entries are left as they are.
    symbols.extend([None] * (slots - len(symbols)))
    return symbols


# Metrics for every individual response, keyed by response id.
response_data: dict[str, ResponseMetrics] = {}
# Metrics for each measure's most frequent response, keyed by measure id.
crowdsourced_response_data: dict[str, ResponseMetrics] = {}
# Number of measures whose most frequent response has no equally frequent rival.
crowdsourced_agreement_count: int = 0

for measure in measures_data:
    gold_symbols = _build_symbols(measure["goldSymbols"])

    # Tally identical answers; the symbol list is turned into a tuple so it
    # can serve as a Counter key.
    response_counter = Counter()
    for response in measure["responses"]:
        response_id = response["_id"]
        user_symbols = _build_symbols(response["symbols"])
        response_counter[tuple(user_symbols)] += 1
        response_data[response_id] = ResponseMetrics(user_symbols, gold_symbols)

    # no responses, skip
    if not response_counter:
        continue

    # most_common orders equal counts by first appearance, so the leading
    # entry is the same one most_common(1) would return.
    top_two = response_counter.most_common(2)
    crowdsourced_symbols, primary_count = top_two[0]

    # Agreement means either a single distinct answer or a strict winner.
    # On a tie the first-seen answer is still recorded below; it just is not
    # counted as agreement.
    if len(top_two) < 2 or primary_count > top_two[1][1]:
        crowdsourced_agreement_count += 1

    crowdsourced_response_data[measure["_id"]] = ResponseMetrics(list(crowdsourced_symbols), gold_symbols)

In [41]:
# Flat list of every response id, in measure order and then response order.
all_response_ids = [
    response["_id"]
    for measure in measures_data
    for response in measure["responses"]
]

In [42]:
def calculate_statistics(response_metrics_map: "dict[str, ResponseMetrics]", key_group: list[str], title: str, units: str="responses", print_results: bool=True) -> tuple[float, float, float, float]:
    """Compute, and optionally print, whole-answer accuracy figures for a group of keys.

    Each key in `key_group` is looked up in `response_metrics_map` and the
    resulting object is asked four questions: full_symbol_count_match(),
    full_name_match(), full_pitch_match() and full_exact_match().

    Args:
        response_metrics_map: Mapping from key to a metrics object exposing
            the four methods above.
        key_group: Keys to include. Any iterable of keys is accepted (for
            example ``dict.keys()``); it is materialised once internally.
        title: Heading printed above the report.
        units: Noun used in the report for one entry (e.g. "responses").
        print_results: When True, print a formatted report.

    Returns:
        A tuple ``(symbol_count_accuracy, name_accuracy, pitch_accuracy,
        exact_match_accuracy)`` of fractions in [0, 1]. Pitch accuracy is
        taken only over entries whose full_pitch_match() is not None. A ratio
        whose denominator is zero (empty key group, or no entry with a pitch
        result) is returned as NaN instead of raising ZeroDivisionError.

    Raises:
        KeyError: If a key in `key_group` is missing from `response_metrics_map`.
    """
    # Materialise once so views and one-shot iterables can be both counted and iterated.
    keys = list(key_group)
    total_count = len(keys)

    symbol_num_match_count = 0
    name_match_count = 0
    pitch_match_count = 0
    exact_match_count = 0
    pitch_count = 0

    for key in keys:
        metrics = response_metrics_map[key]

        if metrics.full_symbol_count_match():
            symbol_num_match_count += 1

        if metrics.full_name_match():
            name_match_count += 1

        # None means "no pitch result for this entry": leave it out of the
        # pitch denominator rather than counting it as a miss.
        pitch_match = metrics.full_pitch_match()
        if pitch_match is not None:
            pitch_count += 1

            if pitch_match:
                pitch_match_count += 1

        if metrics.full_exact_match():
            exact_match_count += 1

    def _ratio(numerator: int, denominator: int) -> float:
        # An accuracy over zero entries is undefined; report NaN, not a crash.
        return numerator / denominator if denominator else float("nan")

    symbol_num_match_accuracy = _ratio(symbol_num_match_count, total_count)
    name_match_accuracy = _ratio(name_match_count, total_count)
    pitch_match_accuracy = _ratio(pitch_match_count, pitch_count)
    exact_match_accuracy = _ratio(exact_match_count, total_count)

    if print_results:
        print(title)
        print("-" * len(title))
        print(f"{total_count} {units}, {pitch_count} {units} with pitch content")
        print(f"{'Symbol count accuracy:':<35} {symbol_num_match_count}/{total_count} {units} = {symbol_num_match_accuracy * 100:.4f}%")
        print(f"{'Symbol identification accuracy:':<35} {name_match_count}/{total_count} {units} = {name_match_accuracy * 100:.4f}%")
        print(f"{'Symbol pitch accuracy:':<35} {pitch_match_count}/{pitch_count} {units} = {pitch_match_accuracy * 100:.4f}%")
        print(f"{'Symbol exact match accuracy:':<35} {exact_match_count}/{total_count} {units} = {exact_match_accuracy * 100:.4f}%")

    return symbol_num_match_accuracy, name_match_accuracy, pitch_match_accuracy, exact_match_accuracy

In [43]:
# The trailing semicolon keeps the notebook from echoing the returned accuracy tuple.
calculate_statistics(response_data, all_response_ids, title="All response data metrics");

All response data metrics
-------------------------
183 responses, 156 responses with pitch content
Symbol count accuracy:              177/183 responses = 96.7213%
Symbol identification accuracy:     174/183 responses = 95.0820%
Symbol pitch accuracy:              138/156 responses = 88.4615%
Symbol exact match accuracy:        162/183 responses = 88.5246%


In [44]:
calculate_statistics(crowdsourced_response_data, crowdsourced_response_data.keys(), title="Crowdsourced response data metrics", units="measures")

# Share of measures whose most frequent response had no equally frequent rival.
crowdsourced_total = len(crowdsourced_response_data)
agreement_rate = crowdsourced_agreement_count / crowdsourced_total
print(f"{'Crowd conclusively agreed on:':<35} {crowdsourced_agreement_count}/{crowdsourced_total} measures = {agreement_rate * 100:.4f}%")

Crowdsourced response data metrics
----------------------------------
48 measures, 41 measures with pitch content
Symbol count accuracy:              48/48 measures = 100.0000%
Symbol identification accuracy:     48/48 measures = 100.0000%
Symbol pitch accuracy:              40/41 measures = 97.5610%
Symbol exact match accuracy:        47/48 measures = 97.9167%
Crowd conclusively agreed on:       46/48 measures = 95.8333%
