In [1]:
from csv import DictReader

def read_valid_results(path):
    """Read a survey CSV export and return the rows that pass screening.

    The first line of the file supplies the column names. The two rows that
    follow it are discarded without being inspected (presumably extra
    descriptive header rows in the export -- confirm against the export
    format). Every remaining row is kept only when all of these hold:

    * ``Status`` is not ``'Survey Preview'``
    * ``Q6`` is not the empty string
    * ``Screen 1`` equals ``'Yes'``
    * ``Screen 2`` equals ``'40'``

    Args:
        path: Location of the CSV file to read.

    Returns:
        A list of row dicts (column name -> string value) in file order.
        The list is empty when the file has fewer than two rows after the
        header line.

    Raises:
        KeyError: If one of the four columns above is absent from the header.
        OSError: If the file cannot be opened.
    """
    # newline='' is what the csv module asks for: it lets the reader handle
    # line endings itself so newlines inside quoted fields are kept intact.
    with open(path, 'r', newline='') as file:
        reader = DictReader(file)
        # Drop the two rows after the header; a too-short file simply has no
        # valid responses rather than leaking StopIteration to the caller.
        for _ in range(2):
            if next(reader, None) is None:
                return []
        return [
            item
            for item in reader
            if item['Status'] != 'Survey Preview'
            and item['Q6'] != ''
            and item['Screen 1'] == 'Yes'
            and item['Screen 2'] == '40'
        ]

In [2]:
from statistics import mean
from image_link_mapping import IMAGE_LINKS
from survey_response import SurveyResponse
from collections import defaultdict
# NOTE(review): absolute, machine-specific path -- the notebook only runs
# as-is on the original author's machine.
path = '/home/tyebkhad/GeneticBoulders/user_study/Evolving MoonBoard Routes_February 20, 2023_20.45.csv'
valid_res = read_valid_results(path)

# Positions in IMAGE_LINKS whose entry contains the text 'calibrate'.
calibration_indices = [
    idx for idx, link in enumerate(IMAGE_LINKS) if 'calibrate' in link
]

# Wrap every screened CSV row in a SurveyResponse.
responses = list(map(SurveyResponse, valid_res))
print(len(responses), 'valid responses')

# Minimum perc_calibrated_gradeable() value for a response to count as reliable.
THRESHOLD = 0.5
# The truthiness check drops falsy results before the numeric comparison runs.
reliable_responses = [
    resp
    for resp in responses
    if (p := resp.perc_calibrated_gradeable()) and p >= THRESHOLD
]

# Group the reliable responses by their max_climbed value.
rr_dict = defaultdict(list)
for r in reliable_responses:
    rr_dict[r.max_climbed].append(r)
print(len(reliable_responses), 'reliable responses')

# (max_climbed, percentage) pairs, largest first.
calib_percs = [
    (resp.max_climbed, resp.perc_calibrated_gradeable())
    for resp in reliable_responses
]
calib_percs.sort(reverse=True)
gen_percs = [
    (resp.max_climbed, resp.perc_generated_gradeable())
    for resp in reliable_responses
]
gen_percs.sort(reverse=True)
print('Calibration:', calib_percs)
print('Generated:', gen_percs)
# Average perc_generated_gradeable() over the reliable responses.
print(mean(perc for grade, perc in gen_percs))

# TODO: look at the correlation between max climbed grade and grading accuracy

# print(sorted((r.max_gradeable() for r in responses), reverse=True))
# percs = [r.perc_calibrated_gradeable() for r in responses]
# print(sorted(percs, reverse=True))
# for resp in responses:
#     print(resp.perc_calibrated_gradeable())

# got_v4 = []
# for item in valid_res:
#     v4_resp = item['17_Q10']
#     if 'V4' in v4_resp or 'V5' in v4_resp:
#         got_v4.append(item)
# print(len(got_v4))
# for r in got_v4:
#     print(r)

52 valid responses
47 reliable responses
Calibration: [(13, 0.6), (13, 0.5), (12, 0.6), (11, 0.8), (11, 0.6), (10, 0.8), (10, 0.7), (10, 0.7), (10, 0.6), (10, 0.5), (9, 0.8888888888888888), (9, 0.6666666666666666), (9, 0.6666666666666666), (9, 0.6666666666666666), (9, 0.6666666666666666), (9, 0.5555555555555556), (8, 1.0), (8, 0.875), (8, 0.75), (8, 0.75), (8, 0.75), (8, 0.625), (8, 0.5), (8, 0.5), (8, 0.5), (7, 0.8571428571428571), (7, 0.8571428571428571), (7, 0.7142857142857143), (7, 0.7142857142857143), (7, 0.7142857142857143), (7, 0.7142857142857143), (7, 0.5714285714285714), (7, 0.5714285714285714), (7, 0.5714285714285714), (7, 0.5714285714285714), (7, 0.5714285714285714), (6, 0.8333333333333334), (6, 0.6666666666666666), (5, 1.0), (5, 1.0), (5, 0.8), (5, 0.8), (5, 0.8), (4, 1.0), (4, 1.0), (4, 0.75), (4, 0.5)]
Generated: [(13, 0.85), (13, 0.45), (12, 0.7), (11, 0.85), (11, 0.4), (10, 0.85), (10, 0.85), (10, 0.65), (10, 0.6), (10, 0.55), (9, 0.8888888888888888), (9, 0.777777777777

## Recording
- 52 valid responses
### Allowing +- 1 in grade
- 16 responses with >= 50% correct calibration at or below their max climbing grade
    - these responses agreed with predicted grade 30% of the time
### Allowing +- 2 in grade
- 35 responses with >= 50% correct calibration at or below their max climbing grade
    - these responses agreed with predicted grade about 55% of the time
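### Allowing +- 3 in grade
- 47 responses with >= 50% correct calibration at or below their max climbing grade
    - these responses agreed with predicted grade 71% of the time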
