In [None]:
import utils
import pandas as pd
from pprint import pprint
import numpy as np

In [None]:
# Project helper object: later cells call `quizzes.get(row)` for each logged row
# and iterate over `quizzes.quizzes` (quiz name -> version data).
quizzes = utils.Quizzes()

In [None]:
# Read the 'answers' log through the project loader.
# NOTE(review): presumably a DataFrame with one row per submitted quiz attempt;
# later cells call .iterrows()/.apply() on it and use the columns timestamp,
# attempt, answers, sessionId, quizName and quizHash. Confirm against utils.load_log.
answers = utils.load_log('answers')

In [None]:
# First pass: hand every logged row to the quiz registry so that all quiz data
# (and its version metadata) is loaded before any version number is read back.
for _index, log_row in answers.iterrows():
    quizzes.get(log_row)

# Second pass: replace each row's quiz hash by the version number the registry reports.
answers['version'] = answers.apply(
    lambda log_row: quizzes.get(log_row)['version'],
    axis=1,
)

# Timestamps are interpreted as epoch milliseconds and turned into datetimes.
answers['timestamp'] = pd.to_datetime(answers['timestamp'], unit='ms')

# Keep only rows whose `attempt` field is 0 (presumably the first attempt; verify).
is_attempt_zero = answers['attempt'] == 0
answers = answers[is_attempt_zero]

# Only keep the latest complete answer for a given user/quiz pair
groups = ['sessionId', 'quizName', 'quizHash']


def get_latest(group):
    """Return the row of ``group`` that has the greatest ``timestamp``."""
    # `Series.argmax` returned an index *label* before pandas 1.0 and a
    # *position* afterwards, so feeding it to `iloc` is version-dependent.
    # After `reset_index(drop=True)` labels equal positions, and `idxmax`
    # has always returned a label, so this is positional on every version.
    latest_position = group['timestamp'].reset_index(drop=True).idxmax()
    return group.iloc[latest_position]


def did_complete_quiz(row):
    """True when ``row.answers`` has one entry per question of the schema
    that ``quizzes.get(row)`` returns for this row."""
    return len(row.answers) == len(quizzes.get(row)['schema']['questions'])


def _fraction_correct(quiz_answers):
    """Share of entries flagged ``correct``; NaN when there are no entries."""
    if len(quiz_answers) == 0:
        # A quiz without questions would otherwise raise ZeroDivisionError.
        return float('nan')
    return sum(1 for entry in quiz_answers if entry['correct']) / len(quiz_answers)


answers = (
    answers
    .loc[lambda df: df.apply(did_complete_quiz, axis=1)]
    .groupby(groups)
    .apply(get_latest)
    # Depending on the pandas version the grouping columns are either passed
    # through `get_latest` (and then duplicate the group index) or left out;
    # errors='ignore' makes the drop work in both cases.
    .drop(columns=groups, errors='ignore')
    .reset_index()
)

answers['frac_correct'] = answers['answers'].map(_fraction_correct)

# Quiz-level performance

In [None]:
from scipy import stats

In [None]:
def ci(ser):
    """Format a numeric series as ``'<mean> ± <2*SEM> (N = <count>)'``.

    The half-width is twice the standard error of the mean (``ser.sem()``),
    i.e. roughly a 95% interval when the sample mean is approximately normal.
    Both numbers are rendered with two decimals; ``N`` is ``len(ser)``.
    """
    half_width = 2 * ser.sem()
    center = ser.mean()
    return f'{center:.02f} ± {half_width:.02f} (N = {len(ser)})'
# One formatted "mean ± 2*SEM (N = ...)" summary of frac_correct per (quiz, version).
(
    answers
    .groupby(['quizName', 'version'])['frac_correct']
    .apply(ci)
)

In [None]:
# Count, mean and median of frac_correct per quiz, lowest mean first.
(
    answers
    .groupby('quizName')['frac_correct']
    .describe()
    .loc[:, ['count', 'mean', '50%']]
    .sort_values('mean')
)

# Question-level performance

In [None]:
from statsmodels.stats.proportion import proportion_confint

# Per-question report for every quiz (in name order): share of correct answers
# with the interval returned by `proportion_confint` (default settings), the
# question prompt, and the list of wrong answers that were given.
# NOTE(review): answers from every version of a quiz are tallied against the
# question list of the highest-numbered version; if versions differ in their
# questions the per-question numbers mix different questions. Confirm intended.
for quiz_name, versions in sorted(quizzes.quizzes.items(), key=lambda item: item[0]):
    newest_entry = max(versions['schemas'].values(), key=lambda entry: entry['version'])
    schema = newest_entry['schema']

    quiz_answers = answers[answers['quizName'] == quiz_name]
    N = len(quiz_answers)
    if N < 2:
        # Quizzes with fewer than two kept submissions are skipped.
        continue

    questions = schema['questions']
    # One tally per question: number of correct answers and the wrong answers seen.
    tallies = [{'n': 0, 'bad_answers': []} for _ in questions]
    for submission in quiz_answers['answers']:
        for position, given in enumerate(submission):
            tally = tallies[position]
            if not given['correct']:
                tally['bad_answers'].append(given['answer'])
            else:
                tally['n'] += 1

    print(f'{"="*10} QUIZ: {quiz_name} {"="*10}')
    for number, (question, tally) in enumerate(zip(questions, tallies), start=1):
        lower, upper = proportion_confint(tally['n'], N)
        print(f"QUESTION {number}")
        print(f'%correct: {tally["n"]/N:.02f} ([{lower:.02f} - {upper:.02f}], N = {N})')
        pprint(question['prompt'])
        print(tally['bad_answers'])
        print()

    print('\n\n')

# User-level performance

In [None]:
# Count, mean and median of frac_correct per session, fewest quizzes first.
(
    answers
    .groupby('sessionId')['frac_correct']
    .describe()
    .loc[:, ['count', 'mean', '50%']]
    .sort_values('count')
)