In [294]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import numpy as np
import seaborn as sns
from datetime import datetime
import requests


from dotenv import load_dotenv

In [295]:
load_dotenv()

BACKEND_USER = os.environ['BACKEND_USER']
BACKEND_PASSWORD = os.environ['BACKEND_PASSWORD']
BACKEND_URL = os.environ['BACKEND_URL']
# BACKEND_URL = 'http://localhost:5050'
# BACKEND_USER = 'admin'
# BACKEND_PASSWORD = 'admin'

experiment_type = 'reward-network-iii-exp-v2'

In [313]:
from utils.process import process_moves


url = f'{BACKEND_URL}/results'
headers = {'Accept': 'application/json'}
auth = (BACKEND_USER, BACKEND_PASSWORD)
current_datatime = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')

#---sessions----
sessions = requests.get(f'{url}/sessions?experiment_type={experiment_type}', headers=headers, auth=auth)
sessions_json = sessions.json()


# #---subjects----
subjects = requests.get(f'{url}/subjects', headers=headers, auth=auth)
subjects_df = pd.DataFrame(subjects.json())
s_to_p = subjects_df.set_index('session_id')['prolific_id'].to_dict()

In [297]:
len(sessions_json)

1210

In [298]:
session_df = pd.DataFrame(sessions_json)

In [299]:
written_strategies = []
p_moves = []
player = []
for session in sessions_json:
    advisor = None
    for trial in session['trials']:
        finished_at_dt = datetime.fromisoformat(trial['finished_at']) if trial['finished_at'] is not None else None
        started_at_dt = datetime.fromisoformat(trial['started_at']) if trial['started_at'] is not None else None
        # compute trial duration
        if trial['finished_at'] is None or trial['started_at'] is None:
            duration = None
        else:
            duration = (finished_at_dt - started_at_dt).total_seconds()
        trials_info = {
            'session_id': session['_id'],
            'trial_id': trial['id'],
            'trial_type': trial['trial_type'],
            'trial_started_at': finished_at_dt,
            'trial_finished_at': started_at_dt,
            'trial_duration': duration
        }
        if trial['solution'] is not None:
            moves = process_moves(trial['network'], trial['solution']['moves'], trial['solution']['correctRepeats'])
            if len(moves) > 0:
                moves = [{**m, **trials_info, 'solution_total_score': trial['solution']['score'], 'n_moves': len(trial['solution']['moves'])} for m in moves]
                p_moves.extend(moves)
            else:
                p_moves.append(trials_info)
        else:
            p_moves.append(trials_info)
        if trial['written_strategy'] is not None and session['ai_player'] is False:
            written_strategies.append({
                'session_id': session['_id'],
                'trial_id': trial['id'],
                'text': trial['written_strategy']['strategy'],
            })
        if trial['advisor'] is not None:
            if advisor is None:
                advisor = trial['advisor']['advisor_id']
            else:
                assert advisor == trial['advisor']['advisor_id'], f'advisor changed during session {advisor} {trial["advisor"]["advisor_id"]}'
    ai_str = 'AI' if session['ai_player'] else 'Human'
    player.append({
        'session_id': session['_id'],
        'session_name': f"Rep {session['experiment_num']} - Gen {session['generation']} - {session['condition']} - {session['session_num_in_generation']} - {ai_str}",
        'replication_idx': session['experiment_num'],
        'condition': session['condition'],
        'generation': session['generation'],
        'within_generation_idx': session['session_num_in_generation'],
        'started_at': session['started_at'],
        'time_spend': session['time_spent'],
        'expired': session['expired'],
        'replaced': session['replaced'],
        'completed': session['completed'],
        'finished': session['finished'],
        'ai_player': session['ai_player'],
        'simulated_subject': session['simulated_subject'],
        'advisor': advisor,
        'player_score': session['average_score'],
    })

moves_df = pd.DataFrame(p_moves)
strategy_df = pd.DataFrame(written_strategies)
player_df = pd.DataFrame(player)
# trial_idx = {s: i for i, s in enumerate(moves_df['trial_id'].unique())}
wr_trial_idx = {s: i for i, s in enumerate(strategy_df['trial_id'].sort_values().unique())}

# moves_df['trial_idx'] = moves_df['trial_id'].apply(lambda x: trial_idx[x])

moves_df = player_df.merge(moves_df, on='session_id', how='left')

strategy_df['written_strategy_idx'] = strategy_df['trial_id'].apply(lambda x: wr_trial_idx[x])

strategy_df = strategy_df.sort_values(by=['session_id', 'written_strategy_idx'])

In [300]:
rel_columns = [
    'ai_player', 'trial_started_at', 'trial_finished_at','trial_moves', 'expired', 'replaced', 'completed', 'finished', 'started_at', 'trial_type','n_moves', 'generation', 'trial_duration'
]

In [301]:
moves_df['trial_moves'] = moves_df.groupby(['session_id', 'trial_id'])['move_idx'].transform('count')


In [302]:
trial_df = moves_df.groupby(['session_id', 'trial_id'])[rel_columns].first().reset_index()


trial_df['trial_completed'] = trial_df['trial_moves'] == 10

trial_df['demonstration_trial_completed'] = trial_df['trial_completed'] & (trial_df['trial_type'] == 'demonstration')

trial_df['demonstration_trial_empty'] = (trial_df['trial_moves'] == 1) & (trial_df['trial_type'] == 'demonstration')

trial_df['sum_demonstration_completed'] = trial_df.groupby('session_id')['demonstration_trial_completed'].transform('sum')

trial_df['all_demonstration_completed'] = trial_df['sum_demonstration_completed'] == 4


trial_df['started'] = trial_df['started_at'].isna() == False

trial_df['trial_started'] = trial_df['trial_started_at'].isna() == False
trial_df['trial_finished'] = trial_df['trial_finished_at'].isna() == False


# remove AI
trial_df = trial_df[trial_df['ai_player'] == False]

# remove not started
trial_df = trial_df[trial_df['started']]

# remove gen 1
trial_df = trial_df[trial_df['generation'] == 0]

trial_df['sum_trial_started'] = trial_df.groupby('session_id')['trial_started'].transform('sum')
trial_df['sum_trial_finished'] = trial_df.groupby('session_id')['trial_finished'].transform('sum')

s_df = trial_df.groupby('session_id').first().reset_index()

In [303]:
# started

print('started', trial_df.session_id.nunique())

started 31


In [304]:
# completed

print('completed', trial_df[trial_df['completed']].session_id.nunique(), '\n')

print('started trials for completed', s_df[s_df['completed']].sum_trial_started.value_counts(), '\n')

print('finished trials for completed', s_df[s_df['completed']].sum_trial_started.value_counts(), '\n')

completed 21 

started trials for completed sum_trial_started
19    21
Name: count, dtype: int64 

finished trials for completed sum_trial_started
19    21
Name: count, dtype: int64 



In [305]:
# not completed and not finished

print('not completed and not finished', trial_df[~trial_df['completed'] & ~trial_df['finished']].session_id.nunique(), '\n')

print('started trials for not completed and not finished', s_df[~s_df['completed'] & ~s_df['finished']].sum_trial_started.value_counts(), '\n')

print('finished trials for not completed and not finished', s_df[~s_df['completed'] & ~s_df['finished']].sum_trial_started.value_counts(), '\n')

not completed and not finished 6 

started trials for not completed and not finished sum_trial_started
2    5
4    1
Name: count, dtype: int64 

finished trials for not completed and not finished sum_trial_started
2    5
4    1
Name: count, dtype: int64 



In [306]:
# not completed but finished

print('not completed but finished', trial_df[~trial_df['completed'] & trial_df['finished']].session_id.nunique(), '\n')

print('started trials for completed but finished', s_df[~s_df['completed'] & s_df['finished']].sum_trial_started.value_counts(), '\n')

print('finished trials for completed but finished', s_df[~s_df['completed'] & s_df['finished']].sum_trial_started.value_counts(), '\n')

not completed but finished 4 

started trials for completed but finished sum_trial_started
19    4
Name: count, dtype: int64 

finished trials for completed but finished sum_trial_started
19    4
Name: count, dtype: int64 



## What happened to participants not finishing?

In [317]:
# get not finished session ids

not_completed_session_ids = s_df[~s_df['finished']].session_id.unique()
not_completed_session_ids

array(['65d32e25dd6859254f5c592f', '65d32e25dd6859254f5c5932',
       '65d32e36dd6859254f5c597d', '65d32e36dd6859254f5c597f',
       '65d32e37dd6859254f5c5981', '65d32e37dd6859254f5c5986'],
      dtype=object)

In [328]:
# not finished prolifc ids

not_completed_prolific_ids = [s_to_p[s] for s in not_completed_session_ids]
not_completed_prolific_ids

['5ee8a0884efd881a3e166a65',
 '5c5219e36021330001f990f1',
 '6139e3e9d40ebeda4585aa24',
 '5c9a03e39de0ed0001388d06',
 '5a995b74873cda0001dca553',
 '5ee7dcd80e830a0c4b5c5b18']

5ee8a0884efd881a3e166a65: timed out
5c5219e36021330001f990f1: returned
6139e3e9d40ebeda4585aa24: timed out
5c9a03e39de0ed0001388d06: returned
5a995b74873cda0001dca553: timed out
5ee7dcd80e830a0c4b5c5b18: returned

**All of the participants who did not finish the competition either timed out or returned.**

## What happened to participants finishing but not completing the experiment?

In [320]:
s_df_incoplete = s_df[~s_df['completed'] & s_df['finished']]
trial_df_incoplete = trial_df[~trial_df['completed'] & trial_df['finished']]

In [330]:
trial_df_incoplete.groupby(['session_id','trial_id', 'trial_type', ]).n_moves.value_counts()

session_id                trial_id  trial_type     n_moves
65d32e25dd6859254f5c592d  4         individual     11.0       1
                          5         individual     11.0       1
                          8         individual     11.0       1
                          9         individual     11.0       1
                          10        individual     11.0       1
                          11        individual     11.0       1
                          14        demonstration  11.0       1
                          15        demonstration  11.0       1
                          16        demonstration  11.0       1
                          17        demonstration  10.0       1
65d32e25dd6859254f5c5935  4         individual     11.0       1
                          5         individual     9.0        1
                          8         individual     11.0       1
                          9         individual     11.0       1
                          10        individua

In [325]:
incompleted_session_ids = s_df_incoplete.session_id.unique()

In [327]:
incomplete_prolific_ids = [s_to_p[s] for s in incompleted_session_ids]
incomplete_prolific_ids

['5788c16f275be600013590a8',
 '5995aee73ae1c8000110650d',
 '6317621f1310b8fa030c428a',
 '5af1be59f099a50001fde3bf']

5788c16f275be600013590a8: approved
5995aee73ae1c8000110650d: approved
6317621f1310b8fa030c428a: approved
5af1be59f099a50001fde3bf: approved