# Download and preprocess the data

This notebook extracts the data from the database and preprocesses it for use in the descriptive and quantitative analysis.

In [1]:
import os
import pandas as pd
from dotenv import load_dotenv

In [None]:
# Load environment variables via python-dotenv before anything else runs.
load_dotenv()

# Backend connection settings (defined here; not referenced by the visible cells).
BACKEND_URL = 'http://localhost:5050'
BACKEND_USER = 'admin'
BACKEND_PASSWORD = 'admin'

# Folder holding the raw session export that is read below.
input_folder = '../data/exp_raw'

# Folder receiving the processed CSV files.
output_folder = '../data/exp_processed'

# Make sure the output folder exists. `exist_ok=True` avoids the
# check-then-create race of a separate `os.path.exists` test.
os.makedirs(output_folder, exist_ok=True)

In [3]:
from utils.process import process_moves


# Load the completed-sessions export from file.
import json
file_name = f'{input_folder}/sessions_completed.json'
# Read explicitly as UTF-8 so decoding does not depend on the platform's
# default locale encoding.
with open(file_name, 'r', encoding='utf-8') as f:
    sessions_json = json.load(f)


# Flatten the nested session -> trial structure into three record lists:
#   p_moves            - one dict per processed move, tagged with trial info
#   written_strategies - one dict per written strategy of non-AI sessions
#   player             - one dict per session with session-level metadata
written_strategies = []
p_moves = []
player = []
for session in sessions_json:
    # Skip sessions explicitly flagged as not completed.
    if session['completed'] is False:
        continue
    # Advisor id seen in this session's trials; stays None if no trial has one.
    advisor = None
    for trial in session['trials']:
        if trial['solution'] is not None:
            moves = process_moves(trial['network'], trial['solution']['moves'], trial['solution']['correctRepeats'])
            trials_info = {
                'session_id': session['_id'],
                'trial_id': trial['id'],
                'trial_type': trial['trial_type'],
                'network_id': trial['network']['network_id'],
                'solution_total_score': trial['solution']['score'],
            }
            # Attach the trial-level fields to every move; on a key clash the
            # trial-level value wins because it is unpacked last.
            moves = [{**m, **trials_info} for m in moves]
            p_moves.extend(moves)
        # Written strategies are only kept for sessions where ai_player is False.
        if trial['written_strategy'] is not None and session['ai_player'] is False:
            written_strategies.append({
                'session_id': session['_id'],
                'trial_id': trial['id'],
                'text': trial['written_strategy']['strategy'],
            })
        if trial['advisor'] is not None:
            if advisor is None:
                advisor = trial['advisor']['advisor_id']
            elif advisor != trial['advisor']['advisor_id']:
                # Explicit raise instead of `assert`, so the consistency check
                # is not stripped when Python runs with -O.
                raise ValueError(f'advisor changed during session {advisor} {trial["advisor"]["advisor_id"]}')
    ai_str = 'AI' if session['ai_player'] else 'Human'
    player.append({
        'session_id': session['_id'],
        'session_name': f"Rep {session['experiment_num']} - Gen {session['generation']} - {session['condition']} - {session['session_num_in_generation']} - {ai_str}",
        'replication_idx': session['experiment_num'],
        'condition': session['condition'],
        'generation': session['generation'],
        'within_generation_idx': session['session_num_in_generation'],
        'started_at': session['started_at'],
        'time_spend': session['time_spent'],
        'expired': session['expired'],
        'replaced': session['replaced'],
        'ai_player': session['ai_player'],
        'simulated_subject': session['simulated_subject'],
        'advisor': advisor,
        'player_score': session['average_score'],
    })


In [4]:
# Turn the collected record lists into DataFrames.
moves_df = pd.DataFrame(p_moves)
# Explicit columns keep the frame usable even when no written strategies were
# collected; without them an empty list yields a frame with no columns and the
# 'trial_id' lookups below raise KeyError.
strategy_df = pd.DataFrame(written_strategies, columns=['session_id', 'trial_id', 'text'])
player_df = pd.DataFrame(player)

# Map each distinct trial id that has a written strategy to its rank (0-based)
# among the sorted trial ids.
wr_trial_idx = {s: i for i, s in enumerate(strategy_df['trial_id'].sort_values().unique())}

# Prefix every move row with its session-level metadata; the left join keeps
# sessions that have no moves.
moves_df = player_df.merge(moves_df, on='session_id', how='left')

strategy_df['written_strategy_idx'] = strategy_df['trial_id'].apply(lambda x: wr_trial_idx[x])

strategy_df = strategy_df.sort_values(by=['session_id', 'written_strategy_idx'])

# Create the output folder if it does not exist yet.
os.makedirs(output_folder, exist_ok=True)

# Write the three tables as CSV files without the DataFrame index.
s_columns = [
    'session_id', 'trial_id', 'written_strategy_idx', 'text']
strategy_df[s_columns].to_csv(os.path.join(output_folder, 'strategy.csv'), index=False)
moves_df.to_csv(os.path.join(output_folder, 'moves.csv'), index=False)
player_df.to_csv(os.path.join(output_folder, 'player.csv'), index=False)