# preliminaries


*   Installations
*   Imports





In [None]:
!pip install -r /content/drive/MyDrive/IKW/Autora_Bandit/req.txt

# To-do list:


*   run-1 : when γ=0
*   run-2 : when γ!=0 but is very small
*   run-3 : when γ is not small
*   run-4 : Define another theorist instead of SINDy part
*   Reform Experimentalist



In [None]:
# *** IMPORTS *** #

# Python Core
from dataclasses import dataclass, field
from typing import List
import random, json

# External Vendors
import pandas as pd
import numpy as np
from sklearn.base import BaseEstimator
import torch

# General AutoRA
from autora.variable import VariableCollection, Variable
from autora.state import StandardState, on_state, Delta
"""
Bandit Workflow
    Reward Trajectory as Conditions
    Theorist: Rnn Sindy Theorist
    Experimentalist: Random Sampling + Model Disagreement
    Runner: Synthetic + Firebase Runner (+ Prolific recruitment)
"""

# *** IMPORTS *** #

# Python Core
from dataclasses import dataclass, field
from typing import List
import random, json

# External Vendors
import pandas as pd
import numpy as np
from sklearn.base import BaseEstimator
import torch

# General AutoRA
from autora.variable import VariableCollection, Variable
from autora.state import StandardState, on_state, Delta

# Experimentalists
from autora.experimentalist.bandit_random import bandit_random_pool
from autora.experimentalist.model_disagreement import model_disagreement_sampler_custom_distance

# Experiment Runner
from autora.experiment_runner.synthetic.psychology.q_learning import q_learning
from autora.experiment_runner.firebase_prolific import firebase_runner, firebase_prolific_runner

# Theorist
from autora.theorist.rnn_sindy_rl import RNNSindy
from autora.theorist.rnn_sindy_rl.utils.parse import parse as parse_equation
# *** EXPERIMENT CONFIGURATION *** #

# Which experiment runner the closed loop uses.
RUNNER_TYPE = 'synthetic'

# Default length of every generated reward trajectory (trials per participant).
TRIALS_PER_PARTICIPANTS = 100
# Number of conditions handed to the runner in every cycle.
SAMPLES_PER_CYCLE = 1
# NOTE(review): not referenced in the visible part of this file -- presumably
# meant for the online (Firebase/Prolific) runner; confirm before removing.
PARTICIPANTS_PER_CYCLE = 40
# Number of closed-loop iterations.
CYCLES = 4
# [low, high] range used as the initial reward probability of each arm.
INITIAL_REWARD_PROBABILITY_RANGE = [.2, .8]
# [low, high] bounds of the uniform draw for the sigma passed to the pool.
SIGMA_RANGE = [.2, .2]

# Training epochs per theorist fit (the original note mentions 100 as an alternative).
EPOCHS = 10

seed = 11


def set_global_seed(value):
    """Seed every global random number generator this file has in scope.

    Seeds the standard-library ``random`` module in addition to NumPy and
    PyTorch, so that code relying on any of the three is reproducible.

    Args:
        value: integer seed shared by all three generators.
    """
    random.seed(value)
    np.random.seed(value)
    torch.manual_seed(value)


# for reproducible results:
if seed is not None:
    set_global_seed(seed)

# *** AUTORA SETUP *** #

# ** Set up variables ** #
# Independent variables:
#   "reward-trajectory": the per-trial rewards of the 2 arms, entries between 0 and 1
#   "previous-choice-trajectory": declared as a second independent variable
# Dependent variable:
#   "choice-trajectory": the per-trial choices with boolean entries (one hot encoded)

variables = VariableCollection(
    independent_variables=[
        Variable(name=variable_name)
        for variable_name in ("reward-trajectory", "previous-choice-trajectory")
    ],
    dependent_variables=[
        Variable(name="choice-trajectory"),
    ],
)


@dataclass(frozen=True)
class RnnState(StandardState):
    """StandardState with an extra list that holds the second theorist's models."""

    # Fitted estimators of the additional theorist. The "extend" delta
    # metadata mirrors the original declaration; presumably a
    # Delta(models_additional=[...]) is appended to this list -- confirm
    # against the autora.state documentation.
    models_additional: List[BaseEstimator] = field(
        metadata={"delta": "extend"},
        default_factory=list,
    )


# initialize the state: it starts out carrying only the variable definitions
state = RnnState(variables=variables)

@on_state()
def pool_on_state(num_samples, n_trials=TRIALS_PER_PARTICIPANTS):
    """
    Create `num_samples` randomized reward-trajectories of length `n_trials`.

    Only the rewards are defined here, not the choices. A single sigma is
    drawn uniformly from SIGMA_RANGE and shared by both arms.

    Returns a Delta whose conditions are a DataFrame with one
    'reward-trajectory' column.
    """
    sigma_low = SIGMA_RANGE[0]
    sigma_high = SIGMA_RANGE[1]
    shared_sigma = np.random.uniform(sigma_low, sigma_high)

    reward_pool = bandit_random_pool(
        num_rewards=2,
        sequence_length=n_trials,
        initial_probabilities=[
            INITIAL_REWARD_PROBABILITY_RANGE,
            INITIAL_REWARD_PROBABILITY_RANGE,
        ],
        sigmas=[shared_sigma, shared_sigma],
        num_samples=num_samples,
    )

    pool_columns = {'reward-trajectory': reward_pool}
    return Delta(conditions=pd.DataFrame(pool_columns))


def custom_distance(prob_array_a, prob_array_b):
    """Return the mean summed squared difference between two probability arrays.

    Only the entries at index 0 and index 1 of each argument are compared.
    """
    first_gap = prob_array_a[0] - prob_array_b[0]
    second_gap = prob_array_a[1] - prob_array_b[1]
    squared_gap = first_gap ** 2 + second_gap ** 2
    return np.mean([squared_gap])

###############  Experimentalist ##########################

@on_state()
def model_disagreement_on_state(
        conditions, models, models_additional, num_samples):
    """
    Pick `num_samples` reward-trajectories from the candidate conditions.

    The selection is delegated to the model disagreement sampler, which is
    given the most recent model of each theorist and `custom_distance` as the
    distance function. The sampler's result replaces the conditions.
    """
    candidate_trajectories = conditions['reward-trajectory']
    newest_models = [models[-1], models_additional[-1]]
    selected_conditions = model_disagreement_sampler_custom_distance(
        conditions=candidate_trajectories,
        models=newest_models,
        distance_fct=custom_distance,
        num_samples=num_samples,
    )
    return Delta(conditions=selected_conditions)
################# DATA ##########################
runner = q_learning()


def _previous_choices(choice_trajectory):
    """Shift one choice trajectory by a trial: prepend a [0, 0] row, drop the last trial."""
    return np.concatenate((np.array([[0, 0]]), choice_trajectory[:-1]))


@on_state()
def runner_on_state_synthetic(conditions):
    """
    Run the synthetic runner on `conditions` and collect the experiment data.

    Returns a Delta whose experiment_data has one row per condition with the
    columns 'reward-trajectory', 'choice-trajectory',
    'previous-choice-trajectory' and 'choice-probability-trajectory'.
    """
    choices, choice_probabilities = runner.run(conditions, return_choice_probabilities=True)
    experiment_data = pd.DataFrame({
        'reward-trajectory': conditions['reward-trajectory'].tolist(),
        'choice-trajectory': choices,
        # One shifted trajectory per participant. Building it from choices[0]
        # only yields a single row, which breaks the DataFrame as soon as
        # more than one condition is run per cycle.
        'previous-choice-trajectory': [_previous_choices(choice) for choice in choices],
        'choice-probability-trajectory': choice_probabilities
    })
    return Delta(experiment_data=experiment_data)

####################   Theorist   ####################

# Two theorists that differ only in the polynomial degree (2 vs. 1); their
# fitted models are the pair compared during model disagreement sampling.
# NOTE(review): the leading positional argument 2 is presumably the number
# of actions -- confirm against the RNNSindy signature.
theorist = RNNSindy(2, epochs=EPOCHS, polynomial_degree=2)
theorist_additional = RNNSindy(2, epochs=EPOCHS, polynomial_degree=1)


@on_state()
def theorist_on_state(experiment_data):
    """
    Fit the degree-2 theorist on the collected data and store it under `models`.

    NOTE: only 'reward-trajectory' is used as input; the
    'previous-choice-trajectory' column is not passed to the theorist.
    """
    rewards = experiment_data['reward-trajectory']
    observed_choices = experiment_data['choice-trajectory']
    fitted_model = theorist.fit(rewards, observed_choices)
    return Delta(models=[fitted_model])


@on_state()
def theorist_additional_on_state(experiment_data):
    """
    Fit the additional theorist on the collected data.

    Uses 'reward-trajectory' as input and 'choice-trajectory' as target; the
    fitted model is stored under `models_additional`.
    """
    rewards = experiment_data['reward-trajectory']
    observed_choices = experiment_data['choice-trajectory']
    fitted_model = theorist_additional.fit(rewards, observed_choices)
    return Delta(models_additional=[fitted_model])


################ LOOP #########################


# Number of candidate reward-trajectories generated before the model
# disagreement sampler narrows them down to SAMPLES_PER_CYCLE.
CANDIDATE_POOL_SIZE = 20

for c in range(1, CYCLES + 1):

    # Model disagreement indexes the latest model of each theorist, so it is
    # only possible once a model exists; the first cycle samples directly
    # from the random pool.
    if state.models:
        state = pool_on_state(state, num_samples=CANDIDATE_POOL_SIZE)
        state = model_disagreement_on_state(state, num_samples=SAMPLES_PER_CYCLE)
    else:
        state = pool_on_state(state, num_samples=SAMPLES_PER_CYCLE)

    if RUNNER_TYPE == 'synthetic':
        state = runner_on_state_synthetic(state)
    else:
        # Fail loudly: without a runner no new data is collected and the
        # theorists below would be fitted without this cycle's observations.
        raise ValueError(
            f"Unsupported RUNNER_TYPE {RUNNER_TYPE!r}: this loop only handles 'synthetic'"
        )

    state = theorist_on_state(state)
    state = theorist_additional_on_state(state)

    # Most recent fit of each theorist
    model = state.models[-1]
    model_additional = state.models_additional[-1]

    equations_model = parse_equation(model)
    equation_model_additional = parse_equation(model_additional)

    print('# MODEL DEGREE = 2#')
    print(f'chosen: {equations_model[0]}')
    print(f'non chosen: {equations_model[1]}')

    print('# MODEL DEGREE = 1#')
    print(f'chosen: {equation_model_additional[0]}')
    print(f'non chosen: {equation_model_additional[1]}')



Training the RNN...
Epoch 1/10 --- Loss: 2.6254387; Time: 1.5581s; Convergence value: 1.63e+00
Epoch 2/10 --- Loss: 1.5450935; Time: 1.5929s; Convergence value: 1.34e+00
Epoch 3/10 --- Loss: 1.1207993; Time: 2.1299s; Convergence value: 1.02e+00
Epoch 4/10 --- Loss: 0.5710946; Time: 2.3428s; Convergence value: 8.86e-01
Epoch 5/10 --- Loss: 0.2770328; Time: 1.7280s; Convergence value: 7.49e-01
Epoch 6/10 --- Loss: 0.2338850; Time: 1.5957s; Convergence value: 6.07e-01
Epoch 7/10 --- Loss: 0.1286141; Time: 1.5927s; Convergence value: 5.15e-01
Epoch 8/10 --- Loss: 0.1880593; Time: 1.5883s; Convergence value: 4.37e-01
Epoch 9/10 --- Loss: 0.1537335; Time: 1.5860s; Convergence value: 3.71e-01
Epoch 10/10 --- Loss: 0.1805673; Time: 1.5851s; Convergence value: 2.30e-01
Maximum number of training epochs reached.
Model did not converge yet.
Test the trained RNN on a test dataset...
Epoch 1/1 --- Loss: 0.1470104; Time: 0.1356s; Convergence value: nan
Maximum number of training epochs reached.
Mode