# [RnnSindy](https://github.com/AutoResearch/autora-theorist-rnn-ddm) Theorist and Synthetic Runner

Install the packages

In [None]:
# !pip install autora-theorist-rnn-sindy-rl
# !pip install autora-experimentalist-bandit-random

Import the packages

In [None]:
# Python Core
from dataclasses import dataclass, field
from typing import Optional, List

# External Vendors
import pandas as pd
import numpy as np
from sklearn.base import BaseEstimator
import torch

# General AutoRA
from autora.variable import VariableCollection, Variable
from autora.state import StandardState, on_state, Delta

# Experimentalists
from autora.experimentalist.bandit_random import bandit_random_pool
from autora.experimentalist.model_disagreement import model_disagreement_sampler_custom_distance

# Experiment Runner
from autora.experiment_runner.synthetic.psychology.q_learning import q_learning

# Theorist
from autora.theorist.rnn_sindy_rl import RNNSindy

Setting constants

In [None]:
# Default length of each generated reward trajectory (the `n_trials` default of the pooler).
TRIALS_PER_PARTICIPANTS = 100
# Number of conditions selected in every cycle of the closed loop.
SAMPLES_PER_CYCLE = 1
# NOTE(review): not referenced anywhere in the visible notebook — confirm whether it is still needed.
PARTICIPANTS_PER_CYCLE = 40
# Number of iterations of the closed loop at the end of the notebook.
CYCLES = 4
# Passed to `bandit_random_pool` as the `initial_probabilities` entry of each of the two rewards.
INITIAL_REWARD_PROBABILITY_RANGE = [.2, .8]
# Bounds for the uniform draw of sigma in the pooler; equal bounds effectively fix sigma at 0.2.
SIGMA_RANGE = [.2, .2]

# Training epochs passed to both RNNSindy theorists.
# NOTE(review): the trailing "# 100" presumably records the value intended for full runs — confirm.
EPOCHS = 10 # 100

# Seed used for both the NumPy and the PyTorch random number generators.
seed = 11

Setting seeds for reproducible results

In [None]:
# Seed NumPy's and PyTorch's global random number generators with the same value.
np.random.seed(seed)
torch.manual_seed(seed)

## Set up variables

- The independent variable is "reward-trajectory": a 2 x n_trials vector with entries between 0 and 1.
- The dependent variable is "choice-trajectory": a 2 x n_trials vector with boolean entries (one-hot encoded).

In [None]:
# Describe the experiment: one independent variable (the reward trajectory shown to the agent)
# and one dependent variable (the trajectory of choices the agent makes).
variables = VariableCollection(
    independent_variables=[Variable(name="reward-trajectory")],
    dependent_variables=[Variable(name="choice-trajectory")]
)

## State

We use a non-standard state that extends the standard state with an additional list of models (`models_additional`).

In [None]:
@dataclass(frozen=True)
class RnnState(StandardState):
    """StandardState extended with a second list of models.

    `models_additional` keeps the models of the additional theorist separate from
    the inherited `models` field, so the two can be compared with each other.
    """

    # The "delta": "extend" metadata presumably tells AutoRA to append Delta updates to
    # this list instead of replacing it — confirm against the autora.state documentation.
    models_additional:  List[BaseEstimator] = field(
        default_factory=list,
        metadata={"delta": "extend"},
    )

# initialize the state:
state = RnnState(variables=variables)


## Autora Components
### Experimentalists
#### Random Pool

Create a pooler on state that creates a pool of conditions

In [None]:
@on_state()
def pool_on_state(num_samples, n_trials=TRIALS_PER_PARTICIPANTS):
    """
    Create `num_samples` randomized reward trajectories of length `n_trials`.

    One sigma is drawn uniformly from `SIGMA_RANGE` and shared by both rewards;
    both rewards also use `INITIAL_REWARD_PROBABILITY_RANGE` as their initial
    probabilities. The pool is returned as a `Delta` whose `conditions` is a
    DataFrame with a single 'reward-trajectory' column.
    """
    sigma_low, sigma_high = SIGMA_RANGE
    shared_sigma = np.random.uniform(sigma_low, sigma_high)
    pool = bandit_random_pool(
        num_rewards=2,
        sequence_length=n_trials,
        initial_probabilities=[INITIAL_REWARD_PROBABILITY_RANGE] * 2,
        sigmas=[shared_sigma] * 2,
        num_samples=num_samples,
    )
    return Delta(conditions=pd.DataFrame({'reward-trajectory': pool}))

In [None]:
# Try the pooler: request three reward trajectories and display the resulting conditions.
state = pool_on_state(state, num_samples=3)
state.conditions

### Experiment Runner

Here, we create a synthetic runner that uses a q-learning algorithm

In [None]:
# Synthetic participant created with the default arguments of `q_learning`.
runner = q_learning()


@on_state()
def runner_on_state(conditions):
    """
    Run the synthetic experiment on `conditions` and return the collected data.

    The resulting `Delta` carries an `experiment_data` DataFrame with the reward
    trajectories, the runner's choices and the runner's choice probabilities.
    """
    choices, choice_probabilities = runner.run(
        conditions, return_choice_probabilities=True
    )
    columns = {
        'reward-trajectory': conditions['reward-trajectory'].tolist(),
        'choice-trajectory': choices,
        'choice-probability-trajectory': choice_probabilities,
    }
    return Delta(experiment_data=pd.DataFrame(columns))

In [None]:
# Run the synthetic experiment on the pooled conditions and display the collected data.
state = runner_on_state(state)
state.experiment_data

### Theorists
Here we create two RNNSindy theorists


In [None]:
# Two RNNSindy theorists that differ only in their polynomial degree (2 vs. 1);
# the disagreement between their predictions is used later to select new conditions.
# NOTE(review): the positional argument 2 is presumably the number of actions — confirm
# against the RNNSindy signature.
theorist = RNNSindy(2, epochs=EPOCHS, polynomial_degree=2)
theorist_additional = RNNSindy(2, epochs=EPOCHS, polynomial_degree=1)

@on_state()
def theorist_on_state(experiment_data):
    """
    Fit the primary theorist on the experiment data.

    Rewards are the inputs and choices the targets; the value returned by
    `theorist.fit` is handed back as a one-element `models` update.
    """
    fitted_model = theorist.fit(
        experiment_data['reward-trajectory'],
        experiment_data['choice-trajectory'],
    )
    return Delta(models=[fitted_model])


@on_state()
def theorist_additional_on_state(experiment_data):
    """
    Fit the additional theorist on the experiment data.

    Rewards are the inputs and choices the targets; the value returned by
    `theorist_additional.fit` is handed back as a one-element
    `models_additional` update.
    """
    fitted_model = theorist_additional.fit(
        experiment_data['reward-trajectory'],
        experiment_data['choice-trajectory'],
    )
    return Delta(models_additional=[fitted_model])

In [None]:
# Fit both theorists on the experiment data collected so far.
state = theorist_additional_on_state(state)
state = theorist_on_state(state)

# Show how many models each of the two model lists holds after fitting.
print(len(state.models_additional))
print(len(state.models))


In [None]:
# Predict with the most recently added primary model for the current conditions.
# NOTE(review): the whole conditions DataFrame is passed here, whereas the other predict/sampler
# calls in this notebook pass only the 'reward-trajectory' column — confirm both are accepted.
state.models[-1].predict(state.conditions)

Here, we see that the prediction of a model is a list of two-dimensional vectors:
`array([[0.5, 0.5], [0.68..., 0.31...], ...])`.
The standard model disagreement sampler only works on predictions that are single numbers. Therefore, we define our own distance function that works on two lists with the described format.

In [None]:
def custom_distance(prob_array_a, prob_array_b):
    """Return the mean squared Euclidean distance between two predictions.

    The squared differences are summed over the last axis (the entries of one
    probability vector) and the result is averaged over all remaining entries.
    For inputs that are sequences of probability vectors, e.g. shape
    ``(n_trials, 2)``, every vector therefore contributes to the distance.
    For a single pair of two-element vectors the result is
    ``(a[0] - b[0])**2 + (a[1] - b[1])**2``.

    Args:
        prob_array_a: Array-like prediction of the first model.
        prob_array_b: Array-like prediction of the second model; must be
            broadcastable against ``prob_array_a``.

    Returns:
        The distance as a NumPy float scalar.
    """
    # Indexing only [0] and [1] would ignore everything after the first two
    # entries when the inputs are sequences of vectors, so operate on the
    # whole arrays instead.
    difference = (
        np.asarray(prob_array_a, dtype=float)
        - np.asarray(prob_array_b, dtype=float)
    )
    return np.mean(np.sum(difference ** 2, axis=-1))

# Smoke test: compare the first prediction of the two most recently added models.
pred_1 = state.models[-1].predict(state.conditions)[0]  # first prediction of model 1
pred_2 = state.models_additional[-1].predict(state.conditions)[0]  # first prediction of model 2

# Displayed as the cell output.
custom_distance(pred_1, pred_2)

We can now use the `custom_distance` function in our sampler:

In [None]:
@on_state()
def model_disagreement_on_state(
        conditions, models, models_additional, num_samples):
    """
    Select `num_samples` conditions via the model disagreement sampler.

    The sampler receives the 'reward-trajectory' column of `conditions`, the
    most recently added entry of `models` and of `models_additional`, and
    `custom_distance` as the distance function. Its result is returned as the
    new `conditions`.
    """
    candidate_pool = conditions['reward-trajectory']
    latest_models = [models[-1], models_additional[-1]]
    selected = model_disagreement_sampler_custom_distance(
        conditions=candidate_pool,
        models=latest_models,
        distance_fct=custom_distance,
        num_samples=num_samples,
    )
    return Delta(conditions=selected)

Now, we can run a full loop with an RNN synthetic model.

In [None]:
# Start the closed loop from a fresh state that holds only the variable definitions.
state = RnnState(variables=variables)

In [None]:
for c in range(1, CYCLES + 1):
    # Choose the conditions for this cycle.
    if not state.models:
        # No model has been fitted yet: use a random pool directly.
        state = pool_on_state(state, num_samples=SAMPLES_PER_CYCLE)
    else:
        # Draw 20 random candidates and keep those the two theorists disagree on most.
        state = pool_on_state(state, num_samples=20)
        state = model_disagreement_on_state(state, num_samples=SAMPLES_PER_CYCLE)

    # Collect synthetic data for the chosen conditions.
    state = runner_on_state(state)

    # Fit both theorists on the experiment data.
    state = theorist_on_state(state)
    state = theorist_additional_on_state(state)


In [None]:
# Predictions of the most recently added primary model for the conditions currently stored in the state.
out = state.models[-1].predict(state.conditions['reward-trajectory'])