In [71]:
import numpy as np
import pandas as pd
import random

In [72]:
# np.random.seed(123)

In [98]:
def sample_bernouli(theta = .5, n_samples = 1):
    """
    Generating samples at random from Bernouli density funtion
    """
    return (np.random.rand(n_samples) <= theta).astype(int)


def task_act_stim(n_ses = 2, n_run = 2, n_trials = 42,  prop = [.8, .7]):
    """
    This function performs the task desgin for Action an Stimulus Values learning
    as Probabilistic Reiforcement Learning task to test both Action and Rewarding mechanisms of Dopemiergic system
    in Parkinson disease.
    
    Arguments
    ----------
    n_ses: int
        The number of sessions
    n_run: int
        The number of runs in each sessions
    n_trials: int
        The number of observations fro each run and each session 
    act : bool
        True is Action Value learning and False is Stimulus Value Learning
    prop: float, list
        The probability of correcting pushed response against to pulled response for two runs
        (Yellow response against to Blue response)
        The values should be between 0 and 1
        
    Output
    -------
    data : pandas.DataFrame
        Columns contains:'session',
                         'run',
                         'block',
                         'trialNumber',  
                         'leftCanBePushed',
                         'yellowOnLeftSide',
                         'winAmtLeft',
                         'winAmtRight',
                         'winAmtPushable',
                         'winAmtPullable',
                         'winAmtYellow',
                         'winAmtBlue',
                         'pushCorrect',
                         'yellowCorrect'

    """   
    
    # The oputpu dataframe for task design generated by computer
    data = pd.DataFrame(columns=['session',
                                 'run',
                                 'block',
                                 'trialNumber',  
                                 'leftCanBePushed',
                                 'yellowOnLeftSide',
                                 'winAmtLeft',
                                 'winAmtRight',
                                 'winAmtPushable',
                                 'winAmtPullable',
                                 'winAmtYellow',
                                 'winAmtBlue',
                                 'pushCorrect',
                                 'yellowCorrect'])

    idx = 0   
    for s in range(n_ses):
        session = s 
        for r in range(n_run):
            run = r
            if r==1:
                # Set the Action and Stimulus block
                block = 'Act' 
            else:
                block = 'Stim'

            # The number of simulated observations as trials
            trialNumber = np.arange((2*s   + r)*n_trials + 1,  (2*s + r + 1)*n_trials + 1)
            idx = idx + 1
            
            # counterbalanced ledft and right action and stimulus response
            leftCanBePushed = sample_bernouli(theta = .5, n_samples = n_trials)
            yellowOnLeftSide = sample_bernouli(theta = .5, n_samples = n_trials)

            # The amound of winning reward for left side
            winAmtLeft = np.round(np.linspace(1,99, n_trials)).astype(int)
            np.random.shuffle(winAmtLeft) 
            # The amound of winning reward for right side
            winAmtRight = 100 - winAmtLeft  

            winAmtPushable = leftCanBePushed*winAmtLeft + (1 - leftCanBePushed)*winAmtRight
            winAmtPullable = 100 - winAmtPushable

            winAmtYellow = yellowOnLeftSide*winAmtLeft + (1 - yellowOnLeftSide)*winAmtRight
            winAmtBlue = 100 - winAmtYellow


            # Action Value learning block vs Stimulus Learning value block
            if(block == 'Act'):
                # the reward probability of pushed response
                pushCorrect = sample_bernouli(theta = prop[r], n_samples = n_trials)
                # Chanced probability of stimulus response
                yellowCorrect = sample_bernouli(theta = .5, n_samples = n_trials)
            else:      
                # Chanced probability of pushed response
                pushCorrect = sample_bernouli(theta = .5, n_samples = n_trials)
                # the reward probability of stimulus response
                yellowCorrect = sample_bernouli(theta = prop[r], n_samples = n_trials)
                
            temp_dat = pd.DataFrame(data={'session':session,
                                          'run':run,
                                          'block':block,
                                         'trialNumber':trialNumber,  
                                         'leftCanBePushed':leftCanBePushed,
                                         'yellowOnLeftSide':yellowOnLeftSide,
                                         'winAmtLeft':winAmtLeft,
                                         'winAmtRight':winAmtRight,
                                         'winAmtPushable':winAmtPushable,
                                         'winAmtPullable':winAmtPullable,
                                         'winAmtYellow':winAmtYellow,
                                         'winAmtBlue':winAmtBlue,
                                         'pushCorrect':pushCorrect,
                                         'yellowCorrect':yellowCorrect})
            data = pd.concat((data, temp_dat))


     
    return data

In [106]:
task_act_stim()[0:50]

Unnamed: 0,session,run,block,trialNumber,leftCanBePushed,yellowOnLeftSide,winAmtLeft,winAmtRight,winAmtPushable,winAmtPullable,winAmtYellow,winAmtBlue,pushCorrect,yellowCorrect
0,0,0,Stim,1,1,0,34,66,34,66,66,34,1,1
1,0,0,Stim,2,0,0,73,27,27,73,27,73,0,0
2,0,0,Stim,3,1,1,27,73,27,73,27,73,0,1
3,0,0,Stim,4,1,0,13,87,13,87,87,13,0,1
4,0,0,Stim,5,0,0,68,32,32,68,32,68,0,1
5,0,0,Stim,6,1,1,97,3,97,3,97,3,0,1
6,0,0,Stim,7,1,1,80,20,80,20,80,20,0,1
7,0,0,Stim,8,1,0,3,97,3,97,97,3,1,1
8,0,0,Stim,9,0,0,82,18,18,82,18,82,0,1
9,0,0,Stim,10,1,1,54,46,54,46,54,46,0,1


In [143]:
def simulate_rl(task_act_stim, alpha_A, alpha_C, weight, beta, n_trilas = 10, init_probability=.5):
    """
    General Comment   
    ----------
    
    Simulates a individual behavior for Action and Stimulus Value Learning 
    according to a RL model with the weightening parameter,

    Notw that in this simulation, a simple Rescorla-Wagner rule is used for reinforcement learning
    and the softmax function is used for the choice response

    This function is to simulate data for, for example, parameter recovery.
    Simulates data for one participant.
    
    Two rewarded feedback and non-rewarded feedback are presented in each trial.
  

    Arguments
    ----------

    task_frame : pandas.DataFrame
         Size of n_trials rows.
         Columns are related to Action and Stimulus Learning Values task containing:
            'pushedChosen': 1 if participant pushed and 0 if participant pulled 
            'yellowChosen': 1 if participant chose yellow color and 0 if participant chose blue color
            'winAmtPushable': the amount of feedback when participant pushed correctly, between  [0, 100]
            'winAmtYellow  the amount of feedback when participant selected yellow color correcly, between  [0, 100]
            'w': rewarded feedback coded to 1 and non-rewarded feedback coded to 0
        
    alpha_A : float [0, 1]
        The learning rate related to Action Value Learning.
      
    alpha_C : float [0, 1]
        The learning rate related to Color Value Learning.
      
    weight : float [0, 1]
        The reelative contribution of Action and Stimulus Values Learning to get rewarded.

    bet : float  [0, )
        The sensitivity parameter in the soft_max choice rule.
        the higher value leads to the more sensitivity to value differences between two options

    init_probability : float in [0, 1] (default .5)
        The initial value for the probability of reward.

    Output
    -------

    data : pandas.DataFrame
         Columns contains the task_frame, plus:
        'alpha_A', 'alpha_C', 'weight', 'bet', 'w'

    """
    
    task_fram = {'alpha_A':[],
                 'alpha_C':[],
                 'alpha_A':[],
                 'weight':[],
                 'bet':[],
                 'alpha_A':[],
                 'alpha_A':[],                
                }
    data = task_frame.copy()
 , 
    data['alpha_A'] = alpha_A
    data['alpha_C'] = alpha_C
    data['weight'] = weight
    data['bet'] = bet
    
    for n in range(n_trials):
        
    data = pd.concat([data, _simulate_delta_rule_2A(task_design=task_design,
                                                                   alpha=gen_alpha,
                                                                   initial_value_learning=initial_value_learning)],
                         axis=1)

    elif type(gen_alpha) is list:
        if len(gen_alpha) == 2:
            data['alpha_pos'] = gen_alpha[0]
            data['alpha_neg'] = gen_alpha[1]
            data = pd.concat([data, _simulate_delta_rule_2A(task_design=task_design,
                                                                       alpha=None,
                                                                       initial_value_learning=initial_value_learning,
                                                                       alpha_pos=gen_alpha[0],
                                                                       alpha_neg=gen_alpha[1])],
                             axis=1)

        elif len(gen_alpha) == 3:
            pass # implement here Stefano's learning rule
        else:
            raise ValueError("The gen_alpha list should be of either length 2 or 3.")
    else:
        raise TypeError("The gen_alpha should be either a list or a float/int.")

    data['sensitivity'] = gen_sensitivity
    data['p_cor'] = data.apply(_soft_max_2A, axis=1)
    data['accuracy'] = stats.bernoulli.rvs(data['p_cor'].values) # simulate choices

    data = data.set_index(['participant', 'block_label', 'trial_block'])
    return data


def delta_rule(task_design, alpha, initial_value_learning):
    """Q learning (delta learning rule) for two alternatives
    (one correct, one incorrect).

    Parameters
    ----------

    task_design : DataFrame
        `pandas.DataFrame`, with n_trials_block*n_blocks rows.
        Columns contain:
        "f_cor", "f_inc", "trial_type", "cor_option", "inc_option",
        "trial_block", "block_label", "participant".

    alpha : float
        The generating learning rate.
        It should be a value between 0 (no updating) and 1 (full updating).

    alpha_pos : float, default None
        If a value for both alpha_pos and alpha_neg is provided,
        separate learning rates are estimated
        for positive and negative prediction errors.

    alpha_neg : float, default None
        If a value for both alpha_pos and alpha_neg is provided,
        separate learning rates are estimated
        for positive and negative prediction errors.

    initial_value_learning : float
        The initial value for Q learning.

    Returns
    -------

    Q_series : Series
        The series of learned Q values (separately for correct and incorrect options).

    """
    alpha = np.array([alpha])

    n_trials = task_design.shape[0]

    for n in range(n_trials):
        index_cor = int(task_design.cor_option.values[n]-1)
        Q = initial_value_learning
        else:
            if separate_learning_rates:
                pe_cor = task_design.f_cor.values[n] - Q[index_cor]
                pe_inc = task_design.f_inc.values[n] - Q[index_inc]
                if pe_cor > 0:
                    Q[index_cor] += alpha_pos[index_participant]*(task_design.f_cor.values[n] - Q[index_cor])
                else:
                    Q[index_cor] += alpha_neg[index_participant]*(task_design.f_cor.values[n] - Q[index_cor])
                if pe_inc > 0:
                    Q[index_inc] += alpha_pos[index_participant]*(task_design.f_inc.values[n] - Q[index_inc])
                else:
                    Q[index_inc] += alpha_neg[index_participant]*(task_design.f_inc.values[n] - Q[index_inc])
            else:
                Q[index_cor] += alpha[index_participant]*(task_design.f_cor.values[n] - Q[index_cor])
                Q[index_inc] += alpha[index_participant]*(task_design.f_inc.values[n] - Q[index_inc])

        Q_cor = np.append(Q_cor, Q[index_cor])
        Q_inc = np.append(Q_inc, Q[index_inc])

    return pd.DataFrame({'Q_cor':Q_cor, 'Q_inc':Q_inc})

IndentationError: unindent does not match any outer indentation level (<tokenize>, line 64)