In [1]:
# initiation: 2020-02-10T15:00:00+01:00
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy import stats, optimize
import seaborn as sns
import constrNMPy
import os

# Make Times New Roman the only entry in matplotlib's 'font.sans-serif' list.
# NOTE(review): this presumably only affects figures whose font family
# resolves to sans-serif -- confirm that is the intended family.
plt.rcParams['font.sans-serif'] = ['Times New Roman'] # use Times New Roman for figure text

首先，我们需要编写一个函数来计算每次训练中鸽子的啄食次数。由于每次训练开始时light和bell各自独立地随机产生cue，所以存在[1,1],[1,0],[0,1],[0,0]四种情况。
在每次训练中，我们根据当前状态的链接强度计算该状态下鸽子啄食的期望次数，并按照泊松分布生成实际的啄食次数。
下面的training函数展示了训练的过程：满足奖励条件时有0.7的概率获得食物，不满足条件时仅有0.05的概率获得食物。

In [2]:
import numpy as np
from scipy.stats import poisson

def training(w1, w2, tn, givep=0.7, nop=0.05, switch_trial=100):
    '''
    Simulate a single trial: draw the cues, the reward and the pecking count.

    Input:
    w1 & w2: association strengths of the light cue and the bell cue
    tn: the trial number (selects which reward rule applies)
    givep: reward probability when the current cue state is a rewarded one
    nop: reward probability in every other cue state
    switch_trial: last trial number that still uses the first reward rule

    Output: (light, bell, reward, N)
    light, bell: 0/1 cue indicators, each drawn independently
    reward: 1 if food was delivered on this trial, else 0
    N: number of pecks, Poisson distributed with rate
       max(100 * (light*w1 + bell*w2), 5)
    '''
    # The order of the random draws (light, bell, reward uniform, Poisson
    # count) is kept fixed so that a seeded run yields the same sequence.
    light = np.random.choice([0, 1])
    bell = np.random.choice([0, 1])
    rp = np.random.uniform(0, 1)

    if tn <= switch_trial:
        # first phase: any presented cue (light, bell or both) is rewarded
        rewarded_state = light == 1 or bell == 1
    else:
        # second phase: only "light on AND bell off" is rewarded
        rewarded_state = light == 1 and bell == 0

    reward = int(rp <= (givep if rewarded_state else nop))

    # generating responses; the rate never drops below 5
    lam = max(100 * (light*w1 + bell*w2), 5)
    N = poisson.rvs(lam)

    return light, bell, reward, N


之后我们模拟整个实验过程：给定训练次数和learning rate，每次训练后鸽子根据预期与实际奖励之间的差值（预测误差）更新weight，并在之后的试次中用更新后的weight生成新的lambda。

In [3]:
def simulate_experiment(num_trainings, alpha):
    """
    Simulate a whole experiment of `num_trainings` trials.

    Both association strengths start at 0. After every trial they are updated
    with the prediction error: w <- w + alpha * (reward - prediction) * cue,
    where prediction = light*w1 + bell*w2.

    :param num_trainings: number of trials to run (trial numbers 1..num_trainings)
    :param alpha: learning rate
    :return: (w1_trace, w2_trace, light_trace, bell_trace, reward_trace,
             response, predictions). The two weight traces hold the initial
             value plus the value after each trial (num_trainings + 1 entries);
             every other list has one entry per trial.
    """
    w1, w2 = 0, 0
    w1_trace = [w1]
    w2_trace = [w2]
    light_trace = []
    bell_trace = []
    reward_trace = []
    response = []
    predictions = []

    # Trial numbers are 1-based; the upper bound is inclusive so that exactly
    # num_trainings trials are simulated.
    for trial in range(1, num_trainings + 1):
        light, bell, reward, N = training(w1, w2, trial)

        light_trace.append(light)
        bell_trace.append(bell)
        reward_trace.append(reward)
        response.append(N)

        # update the association strength
        prediction = light*w1 + bell*w2
        error = reward - prediction
        w1 = w1 + alpha * error * light
        w2 = w2 + alpha * error * bell

        predictions.append(prediction)
        w1_trace.append(w1)
        w2_trace.append(w2)

    return w1_trace, w2_trace, light_trace, bell_trace, reward_trace, response, predictions


将训练的过程可视化：

In [1]:
# Seed NumPy's global generator (used by training) so that the simulated data,
# and therefore the figure, are the same on every run.
np.random.seed(0)

[w1_trace, w2_trace, light_trace, bell_trace, reward_trace, response, lst] = simulate_experiment(200, 0.03)

# One panel per series, stacked vertically and sharing the trial axis.
fig, axes = plt.subplots(4, 1, sharex=True, figsize=(8, 8))
panels = [
    (w1_trace, 'w1 (light)'),
    (w2_trace, 'w2 (bell)'),
    (response, 'pecks'),
    (lst, 'prediction'),
]
for ax, (series, label) in zip(axes, panels):
    ax.plot(series)
    ax.set_ylabel(label)
axes[-1].set_xlabel('trial')
fig.tight_layout();

NameError: name 'simulate_experiment' is not defined

In [22]:
# The weight traces carry one extra trailing entry (the weights after the last
# update). Trim them to the number of trials. Unlike .pop(), this is safe to
# re-run: a second execution removes nothing.
n_trials = len(light_trace)
del w1_trace[n_trials:]
del w2_trace[n_trials:]

dt = pd.DataFrame({
    'light': light_trace,
    'bell': bell_trace,
    'w1': w1_trace,
    'w2': w2_trace,
    'response': response,
    'reward': reward_trace,
    'prediction': lst,
})
dt

Unnamed: 0,light,bell,w1,w2,response,reward,prediction
0,1,0,0.000000,0.000000,8,1,0.000000
1,0,1,0.200000,0.000000,0,1,0.000000
2,0,1,0.200000,0.200000,17,1,0.200000
3,0,1,0.200000,0.360000,28,0,0.360000
4,0,0,0.200000,0.288000,9,0,0.000000
...,...,...,...,...,...,...,...
194,1,1,0.118737,0.121278,28,0,0.240014
195,1,0,0.070734,0.073275,10,0,0.070734
196,1,1,0.056587,0.073275,10,0,0.129862
197,0,0,0.030615,0.047302,5,1,0.000000


再写一个拟合这个过程的函数来对数据集进行拟合：

In [104]:
def rescorla_wagner(alpha, outcomes, response, cues):
    """
    Negative log-likelihood of the observed responses under a Rescorla-Wagner model.

    Both association strengths start at 0. On every trial the response count is
    scored under a Poisson distribution with rate
    max(100 * (w_1*cues[0][t] + w_2*cues[1][t]), 5), and the strengths are then
    updated with the prediction error outcomes[t] - (cues[0][t]*w_1 + cues[1][t]*w_2).

    :param alpha: learning rate; a scalar or a one-element array (the form in
            which scipy.optimize.minimize passes its parameter vector)
    :param outcomes: a sequence of actual outcomes, one per trial
    :param response: a sequence of actual response counts, one per trial
    :param cues: a pair of per-trial cue sequences;
            cues[0] is weighted by w_1 and cues[1] is weighted by w_2
    :return: negative log-likelihood of the responses, as a float
    :raises ValueError: if alpha holds more than one element
    """
    # Work with a plain scalar so that no one-element arrays leak into max()
    # or into the running weights.
    alpha = np.asarray(alpha, dtype=float).item()

    w_1 = 0.0
    w_2 = 0.0
    nll = 0.0

    # Loop through each trial
    for t in range(len(outcomes)):
        cue_1, cue_2 = cues[0][t], cues[1][t]
        prediction = cue_1 * w_1 + cue_2 * w_2

        # Score the response in log space: log(pmf) underflows to -inf for
        # very unlikely counts, whereas logpmf stays finite.
        lam = max(100 * prediction, 5)
        nll -= poisson.logpmf(response[t], lam)

        # Update the association strengths with the prediction error
        error = outcomes[t] - prediction
        w_1 += alpha * error * cue_1
        w_2 += alpha * error * cue_2

    return float(nll)


In [107]:
# Fit the model to the data
from scipy.optimize import minimize

# cues in the order expected by rescorla_wagner: [light, bell]
cuetry = [light_trace, bell_trace]

initial_params = np.array([0.2])
# Keep the learning rate inside [0, 1]; without bounds the optimizer is free
# to try values outside that range.
result = minimize(
    rescorla_wagner,
    initial_params,
    args=(reward_trace, response, cuetry),
    bounds=[(0.0, 1.0)],
)
alpha = result.x
alpha


array([0.19823611])