In [None]:
import numpy as np
import json
from scipy.optimize import minimize

In [None]:
# Read the raw per-subject record lists from disk.
with open('result.json', 'r', encoding='utf-8') as fh:
    raw_data = json.load(fh)

# Outcomes are divided by this factor so the Q-values do not change too
# sharply, which would otherwise push the fitted alpha toward 0.
scale = 10


def _scaled(values):
    """Return `values` as a float array divided by the module-level `scale`."""
    return np.array(values, dtype=float) / scale


# Maps integer subject id -> dict of per-trial arrays.
data = {}
for subj_key, (deck_ids, gain_seq, loss_seq, reward_seq) in raw_data.items():
    data[int(subj_key)] = {
        # Shift deck labels to 0-based indices (1..4 -> 0..3 per the original note).
        "choices": np.array(deck_ids, dtype=int) - 1,
        "gains": _scaled(gain_seq),
        "losses": _scaled(loss_seq),
        "rewards": _scaled(reward_seq),
    }

In [None]:
def _choice_log_prob(logits, action):
    """Return log(softmax(logits)[action]) computed entirely in log space.

    Subtracting the maximum keeps `exp` from overflowing; staying in log space
    keeps the result exact even when the chosen action's probability would
    underflow to 0 as a plain float.
    """
    shifted = logits - np.max(logits)
    return shifted[action] - np.log(np.sum(np.exp(shifted)))


def neg_log_likelihood(params, trials, vs=False, n_actions=4):
    """Negative log-likelihood of a choice sequence under softmax Q-learning.

    Parameters
    ----------
    params : sequence of float
        `(alpha, beta)` when `vs` is False, or
        `(alpha_plus, alpha_minus, beta, w)` when `vs` is True.
    trials : iterable
        `(action, reward)` tuples when `vs` is False, or
        `(action, gain, loss)` tuples when `vs` is True. `action` is used
        directly as an index into the Q-value arrays.
    vs : bool, optional
        Selects the valence-specific model, which keeps separate value
        estimates for gains and for loss magnitudes and combines them as
        `w * Q_pos - (1 - w) * Q_neg`.
    n_actions : int, optional
        Number of available actions (decks). Defaults to 4.

    Returns
    -------
    float
        Sum over trials of `-log P(chosen action)`; 0.0 for an empty sequence.

    Notes
    -----
    The log-probability is evaluated in log space rather than as
    `log(p + 1e-12)`. The additive epsilon capped each trial's cost at about
    27.63 once `p` underflowed (easy to hit with large beta), which made the
    objective flat and gave the optimiser no gradient in that region.
    """
    nll = 0.0

    if vs:
        a_p, a_m, b, w = params
        Q_pos = np.zeros(n_actions)
        Q_neg = np.zeros(n_actions)

        for action, gain, loss in trials:
            # The choice is scored with the values held *before* this
            # trial's outcome is learned.
            Q_comb = w * Q_pos - (1 - w) * Q_neg
            nll -= _choice_log_prob(b * Q_comb, action)

            Q_pos[action] += a_p * (gain - Q_pos[action])
            # The loss estimate is only updated on trials that carry a loss.
            if loss != 0:
                Q_neg[action] += a_m * (abs(loss) - Q_neg[action])
    else:
        a, b = params
        Q = np.zeros(n_actions)

        for action, reward in trials:
            nll -= _choice_log_prob(b * Q, action)
            Q[action] += a * (reward - Q[action])

    return nll

In [None]:
params_sub = []  # one dict of fitted parameters per participant

# Starting points and box constraints handed to the optimiser.
init_q = [0.0001, 1.0]                            # Q-learning: alpha, beta
bounds_q = [(0, 1), (0, 1000)]                    # alpha, beta
init_vs = [0.0001, 0.0001, 1.0, 0.5]              # VSQ: alpha_plus, alpha_minus, beta, w
bounds_vs = [(0, 1), (0, 1), (0, 1000), (0, 1)]   # alpha_plus, alpha_minus, beta, w


def _fit_or_warn(trials, x0, bounds, vs, subj, label):
    """Minimise `neg_log_likelihood` for one subject and report failed fits.

    Returns the optimiser's result object unchanged. When the optimiser does
    not report success, a warning with its termination message is printed so
    that unreliable estimates are not silently saved as if they were valid.
    """
    result = minimize(
        neg_log_likelihood,   # objective
        x0=x0,                # starting point
        args=(trials, vs),    # extra positional arguments for the objective
        bounds=bounds,        # per-parameter box constraints
    )
    if not result.success:
        print(f"WARNING: subject {subj} {label} fit did not converge: {result.message}")
    return result


# Fit both models for every subject.
# NOTE(review): each model is fitted from a single starting point whose alpha
# sits next to the lower bound; consider several starts if fits look degenerate.
for subj, d in data.items():
    # Trial tuples for plain Q-learning.
    trials_q = list(zip(d["choices"], d["rewards"]))

    # Trial tuples for valence-specific Q-learning.
    trials_vs = list(zip(d["choices"], d["gains"], d["losses"]))

    params_q = _fit_or_warn(trials_q, init_q, bounds_q, False, subj, "Q-learning")
    params_vs = _fit_or_warn(trials_vs, init_vs, bounds_vs, True, subj, "VSQ-learning")

    # Store plain Python floats so the record is JSON-serialisable regardless
    # of the numeric type the optimiser returns.
    params_sub.append({
        "subj": subj,
        "q_alpha": float(params_q.x[0]),
        "q_beta": float(params_q.x[1]),
        "vs_alpha_plus": float(params_vs.x[0]),
        "vs_alpha_minus": float(params_vs.x[1]),
        "vs_beta": float(params_vs.x[2]),
        "vs_w": float(params_vs.x[3]),
        "nll_q": float(params_q.fun),
        "nll_vs": float(params_vs.fun)
    })
    print(f"Subject {subj}: nll_q={params_q.fun:.2f}, nll_vs={params_vs.fun:.2f}")
    print(f"  Q-learning params:  α={params_q.x[0]:.10f}, β={params_q.x[1]:.10f}")
    print(f"  VSQ-learning params: α+={params_vs.x[0]:.10f}, α-={params_vs.x[1]:.10f}, β={params_vs.x[2]:.10f}, w={params_vs.x[3]:.3f}\n")

# Persist every subject's estimates.
with open("params_ours.json", "w", encoding="utf-8") as f:
    json.dump(params_sub, f, ensure_ascii=False, indent=4)

Subject 1: nll_q=92.36, nll_vs=75.81
  Q-learning params:  α=0.0000847527, β=259.7964685233
  VSQ-learning params: α+=0.0000630007, α-=0.0004930602, β=738.1551533072, w=0.958

Subject 2: nll_q=54.41, nll_vs=64.48
  Q-learning params:  α=0.0867112009, β=0.8064830945
  VSQ-learning params: α+=0.0010256615, α-=0.0001558659, β=342.5718623652, w=0.048

Subject 3: nll_q=10.26, nll_vs=12.90
  Q-learning params:  α=0.0002352935, β=663.4377877746
  VSQ-learning params: α+=0.0002065520, α-=0.0000000000, β=602.6192697122, w=1.000

