In [73]:
import re

regex = r"^(?P<model>\w+) <- hBayesDM_model\(.+?preprocess_func = (?P<func>function.+?})\n\)"

In [74]:
from pathlib import Path

path_cwd = Path.cwd()

In [75]:
fns = sorted(path_cwd.glob('*.R'))

In [76]:
matches = {}
for fn in fns:
    with open(fn, 'r') as f:
        codes = f.readlines()

    match = list(re.finditer(regex, ''.join(codes), re.MULTILINE | re.DOTALL))
    if len(match) > 0:
        matches[fn.name] = {
            'model': match[0].group('model'),
            'func': match[0].group('func').replace('\n  ', '\n')
        }

In [79]:
tasks = sorted(set([k.split('.')[0].split('_')[0] for k in matches.keys()]))

In [84]:
funcs_task = {}
for k, v in matches.items():
    task = k.split('.')[0].split('_')[0]
    code = '{task}_preprocess_func <- {func}'\
        .format(model=v['model'], task=task, func=v['func'])
    if task not in funcs_task:
        funcs_task[task] = {v['model']: code}
    elif code not in funcs_task[task].values():
        funcs_task[task][v['model']] = code

In [88]:
funcs = {t: [f'# From {k}\n' + v for k, v in kv.items()] for t, kv in funcs_task.items()}

In [92]:
with open('preprocess_funcs.R', 'w') as f:
    for t, fs in funcs.items():
        f.write(f'#### {t}\n\n')
        for func in fs:
            f.writelines(func)
            f.write('\n\n')
        f.write('\n\n\n')

In [82]:
import difflib

In [83]:
for k, v in {k: v for k, v in funcs_task.items() if len(v) > 1}.items():
    print(k)
    before = funcs_task[k][0].split('\n')
    after = funcs_task[k][1].split('\n')
    for line in difflib.context_diff(before, after):
        print(line)
    print('\n\n\n################\n\n\n')

choiceRT
*** 

--- 

***************

*** 1,40 ****

  choiceRT_preprocess_func <- function(raw_data, general_info, RTbound = 0.1) {
!   # Use raw_data as a data.frame
!   raw_data <- as.data.frame(raw_data)
  
!   # Use general_info of raw_data
!   subjs   <- general_info$subjs
!   n_subj  <- general_info$n_subj
! 
!   # Number of upper and lower boundary responses
!   Nu <- with(raw_data, aggregate(choice == 2, by = list(y = subjid), FUN = sum)[["x"]])
!   Nl <- with(raw_data, aggregate(choice == 1, by = list(y = subjid), FUN = sum)[["x"]])
! 
!   # Reaction times for upper and lower boundary responses
!   RTu <- array(-1, c(n_subj, max(Nu)))
!   RTl <- array(-1, c(n_subj, max(Nl)))
!   for (i in 1:n_subj) {
!     subj <- subjs[i]
!     subj_data <- subset(raw_data, raw_data$subjid == subj)
! 
!     RTu[i, 1:Nu[i]] <- subj_data$rt[subj_data$choice == 2]  # (Nu/Nl[i]+1):Nu/Nl_max will be padded with 0's
!     RTl[i, 1:Nl[i]] <- subj_data$rt[subj_data$choice == 1]  # 0 padding is skipp