In [98]:
# Matches the log message that names the spec file being loaded and captures
# the directory that contains it as "working_directory".
# The dot of "spec.json" is escaped so that only a literal "." is accepted.
working_directory_pattern = re.compile(r"Loading spec file (?P<working_directory>.+?)/spec\.json")

In [99]:
# Scan the log entries for the first message that matches
# working_directory_pattern and keep the directory it names.
for e in entry_data:
    m = working_directory_pattern.fullmatch(e.message)

    if m is not None:
        working_directory = m.group("working_directory")
        break
else:
    # No entry matched: fail here instead of leaving working_directory
    # undefined (or stale from an earlier run of this cell) for later cells.
    raise ValueError("no 'Loading spec file <dir>/spec.json' message found in entry_data")

In [107]:
# Matches the "[Node] Setting-up" message and captures the quoted node name and
# the quoted node directory. The sentence-ending dot is escaped so that it only
# matches a literal ".".
start_pattern = re.compile(r"\[Node\] Setting-up \"(?P<node_name>.+?)\" in \"(?P<node_directory>.+?)\"\.")
# Splits a node directory located below the working directory into
# "node_path" and, when a "/mapflow/" component follows, "sub_node_name".
# re.escape keeps any regex metacharacters in the working directory path
# literal, and "(?:...)" makes the optional mapflow suffix a non-capturing group.
mapnode_directory_pattern = re.compile(re.escape(working_directory) + r"/(?P<node_path>.+?)(?:/mapflow/(?P<sub_node_name>.+?))?")
# Matches the "[Job N] Completed (...)" message and captures the node name in
# parentheses. The sentence-ending dot is escaped so that it only matches a
# literal ".".
end_pattern = re.compile(r"\[Job \d+\] Completed \((?P<node_name>.+?)\)\.")

In [115]:
def convert_node_path(node_path: str) -> str:
    """Turn a "/"-separated node path into a "."-separated node name.

    Path components that start with an underscore, and components equal to
    "mapflow", are dropped; the remaining components keep their order.

    Args:
        node_path: Node path with "/" as the component separator.

    Returns:
        The kept components joined with ".".
    """
    kept_parts = [
        part
        for part in node_path.split("/")
        if not part.startswith("_") and part != "mapflow"
    ]
    return ".".join(kept_parts)

In [175]:
# Maps each node name to an Interval whose start comes from the node's
# "Setting-up" message and whose end comes from its "Completed" message.
intervals = {}

for e in entry_data:
    m = start_pattern.fullmatch(e.message)

    if m is None:
        # Not a set-up message: the only other message handled is a job completion.
        m = end_pattern.fullmatch(e.message)
        if m is not None:
            node_name = m.group("node_name")
            # Names without the "nipype" prefix are map node subnodes; skip them.
            if node_name.startswith("nipype"):
                intervals[node_name].end = e.time
        continue

    node_name = m.group("node_name")

    if node_name.startswith("nipype"):
        # A node with a full name is expected to be set up only once.
        assert node_name not in intervals
    else:
        # Map node subnode: derive the node name from its directory instead.
        node_directory = m.group("node_directory")
        m = mapnode_directory_pattern.fullmatch(node_directory)
        assert m is not None

        node_name = convert_node_path(m.group("node_path"))

        if node_name in intervals:
            # Already recorded: only ever move the start to an earlier time.
            existing = intervals[node_name]
            if existing.start > e.time:
                existing.start = e.time
            continue

    intervals[node_name] = Interval(start=e.time, end=None)

In [142]:
# Sum of (end - start) over every node whose name starts with "nipype.fmriprep_wf".
fmriprep_node_durations = (
    span.end - span.start
    for name, span in intervals.items()
    if name.startswith("nipype.fmriprep_wf")
)
print(sum(fmriprep_node_durations, timedelta()))

4:13:22.817900


In [143]:
# Sum of (end - start) over every node whose name starts with "nipype.settings_wf".
settings_node_durations = (
    span.end - span.start
    for name, span in intervals.items()
    if name.startswith("nipype.settings_wf")
)
print(sum(settings_node_durations, timedelta()))

8:46:40.093200


In [176]:
# One merged Interval per step of the settings workflow.
grouped_intervals = defaultdict(Interval)

for name, span in intervals.items():
    if name.startswith("nipype.settings_wf"):
        # The step is the fifth dot-separated component of the node name.
        step = name.split(".")[4]
        # presumably Interval.update widens the group to cover span — confirm against Interval
        grouped_intervals[step].update(span)

In [177]:
# One line per step: the step name followed by the duration of its merged interval.
for step_name in grouped_intervals:
    print(step_name, grouped_intervals[step_name].duration)

alt_bold_std_trans_wf 1:08:33.973000
gaussian_bandpass_filter_wf_jhpg 1:28:31.003900
gaussian_bandpass_filter_wf_zwiu 1:37:47.992400
fmriprep_adapter_wf 0:19:51.970900
ica_aroma_components_wf 0:45:51.988800
smoothing_6000_wf_2454 0:36:03.924100
grand_mean_scaling_10000_wf_tfek 0:17:34.963000
ica_aroma_regression_wf_tjof 0:26:36.987000
setting_adapter_wf_of3z 0:16:31.954500
confounds_select_hgui_wf_jrmg 0:16:26.943400
setting_adapter_wf_xikp 0:19:52.005100


In [188]:
# Steps whose grouped intervals are merged into a single interval below.
var = [
    "alt_bold_std_trans_wf",
    "ica_aroma_components_wf",
    "grand_mean_scaling_10000_wf_tfek",
    "smoothing_6000_wf_2454",
    "gaussian_bandpass_filter_wf_jhpg",
]

interval = Interval()
for step_name in var:
    # Indexing the defaultdict (rather than .get) is kept on purpose: a missing
    # step is inserted as an empty Interval, exactly as before.
    interval.update(grouped_intervals[step_name])

print(interval.duration)

1:35:37.027400


In [None]:
# NOTE(review): this cell repeats the preceding cell verbatim and has no
# recorded execution count — confirm whether it is still needed.
var = [
    "alt_bold_std_trans_wf",
    "ica_aroma_components_wf",
    "grand_mean_scaling_10000_wf_tfek",
    "smoothing_6000_wf_2454",
    "gaussian_bandpass_filter_wf_jhpg",
]

interval = Interval()
for step_name in var:
    # A step missing from the defaultdict is inserted as an empty Interval.
    interval.update(grouped_intervals[step_name])

print(interval.duration)

In [139]:
# Summed node durations (end - start) per step of the features workflow.
durations = defaultdict(timedelta)

for name, span in intervals.items():
    if name.startswith("nipype.features_wf"):
        # The step is the fifth dot-separated component of the node name.
        step = name.split(".")[4]
        durations[step] += span.end - span.start

In [145]:
# Display the per-step summed durations collected for "nipype.features_wf" nodes.
durations

defaultdict(datetime.timedelta,
            {'alt_bold_std_trans_wf': datetime.timedelta(seconds=3282, microseconds=77300),
             'gaussian_bandpass_filter_wf_jhpg': datetime.timedelta(seconds=1367, microseconds=608600),
             'gaussian_bandpass_filter_wf_zwiu': datetime.timedelta(seconds=1146, microseconds=500200),
             'fmriprep_adapter_wf': datetime.timedelta(seconds=250, microseconds=772000),
             'ica_aroma_components_wf': datetime.timedelta(seconds=15194, microseconds=943600),
             'smoothing_6000_wf_2454': datetime.timedelta(seconds=9467, microseconds=88500),
             'grand_mean_scaling_10000_wf_tfek': datetime.timedelta(seconds=331, microseconds=400),
             'ica_aroma_regression_wf_tjof': datetime.timedelta(seconds=451, microseconds=924200),
             'setting_adapter_wf_of3z': datetime.timedelta(seconds=51, microseconds=407200),
             'confounds_select_hgui_wf_jrmg': datetime.timedelta(seconds=8, microseconds=18600),
