In [1]:
%matplotlib inline
import os.path as op
from glob import glob
from collections import Counter

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
classes = ['accepted', 'ignored', 'rejected']
ign_codes = ['I005', 'I007', 'I010', 'I011']
rej_codes = ['I002', 'I003', 'I004', 'I006', 'I008', 'I009']
all_codes = sorted(ign_codes + rej_codes)

base_dir = '/scratch/tsalo006/reliability_analysis/tedana_outputs/'

In [3]:
columns = ['label', 'failure', 'duration', 'accepted', 'rejected', 'ignored'] + all_codes
summary_df = pd.DataFrame(columns=columns)

ted_dirs = sorted(glob(op.join(base_dir, 'logging-seed*')))
comptables = sorted(glob(op.join(base_dir, 'sub-*_seed-*_comptable.txt')))
labels = [op.basename(c) for c in comptables]
labels = [c.replace('_comptable.txt', '') for c in labels]

for label in labels:
    sub, seed = label.split('_')
    seed = seed.split('-')[-1]
    comptable = pd.read_csv(
        op.join(base_dir, '{0}_seed-{1}_comptable.txt'.format(sub, seed)), sep='\t')

    log_df = pd.read_csv(op.join(base_dir, '{0}_seed-{1}_log.tsv'.format(sub, seed)),
                         sep='\t', header=None,
                         names=['time', 'module', 'level', 'message'])
    log_df['time'] = pd.to_datetime(log_df['time'])
    duration = log_df['time'].iloc[-1] - log_df['time'].iloc[0]
    minutes = duration.total_seconds() / 60.
    failure = any(log_df['message'].str.contains('ICA failed to converge'))
    rationales = comptable['rationale'].tolist()
    classifications = comptable['classification'].tolist()
    rationales = [[m for m in r.split(';') if m] for r in rationales if isinstance(r, str)]
    rationales = [item for sublist in rationales for item in sublist]
    counter = dict(Counter(rationales))
    counter['subject'] = sub
    counter['seed'] = seed

    counter2 = dict(Counter(classifications))
    for k, v in counter2.items():
        counter[k] = v

    counter['failure'] = failure
    counter['duration'] = minutes
    summary_df = summary_df.append(counter, ignore_index=True).fillna(0)

IndexError: single positional indexer is out-of-bounds

In [None]:
summary_df

In [None]:
fig, ax = plt.subplots(figsize=(8, 10))
sns.boxplot(x="failure", y="duration", data=summary_df,
            whis="range", palette="vlag")

# Add in points to show each observation
sns.swarmplot(x="failure", y="duration", data=summary_df,
              size=20, color=".3", linewidth=0)

ax.set_ylabel('Duration (minutes)')
ax.set_xlabel('ICA Convergence?')
ax.set_xticklabels(['Yes', 'No'])
fig.show()

In [None]:
clf_df = summary_df.melt(value_vars=['accepted', 'rejected', 'ignored'],
                         var_name='classification', value_name='n components',
                         id_vars=['label', 'failure'])
clf_df2 = summary_df.melt(value_vars=['accepted', 'rejected', 'ignored'],
                         var_name='classification', value_name='n components',
                         id_vars=['label'])
clf_df2['failure'] = 'Total'
df2 = pd.concat((clf_df, clf_df2))

In [None]:
#sns.swarmplot(x='classification', y='n components', data=clf_df)
fig, axes = plt.subplots(ncols=2, sharey=True, sharex=True, figsize=(8, 10))
sns.boxplot(x="classification", y="n components", data=clf_df,
            whis="range", palette="vlag", ax=axes[0])

# Add in points to show each observation
sns.swarmplot(x="classification", y="n components", data=clf_df,
              size=5, color=".3", linewidth=0, ax=axes[0])

sns.boxplot(x="classification", y="n components", data=clf_df,
            whis="range", palette="vlag", ax=axes[1], hue='failure')

# Add in points to show each observation
sns.swarmplot(x="classification", y="n components", data=clf_df,
              size=5, color=".3", linewidth=0, ax=axes[1], hue='failure', dodge=True)

axes[0].set_ylabel('Number of Components')
axes[0].set_xlabel('Classification')
axes[1].set_xlabel('Classification')
axes[1].set_ylabel('')
axes[0].set_xticklabels([l.get_text().title() for l in axes[0].get_xticklabels()])

fig.tight_layout()
fig.show()

In [None]:
#sns.swarmplot(x='classification', y='n components', data=clf_df)
fig, ax = plt.subplots(figsize=(8, 10))
sns.boxplot(x="classification", y="n components", data=df2,
            whis="range", palette="vlag", hue='failure')

# Add in points to show each observation
sns.swarmplot(x="classification", y="n components", hue='failure', data=df2,
              size=5, color=".3", linewidth=0, dodge=True)

ax.set_ylabel('Number of Components')
ax.set_xlabel('Classification')
ax.set_xticklabels([l.get_text().title() for l in ax.get_xticklabels()])
fig.show()