In [1]:
import sys
# Add the parent directory to the module search path (at position 1, i.e.
# right after the first entry) -- presumably so that the project-local
# `run_sl` module imported in the next cell can be found; verify against the
# repository layout.
sys.path.insert(1, '../')

In [2]:
from scipy.io import loadmat
from mvpa2.suite import *
from glob import glob
import re
from os.path import basename
import os
from run_sl import get_unique_combs, flatten
from joblib.parallel import Parallel, delayed
import numpy as np

In [3]:
def permute_conditions(ds, permute=0):
    """Relabel ``ds.sa.condition`` in place with one predefined permutation.

    The permutation scheme is entry number ``permute`` of
    ``get_unique_combs(8)``, flattened into a sequence of indices into the
    sorted unique condition labels. According to the original notes the
    schemes preserve the hierarchical structure of the problem, valid
    values are 0..34, and 0 is the identity permutation.

    The unpermuted labels are kept in ``ds.sa.condition_orig``: on the first
    call they are saved there, on later calls they are restored from there
    before permuting, so permutations never accumulate across calls.

    Nothing is returned; ``ds`` is modified in place.
    """
    # Index sequence describing where each unique label is sent.
    scheme = flatten(get_unique_combs(8)[permute])
    labels = np.unique(ds.sa.condition)

    # Always permute starting from the pristine labels.
    if 'condition_orig' not in ds.sa:
        ds.sa['condition_orig'] = ds.sa.condition.copy()
    else:
        ds.sa['condition'] = ds.sa.condition_orig.copy()

    # old label -> new label
    relabel = dict((labels[src], labels[dst])
                   for src, dst in enumerate(scheme))

    for idx in range(ds.nsamples):
        ds.sa.condition[idx] = relabel[ds.sa.condition[idx]]

In [4]:
# Subject identifiers are the names of the per-subject result directories.
# glob() returns its matches in arbitrary, filesystem-dependent order, so they
# are sorted here to get a deterministic subject order: later cells attach
# subject labels to the results purely by position.
subjs = sorted(basename(s) for s in glob('../../results/c1c2_subject_stimuli_lo/*'))
assert len(subjs) == 33, \
    'expected 33 subject directories, found {0}'.format(len(subjs))

In [5]:
def run_permutations_subject(subj):
    """Classify familiar vs. control faces from one subject's C1 and C2
    features, once for each of the 35 label permutations.

    Parameters
    ----------
    subj : str
        Name of the subject directory below
        ``../../results/c1c2_subject_stimuli_lo``; it must contain
        ``c1c2.mat`` (variables ``c1`` and ``c2``) and ``imgorder.txt``.

    Returns
    -------
    (list, list)
        For the C1 and the C2 features respectively, one value per
        permutation index 0..34, computed as one minus the mean of the
        cross-validation output (presumably an error rate, which would make
        the value an accuracy -- confirm against the PyMVPA defaults).
    """
    mat = loadmat('../../results/c1c2_subject_stimuli_lo/{0}/c1c2.mat'.format(subj))
    c1_cells = mat['c1']
    c2_cells = mat['c2']

    # Each C1 element is one image holding 8 scales of n x n x 4 responses;
    # unroll every image into a single flat feature row.
    c1_rows = []
    for c1_img in c1_cells.flatten():
        per_scale = [scale.flatten() for scale in c1_img.flatten()]
        c1_rows.append(np.hstack(per_scale))
    c1_stacked = np.vstack(c1_rows)
    # C2 entries are stacked and transposed so that rows index images.
    c2_stacked = np.vstack(c2_cells.reshape((-1)).tolist()).T
    # 24 images are expected per subject.
    assert len(c1_stacked) == 24
    assert len(c2_stacked) == 24

    # The identity label of each image is parsed from its file name, read in
    # the order in which the features were computed.
    img_order = np.loadtxt('../../results/c1c2_subject_stimuli_lo/{0}/imgorder.txt'.format(subj), dtype=str)
    identity_re = re.compile('(control[0-9]|friend[0-9])')
    assert len(img_order) == 24
    identity = [identity_re.findall(fn)[0] for fn in img_order]

    ds_c1 = Dataset(c1_stacked, sa={'condition': identity})
    ds_c2 = Dataset(c2_stacked, sa={'condition': identity})

    def label_familiarity(ds):
        # Derive the binary familiarity label from the (permuted) identity
        # and use it as the classification target.
        familiarity = []
        for cond in ds.sa.condition:
            familiarity.append('familiar' if 'friend' in cond else 'control')
        ds.sa['familiarity'] = familiarity
        ds.sa['targets'] = ds.sa['familiarity']

    acc_c1 = []
    acc_c2 = []
    for iperm in range(35):
        for ds in (ds_c1, ds_c2):
            permute_conditions(ds, permute=iperm)
            label_familiarity(ds)

        # Folds are defined on identities ('condition') nested within the
        # familiarity targets -- presumably one identity per class is held
        # out in each fold; see the PyMVPA FactorialPartitioner docs.
        folds = FactorialPartitioner(NFoldPartitioner(attr='condition'),
                                     attr='targets')
        cv = CrossValidation(LinearCSVMC(), folds)

        acc_c1.append(1. - cv(ds_c1).samples.mean())
        acc_c2.append(1. - cv(ds_c2).samples.mean())

    return acc_c1, acc_c2

In [6]:
import pandas as pd

In [7]:
# Run the permutation analysis for every subject, with up to 8 parallel jobs.
# Each result is the (c1, c2) pair returned by run_permutations_subject.
run_all_subjects = Parallel(n_jobs=8, verbose=50)
acc_c1c2 = run_all_subjects(
    delayed(run_permutations_subject)(subj) for subj in subjs)

[Parallel(n_jobs=8)]: Done   1 tasks      | elapsed:  2.2min
[Parallel(n_jobs=8)]: Done   2 tasks      | elapsed:  2.2min
[Parallel(n_jobs=8)]: Done   3 tasks      | elapsed:  2.2min
[Parallel(n_jobs=8)]: Done   4 tasks      | elapsed:  2.2min
[Parallel(n_jobs=8)]: Done   5 tasks      | elapsed:  2.2min
[Parallel(n_jobs=8)]: Done   6 tasks      | elapsed:  2.3min
[Parallel(n_jobs=8)]: Done   7 tasks      | elapsed:  2.3min
[Parallel(n_jobs=8)]: Done   8 tasks      | elapsed:  2.3min
[Parallel(n_jobs=8)]: Done   9 tasks      | elapsed:  4.4min
[Parallel(n_jobs=8)]: Done  10 tasks      | elapsed:  4.4min
[Parallel(n_jobs=8)]: Done  11 tasks      | elapsed:  4.4min
[Parallel(n_jobs=8)]: Done  12 tasks      | elapsed:  4.4min
[Parallel(n_jobs=8)]: Done  13 tasks      | elapsed:  4.4min
[Parallel(n_jobs=8)]: Done  14 tasks      | elapsed:  4.4min
[Parallel(n_jobs=8)]: Done  15 tasks      | elapsed:  4.4min
[Parallel(n_jobs=8)]: Done  16 tasks      | elapsed:  4.5min
[Parallel(n_jobs=8)]: Do

In [8]:
# Flatten the per-subject (c1, c2) result pairs into two long lists, keeping
# the subject order and, within each subject, the permutation order.
acc_c1 = [acc for subj_acc in acc_c1c2 for acc in subj_acc[0]]
acc_c2 = [acc for subj_acc in acc_c1c2 for acc in subj_acc[1]]

In [9]:
# Sanity check: both flattened lists should hold one value per
# (subject, permutation) pair.
len(acc_c1), len(acc_c2)

(1155, 1155)

In [10]:
# Nesting of the raw results: subjects -> (c1, c2) pair -> permutations.
len(acc_c1c2), len(acc_c1c2[0]), len(acc_c1c2[0][0])

(33, 2, 35)

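In the flattened lists the permutation index varies fastest and the subject index slowest: all 35 permutations of the first subject come first, followed by those of the second subject, and so on.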

In [11]:
# Row labels for the results table: 33 subjects (sub001 .. sub033) and
# 35 permutations (perm00 .. perm34).
subject_label = 'sub{0:03d}'.format
permutation_label = 'perm{0:02d}'.format
subjects = [subject_label(num) for num in range(1, 34)]
permutations = [permutation_label(num) for num in range(35)]

In [12]:
# Long-format results table with one row per (subject, permutation) pair.
# Every subject label is repeated once per permutation and the permutation
# labels are cycled once per subject, matching the layout of the flattened
# accuracy lists (permutation varies fastest).
n_subjects, n_permutations = len(subjects), len(permutations)
column_order = ['subject', 'permutation', 'acc_c1', 'acc_c2']
column_data = {
    'subject': np.repeat(subjects, n_permutations),
    'permutation': np.tile(permutations, n_subjects),
    'acc_c1': acc_c1,
    'acc_c2': acc_c2,
}
df = pd.DataFrame(column_data, columns=column_order)

In [13]:
# Preview only the first rows instead of dumping the whole table (one row
# per subject/permutation pair) into the notebook output.
df.head(10)

Unnamed: 0,subject,permutation,acc_c1,acc_c2
0,sub001,perm00,0.239583,0.291667
1,sub001,perm01,0.437500,0.697917
2,sub001,perm02,0.437500,0.697917
3,sub001,perm03,0.437500,0.697917
4,sub001,perm04,0.437500,0.697917
5,sub001,perm05,0.437500,0.697917
6,sub001,perm06,0.437500,0.697917
7,sub001,perm07,0.437500,0.697917
8,sub001,perm08,0.437500,0.697917
9,sub001,perm09,0.510417,0.708333


In [16]:
# Create the output directory for the results table. An OSError is ignored
# only when the directory is already there; any other failure (e.g. missing
# permissions) is re-raised instead of being silently swallowed and showing
# up later as a confusing error when the CSV is written.
try:
    os.makedirs('../../results/hmax_famvsunf-id')
except OSError:
    if not os.path.isdir('../../results/hmax_famvsunf-id'):
        raise

In [17]:
# Save the table without the integer row index, so the CSV holds only the
# subject, permutation, acc_c1 and acc_c2 columns.
df.to_csv('../../results/hmax_famvsunf-id/hmax_famvsunfam-id_c1c2_perm.csv', index=False) 

In [18]:
# Summary statistics of the C1 values across subjects, computed separately
# for each permutation.
df.groupby('permutation')['acc_c1'].describe()

permutation       
perm00       count    33.000000
             mean      0.515783
             std       0.162904
             min       0.239583
             25%       0.406250
             50%       0.510417
             75%       0.625000
             max       0.916667
perm01       count    33.000000
             mean      0.492424
             std       0.100834
             min       0.333333
             25%       0.427083
             50%       0.468750
             75%       0.562500
             max       0.750000
perm02       count    33.000000
             mean      0.492424
             std       0.100834
             min       0.333333
             25%       0.427083
             50%       0.468750
             75%       0.562500
             max       0.750000
perm03       count    33.000000
             mean      0.492424
             std       0.100834
             min       0.333333
             25%       0.427083
             50%       0.468750
                     

In [19]:
# Summary statistics of the C2 values across subjects, computed separately
# for each permutation.
df.groupby('permutation')['acc_c2'].describe()

permutation       
perm00       count    33.000000
             mean      0.489899
             std       0.160858
             min       0.260417
             25%       0.375000
             50%       0.479167
             75%       0.625000
             max       0.864583
perm01       count    33.000000
             mean      0.509470
             std       0.164874
             min       0.250000
             25%       0.375000
             50%       0.510417
             75%       0.604167
             max       0.895833
perm02       count    33.000000
             mean      0.509470
             std       0.164874
             min       0.250000
             25%       0.375000
             50%       0.510417
             75%       0.604167
             max       0.895833
perm03       count    33.000000
             mean      0.509470
             std       0.164874
             min       0.250000
             25%       0.375000
             50%       0.510417
                     