Please get the raw, predicted, and residual features from the `120_features` folder.

This will save demographics and trait measures. The demographic and trait measures will be run in separate analyses.

In [1]:
import os
import pandas as pd
import numpy as np
import readline
import rpy2
import rpy2.robjects as robjects
r = robjects.r

import rpy2.robjects.numpy2ri
rpy2.robjects.numpy2ri.activate()

from rpy2.robjects import pandas2ri
pandas2ri.activate()

from sklearn.preprocessing import scale

# Run

In [13]:
def load_dat(timing):
    """Load the per-video feature measures, ordered to match the trial timing.

    Reads ``measures/z_mean_vid_vals.csv`` and ``measures/z_mean_rel_vid_vals.csv``
    (paths relative to the working directory), combines their measure columns,
    drops the ``mean_fds`` column, reorders the rows so that row ``i`` belongs
    to ``timing.video[i]``, and mean-centers every column.

    Parameters
    ----------
    timing : object with a ``video`` attribute
        Iterable of video names, one per trial, in presentation order.

    Returns
    -------
    (df_cols, df_dat)
        ``df_cols`` is the pandas Index of measure names and ``df_dat`` is the
        centered matrix with one row per entry of ``timing.video``.

    Raises
    ------
    IndexError
        If a video in ``timing.video`` has no row in the measures file.
    """
    # Read in data
    dfa = pd.read_csv("measures/z_mean_vid_vals.csv")
    dfb = pd.read_csv("measures/z_mean_rel_vid_vals.csv")

    # Positional slices: skip the first column of each file and the trailing
    # two (dfa) / one (dfb) columns.
    df = pd.concat([dfa.iloc[:, 1:-2], dfb.iloc[:, 1:-1]], axis=1)
    df = df.loc[:, df.columns != "mean_fds"]

    # Get the video names
    # We want to reorder the dataframe above based on the timing info
    feat_vnames = dfa.iloc[:, -1].values
    inds = []
    for vid in timing.video:
        hits = np.flatnonzero(feat_vnames == vid)
        if hits.size == 0:
            # Same exception type as the bare [0][0] lookup, but it names the culprit
            raise IndexError(
                "video %r from timing not found in measures/z_mean_vid_vals.csv" % (vid,)
            )
        # First matching row wins
        inds.append(hits[0])

    # Extract (inds are row positions, so index positionally) and make matrix
    df_cols = df.columns
    df_dat = df.iloc[inds, :].values

    # Center the columns (subtract the column mean, leave the variance alone)
    df_dat = scale(df_dat, with_mean=True, with_std=False)

    return (df_cols, df_dat)

In [17]:
def face_activity(runs, onsets):
    """Build one timing row per run listing every stimulus onset in that run.

    Parameters
    ----------
    runs : numpy array
        Run label of each trial.
    onsets : numpy array
        Onset of each trial, aligned element-wise with ``runs``.

    Returns
    -------
    numpy array of str
        One entry per unique run, in the order given by ``np.unique(runs)``;
        each entry is that run's onsets joined by single spaces and printed
        with five decimals.
    """
    timing_rows = []
    for run_id in np.unique(runs):
        in_run = runs == run_id
        # Onsets are cast to float32 and rounded to 4 decimals before printing
        run_onsets = onsets[in_run].astype('float32').round(4)
        timing_rows.append(" ".join('%.5f' % t for t in run_onsets))

    return np.array(timing_rows)

def question_activity(runs, onsets, q_regressor):
    """Build one timing row per run listing the onsets of flagged trials.

    Parameters
    ----------
    runs : numpy array
        Run label of each trial.
    onsets : numpy array
        Onset of each trial, aligned element-wise with ``runs``.
    q_regressor : numpy array
        Per-trial flag; only trials whose flag equals 1 are listed.

    Returns
    -------
    numpy array of str
        One entry per unique run, in the order given by ``np.unique(runs)``.
        A run with at least one flagged trial gets its flagged onsets joined
        by single spaces (five decimals); a run with none gets ``'*'``.
    """
    run_rows = []

    for run_id in np.unique(runs):
        in_run = runs == run_id
        flags = q_regressor[in_run]
        times = onsets[in_run].astype('float32').round(4)

        picked = ['%.5f' % t for t, flag in zip(times, flags) if flag == 1]
        if picked:
            run_rows.append(" ".join(picked))
        else:
            # No flagged trial in this run: emit the '*' placeholder
            run_rows.append('*')

    return np.array(run_rows)

def motion_covars(subj):
    """Load and stack the mean-centered motion files for one subject.

    Reads ``df_paths.txt`` from the subject's ``func`` directory, keeps the
    rows whose ``name`` is ``'unfam_vids'``, and uses their ``inindex`` values
    to locate the per-run ``mc/func_runNN_dfile.1D`` files.

    Parameters
    ----------
    subj : str
        Subject identifier used to build the preprocessed-data path.

    Returns
    -------
    numpy array
        The per-run matrices, each column-centered within its run, stacked
        vertically in the order the runs appear in ``df_paths.txt``.
    """
    funcdir = "/data1/famface01/analysis/preprocessed/%s/func" % subj
    path_table = pd.read_table("%s/df_paths.txt" % funcdir, sep=" ")

    is_unfam = path_table.name == 'unfam_vids'
    run_indices = path_table.inindex[is_unfam]
    motion_fpaths = []
    for run_ind in run_indices:
        motion_fpaths.append("%s/mc/func_run%02i_dfile.1D" % (funcdir, run_ind))

    from sklearn.preprocessing import scale
    # Center each run separately (mean removed, variance untouched)
    centered_runs = [
        scale(np.loadtxt(fpath), with_std=False, with_mean=True)
        for fpath in motion_fpaths
    ]

    return np.vstack(centered_runs)

In [16]:
def am_activity(runs, onsets, df_mat):
    """Build onset*value timing rows for every column of a measure matrix.

    Parameters
    ----------
    runs : numpy array
        Run label of each trial.
    onsets : numpy array
        Onset of each trial, aligned element-wise with ``runs``.
    df_mat : 2-D numpy array
        One row per trial and one column per measure.

    Returns
    -------
    list of numpy arrays of str
        One array per column of ``df_mat``. Each array holds one entry per
        unique run (order of ``np.unique(runs)``); an entry is the run's
        trials written as ``onset*value`` and joined by single spaces.
    """
    run_ids = np.unique(runs)
    per_measure = []

    for col in range(df_mat.shape[1]):
        run_rows = []
        for run_id in run_ids:
            in_run = runs == run_id
            times = onsets[in_run].astype('float32').round(4)
            values = df_mat[in_run, col]

            pairs = ['%.5f*%f' % (t, v) for t, v in zip(times, values)]
            run_rows.append(" ".join(pairs))
        per_measure.append(np.array(run_rows))

    return per_measure

In [18]:
# Build and save the timing/regressor files for each of the six subjects
# (sub01 through sub06).
for si in range(6):
    subj = "sub%02i" % (si+1)
    print(subj)
    
    # Load the R data; this (re)defines `dat` inside the embedded R session
    infile = "/data1/famface01/analysis/encoding/ShapeAnalysis/data/roi_n_more_%s.rda" % subj
    r.load(infile)
    
    # Variables
    timing_r   = r.dat.rx2('basics').rx2('timing')
    onsets     = np.array(timing_r.rx2('onset'))
    questions  = np.array(r['as.character'](timing_r.rx2('question')))
    runs       = np.array(timing_r.rx2('run'))
    uruns      = np.unique(runs)
    timing     = pandas2ri.ri2py(timing_r)
    
    # Get data: measure names and the centered trial-by-measure matrix
    dat_cols, dat = load_dat(timing)
        
    
    ###
    # ACTIVITY
    ###
    
    # face
    afni_facemat = face_activity(runs, onsets)
    
    # questions: flag every trial whose question label is not 'none'
    q_regressor  = (questions != 'none') * 1
    afni_qmat    = question_activity(runs, onsets, q_regressor)
    
    # motion
    motion_mat   = motion_covars(subj)
    
    # pose/shape/etc
    shape_dat    = am_activity(runs, onsets, dat)
    
    
    ###
    # SAVE
    ###
    
    base = "/data1/famface01/command/misc/face_representations"
    outbase = "%s/300_task_activity/100_face_deviations_unfam/timings" % base
    outdir = "%s/%s" % (outbase, subj)
    print(outdir)
    if not os.path.exists(outdir):
        # makedirs also creates the parent `timings` directory if it is missing
        os.makedirs(outdir)
    
    # Faces
    ofname = '%s/stim_faces.txt' % outdir
    np.savetxt(ofname, afni_facemat, fmt='%s')

    # Measures: one file per measure column, named after the column
    for i,amat in enumerate(shape_dat):
        cname = dat_cols[i]
        ofname = '%s/stimam_%s.txt' % (outdir, cname)
        np.savetxt(ofname, amat, fmt='%s')
    
    # Questions
    ofname = '%s/stim_questions.txt' % outdir
    np.savetxt(ofname, afni_qmat, fmt='%s')

    # MOTION
    ofname = '%s/motion.1D' % outdir
    np.savetxt(ofname, motion_mat, fmt='%f')

sub01
/data1/famface01/command/misc/face_representations/300_task_activity/100_face_deviations_unfam/timings/sub01
sub02
/data1/famface01/command/misc/face_representations/300_task_activity/100_face_deviations_unfam/timings/sub02
sub03
/data1/famface01/command/misc/face_representations/300_task_activity/100_face_deviations_unfam/timings/sub03
sub04
/data1/famface01/command/misc/face_representations/300_task_activity/100_face_deviations_unfam/timings/sub04
sub05
/data1/famface01/command/misc/face_representations/300_task_activity/100_face_deviations_unfam/timings/sub05
sub06
/data1/famface01/command/misc/face_representations/300_task_activity/100_face_deviations_unfam/timings/sub06


In [19]:
# Inspect the measure names and centered matrix returned by load_dat.
# NOTE: `timing` is the variable left behind by the subject loop above, so this
# shows the data for the last subject that loop processed.
load_dat(timing)

(Index([u'scale', u'pose_scores', u'pose_scores.1', u'pose_scores.2',
        u'pose_scores.3', u'pose_mse', u'mouth_scores', u'mouth_scores.1',
        u'asym', u'mean_face', u'pca_texture', u'rel_scale', u'rel_pose_scores',
        u'rel_pose_scores.1', u'rel_pose_scores.2', u'rel_pose_scores.3',
        u'rel_pose_mse', u'rel_mouth_scores', u'rel_mouth_scores.1',
        u'rel_mean_fds'],
       dtype='object'),
 array([[  0.47655191,   2.10781816, -14.15279452, ...,   0.62843254,
           0.39202561,   0.71275897],
        [ -0.40090932, -12.00620146, -22.81114445, ...,  -2.3868623 ,
           1.97797233,  -0.74265268],
        [  0.12168128,  -3.72586358,  -4.46122053, ...,  -1.2180728 ,
           0.64683892,   0.26654434],
        ..., 
        [ -0.52547395,  -1.33843328,  -8.09600303, ...,   2.74545662,
          -1.45506116,  -0.35229755],
        [  0.23161384,   7.11699951, -14.48553684, ...,  -1.86815063,
           0.18697416,   0.18607995],
        [ -0.67150856,  -9.