Please get the raw, predicted, and residual features from `120_features` folder.

This will save demographics and trait measures. The demo and trait measures will be run in separate analyses.

Here, we get the raw/pred/resids as well as the probabilistic values for gender.

In [2]:
import os
import pandas as pd
import numpy as np
import readline
import rpy2
import rpy2.robjects as robjects
r = robjects.r

import rpy2.robjects.numpy2ri
rpy2.robjects.numpy2ri.activate()

from rpy2.robjects import pandas2ri
pandas2ri.activate()

from sklearn.preprocessing import scale

# Run

In [22]:
# We also remove the effect of gender from makeup
def load_probs_dat(timing, to_center=True):
    df = pd.read_csv("../../120_features/demo+traits_raw+pred+resids.csv")
    feat_vnames = df.ix[:,0]
    
    # We want to reorder the dataframe above based on the timing info
    inds   = [ (x == feat_vnames).nonzero()[0][0] for x in timing.video ]
    #print( np.all(feat_vnames[inds] == timing.video) )
    df = df.ix[inds,1:]
    
    # Remove effect of gender from probs
    resids = r.lm('gender_probs ~ gender_raw', data=df).rx2('residuals')
    df['gender_rprobs'] = resids
    
    # Make gender raw to -5 to 0.5 to get difference inherent in this thing
    df['gender_diff'] = df['gender_raw'] - 0.5
    
    # Select the data that want
    df = df.ix[:,["gender_diff", "gender_rprobs"]]
    
    # Center the probability values
    df["gender_rprobs"] = scale(df.gender_rprobs, with_mean=True, with_std=False)
    
    # Extract
    df_cols = df.columns
    df_dat  = df.as_matrix()
    
    return (df_cols, df_dat)
    
def load_dat(timing, to_center=True):
    df = pd.read_csv("../../120_features/demo+traits_raw+pred+resids.csv")
    feat_vnames = df.ix[:,0]
    
    # We want to reorder the dataframe above based on the timing info
    inds   = [ (x == feat_vnames).nonzero()[0][0] for x in timing.video ]
    #print( np.all(feat_vnames[inds] == timing.video) )
    
    # Extract
    df_cols = df.columns[1:]
    df_dat  = df.ix[inds,1:]
    
    # Remove effect of gender from makeup
    resids = r.lm('makeup_raw ~ gender_raw', data=df_dat).rx2('residuals')
    df_dat.makeup_raw = resids
    resids = r.lm('makeup_pred ~ gender_pred', data=df_dat).rx2('residuals')
    df_dat.makeup_pred = resids
    resids = r.lm('makeup_resid ~ gender_resid', data=df_dat).rx2('residuals')
    df_dat.makeup_resid = resids
    
    # Make matrix
    df_dat = df_dat.as_matrix()
    
    # Center the none binary values
    cinds = np.array([ x in ['glasses_resid', 'glasses_probs', 'glasses_residprobs', 'gender_resid', 'gender_probs', 'gender_residprobs'] for x in df_cols ])
    cinds = range(0,21) + cinds.nonzero()[0].tolist()
    df_dat[:,cinds] = scale(df_dat[:,cinds], with_mean=True, with_std=False)
    
    return (df_cols, df_dat)

In [24]:
def face_activity(runs, onsets):
    uruns = np.unique(runs)
    nruns = uruns.shape[0]
    afni_facemat = []

    for ri in range(nruns):
        run_inds = runs == uruns[ri]
        n = np.sum(run_inds)

        ovec = onsets[run_inds].astype('float32').round(4)
        row = [ '%.5f' % ovec[i] for i in range(n) ]
        row = " ".join(row)

        afni_facemat.append(row)

    return np.array(afni_facemat)

def question_activity(runs, onsets, q_regressor):
    uruns = np.unique(runs)
    
    afni_qmat = []
    nruns = uruns.shape[0]
    
    for ri in range(nruns):
        run_inds = runs == uruns[ri]
        n = np.sum(run_inds)
        
        qvec = q_regressor[run_inds]
        ovec = onsets[run_inds].astype('float32').round(4)
        row  = np.array([ '%.5f' % ovec[i] for i,touse in enumerate(qvec) if touse == 1 ])
        if len(row) == 0:
            row = '*'
        else:
            row = " ".join(row)

        afni_qmat.append(row)
    
    return np.array(afni_qmat)

def motion_covars(subj):
    funcdir = "/data1/famface01/analysis/preprocessed/%s/func" % subj
    df_paths = pd.read_table("%s/df_paths.txt" % funcdir, sep=" ")

    inds = df_paths.inindex[df_paths.name == 'unfam_vids']
    motion_fpaths = [ "%s/mc/func_run%02i_dfile.1D" % (funcdir, ind) for ind in inds ]

    from sklearn.preprocessing import scale
    motion_mats = []
    for fpath in motion_fpaths:
        x = np.loadtxt(fpath)
        x = scale(x, with_std=False, with_mean=True)
        motion_mats.append(x)
    motion_mat = np.vstack(motion_mats)
    
    return motion_mat

In [25]:
def am_activity(runs, onsets, df_mat):
    uruns = np.unique(runs)
    
    nruns = uruns.shape[0]
    afni_mats = []
    
    for ci in range(df_mat.shape[1]):
        afni_mat = []
        for ri in range(nruns):
            run_inds = runs == uruns[ri]
            n        = np.sum(run_inds)
            
            ovecs= onsets[run_inds].astype('float32').round(4)
            dvecs= df_mat[run_inds,ci]
            row = [ '%.5f*%f' % (ovecs[i],dvecs[i]) for i in range(n) ]
            row = " ".join(row)
            
            afni_mat.append(row)
        afni_mats.append(np.array(afni_mat))
    
    return afni_mats

In [103]:
# Skip the first subject...for now
for si in range(6):
    subj = "sub%02i" % (si+1)
    print(subj)
    
    # Load the R data
    infile = "/data1/famface01/analysis/encoding/ShapeAnalysis/data/roi_n_more_%s.rda" % subj
    r.load(infile)
    
    # Variables
    onsets     = np.array(r.dat.rx2('basics').rx2('timing').rx2('onset'))
    questions  = np.array(r['as.character'](r.dat.rx2('basics').rx2('timing').rx2('question')))
    runs       = np.array(r.dat.rx2('basics').rx2('timing').rx2('run'))
    uruns      = np.unique(runs)
    timing     = pandas2ri.ri2py(r.dat.rx2('basics').rx2('timing'))
    
    # Get data
    dat_cols, dat = load_dat(timing)
        
    
    ###
    # ACTIVITY
    ###
    
    # face
    afni_facemat = face_activity(runs, onsets)
    
    # questions
    q_regressor  = (questions != 'none') * 1
    afni_qmat    = question_activity(runs, onsets, q_regressor)
    
    # motion
    motion_mat   = motion_covars(subj)
    
    # raw values
    raw_inds     = np.array(r.grep('raw', dat_cols))-1
    raw_cols     = dat_cols[raw_inds]
    raw_dat      = dat[:,raw_inds]
    raw_afni     = am_activity(runs, onsets, raw_dat)
    
    # predicted
    pred_inds    = np.array(r.grep('pred$', dat_cols))-1
    pred_cols    = dat_cols[pred_inds]
    pred_dat     = dat[:,pred_inds]
    pred_afni    = am_activity(runs, onsets, pred_dat)
    
    # residuals
    resid_inds   = np.array(r.grep('resid$', dat_cols))-1
    resid_cols   = dat_cols[resid_inds]
    resid_dat    = dat[:,resid_inds]
    resid_afni   = am_activity(runs, onsets, resid_dat)
    
    # probs
    prob_inds    = np.array(r.grep('probs', dat_cols))-1
    prob_cols    = dat_cols[prob_inds]
    prob_dat     = dat[:,prob_inds]
    prob_afni    = am_activity(runs, onsets, prob_dat)
    
    
    ###
    # SAVE
    ###

    base = "/data1/famface01/command/misc/face_representations"
    outbase = "%s/300_task_activity/150_face_basics_unfam/timings" % base
    outdir = "%s/%s" % (outbase, subj)
    print outdir
    if not os.path.exists(outdir):
        os.mkdir(outdir)

    # Faces
    ofname = '%s/stim_faces.txt' % outdir
    np.savetxt(ofname, afni_facemat, fmt='%s')

    # Raw
    for i,amat in enumerate(raw_afni):
        cname = raw_cols[i]
        ofname = '%s/stimam_%s.txt' % (outdir, cname)
        np.savetxt(ofname, amat, fmt='%s')
    
    # Pred
    for i,amat in enumerate(pred_afni):
        cname = pred_cols[i]
        ofname = '%s/stimam_%s.txt' % (outdir, cname)
        np.savetxt(ofname, amat, fmt='%s')
    
    # Resid
    for i,amat in enumerate(resid_afni):
        cname = resid_cols[i]
        ofname = '%s/stimam_%s.txt' % (outdir, cname)
        np.savetxt(ofname, amat, fmt='%s')
    
    # Prob
    for i,amat in enumerate(prob_afni):
        cname = prob_cols[i]
        ofname = '%s/stimam_%s.txt' % (outdir, cname)
        np.savetxt(ofname, amat, fmt='%s')
    
    # Questions
    ofname = '%s/stim_questions.txt' % outdir
    np.savetxt(ofname, afni_qmat, fmt='%s')

    # MOTION
    ofname = '%s/motion.1D' % outdir
    np.savetxt(ofname, motion_mat, fmt='%f')

sub01
/data1/famface01/command/misc/face_representations/300_task_activity/150_face_basics_unfam/timings/sub01
sub02
/data1/famface01/command/misc/face_representations/300_task_activity/150_face_basics_unfam/timings/sub02
sub03
/data1/famface01/command/misc/face_representations/300_task_activity/150_face_basics_unfam/timings/sub03
sub04
/data1/famface01/command/misc/face_representations/300_task_activity/150_face_basics_unfam/timings/sub04
sub05
/data1/famface01/command/misc/face_representations/300_task_activity/150_face_basics_unfam/timings/sub05
sub06
/data1/famface01/command/misc/face_representations/300_task_activity/150_face_basics_unfam/timings/sub06


In [107]:
dat_cols, dat = load_dat(timing)
df = pd.DataFrame(dat, columns=dat_cols)
print(r.round(r.cor(df), 2))

                   age_raw age_pred age_resid makeup_raw makeup_pred
age_raw               1.00     0.71      0.70       0.13       -0.01
age_pred              0.71     1.00      0.00       0.00       -0.02
age_resid             0.70     0.00      1.00       0.18        0.01
makeup_raw            0.13     0.00      0.18       1.00        0.40
makeup_pred          -0.01    -0.02      0.01       0.40        1.00
makeup_resid          0.13    -0.02      0.20       0.68       -0.27
trait01_raw           0.12     0.20     -0.03      -0.16       -0.10
trait01_pred          0.20     0.24      0.04      -0.09       -0.14
trait01_resid        -0.01     0.06     -0.07      -0.14       -0.01
trait02_raw           0.37     0.25      0.27      -0.07       -0.14
trait02_pred          0.34     0.46      0.03      -0.14       -0.15
trait02_resid         0.27     0.10      0.28      -0.03       -0.10
trait03_raw          -0.31    -0.24     -0.20      -0.32       -0.20
trait03_pred         -0.29    -0.3

### Only Gender Prob

In [26]:
# Skip the first subject...for now
for si in range(6):
    subj = "sub%02i" % (si+1)
    print(subj)
    
    # Load the R data
    infile = "/data1/famface01/analysis/encoding/ShapeAnalysis/data/roi_n_more_%s.rda" % subj
    r.load(infile)
    
    # Variables
    onsets     = np.array(r.dat.rx2('basics').rx2('timing').rx2('onset'))
    questions  = np.array(r['as.character'](r.dat.rx2('basics').rx2('timing').rx2('question')))
    runs       = np.array(r.dat.rx2('basics').rx2('timing').rx2('run'))
    uruns      = np.unique(runs)
    timing     = pandas2ri.ri2py(r.dat.rx2('basics').rx2('timing'))
    
    # Get data
    dat_cols, dat = load_probs_dat(timing)
        
    
    ###
    # ACTIVITY
    ###
    
    # face
    afni_facemat = face_activity(runs, onsets)
    
    # questions
    q_regressor  = (questions != 'none') * 1
    afni_qmat    = question_activity(runs, onsets, q_regressor)
    
    # motion
    motion_mat   = motion_covars(subj)
    
    # gender raw and prob
    gender_afni     = am_activity(runs, onsets, dat)
    
    
    ###
    # SAVE
    ###

    base = "/data1/famface01/command/misc/face_representations"
    outbase = "%s/300_task_activity/150_face_basics_unfam/timings" % base
    outdir = "%s/%s" % (outbase, subj)
    print outdir
    if not os.path.exists(outdir):
        os.mkdir(outdir)

    ## Faces
    #ofname = '%s/stim_faces.txt' % outdir
    #np.savetxt(ofname, afni_facemat, fmt='%s')

    # Gender
    for i,amat in enumerate(gender_afni):
        cname = dat_cols[i]
        ofname = '%s/stimam_%s.txt' % (outdir, cname)
        np.savetxt(ofname, amat, fmt='%s')

sub01
/data1/famface01/command/misc/face_representations/300_task_activity/150_face_basics_unfam/timings/sub01
sub02
/data1/famface01/command/misc/face_representations/300_task_activity/150_face_basics_unfam/timings/sub02
sub03
/data1/famface01/command/misc/face_representations/300_task_activity/150_face_basics_unfam/timings/sub03
sub04
/data1/famface01/command/misc/face_representations/300_task_activity/150_face_basics_unfam/timings/sub04
sub05
/data1/famface01/command/misc/face_representations/300_task_activity/150_face_basics_unfam/timings/sub05
sub06
/data1/famface01/command/misc/face_representations/300_task_activity/150_face_basics_unfam/timings/sub06


## Gender Prob

In [11]:
df = pd.read_csv("../../120_features/demo+traits_raw+pred+resids.csv")
df.head()

retx = r.lm("gender_probs ~ gender_raw", data=df)
print np.array(retx.rx2('residuals'))[:10]

[-0.11505164 -0.10899156 -0.00377593  0.11003352 -0.12748018 -0.10127152
  0.10832148 -0.0956561   0.09920694  0.03157283]


0    1.009787
1    1.010371
2   -1.028740
3   -1.009689
4    1.008692
5    1.011165
6    1.082699
7    1.011780
8   -1.010744
9    1.039724
Name: gender_residprobs, dtype: float64

In [14]:
si = 1

subj = "sub%02i" % (si+1)
print(subj)

# Load the R data
infile = "/data1/famface01/analysis/encoding/ShapeAnalysis/data/roi_n_more_%s.rda" % subj
r.load(infile)

# Variables
onsets     = np.array(r.dat.rx2('basics').rx2('timing').rx2('onset'))
questions  = np.array(r['as.character'](r.dat.rx2('basics').rx2('timing').rx2('question')))
runs       = np.array(r.dat.rx2('basics').rx2('timing').rx2('run'))
uruns      = np.unique(runs)
timing     = pandas2ri.ri2py(r.dat.rx2('basics').rx2('timing'))

df_cols, df_dat = load_probs_dat(timing)

sub02


In [16]:
df_dat.shape

(1728, 2)

# Example Subject

In [2]:
subj = "sub03"

# Load the R data
infile = "/data1/famface01/analysis/encoding/ShapeAnalysis/data/roi_n_more_%s.rda" % subj
print(infile)
r.load(infile)

print(r.dat.rx2('basics').rx2('timing').names)

/data1/famface01/analysis/encoding/ShapeAnalysis/data/roi_n_more_sub03.rda
[1] "run"         "onset"       "local.onset" "duration"    "question"   
[6] "video"      



In [3]:
onsets     = np.array(r.dat.rx2('basics').rx2('timing').rx2('onset'))
questions  = np.array(r['as.character'](r.dat.rx2('basics').rx2('timing').rx2('question')))
vids       = np.array(r['as.character'](r.dat.rx2('basics').rx2('timing').rx2('video')))
vidlevs    = np.array(r.dat.rx2('basics').rx2('timing').rx2('video').levels)
fams       = np.array(r.dat.rx2('basics').rx2('timing').rx2('fam'))
runs       = np.array(r.dat.rx2('basics').rx2('timing').rx2('run'))
uruns      = np.unique(runs)

print(r.table(questions))
print(r.table(runs))


        age  attractive      gender intelligent        none trustworthy 
         16          16          32          16        1632          16 


  1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16 
108 108 108 108 108 108 108 108 108 108 108 108 108 108 108 108 



In [23]:
base = "/data1/famface01/analysis/encoding/12_Features"
demos = pd.read_csv('%s/demographics_unfam_df.csv' % base)
demos.head()

Unnamed: 0.1,Unnamed: 0,batch,video,age,facial_hair,makeup,gender,race,hair,eye,glasses
0,Aaron_Sorkin,0,Aaron_Sorkin_vid03_clip01_2secs_fr045,3.166667,,0.75,Male,White,Brown,Brown,Yes
1,Abdullah,0,Abdullah_vid02_clip01_2secs_fr039,1.333333,Goatee and moustache,0.083333,Male,Black,Black,Brown,No
2,Agnes_Bruckner,0,Agnes_Bruckner_vid02_clip05_2secs_fr003,1.916667,,1.416667,Female,White,Blonde,Brown,No
3,Abba_Eban,0,Abba_Eban_vid03_clip18_2secs_fr033,5.583333,,0.416667,Male,White,White,Brown,No
4,Aisha_Hinds,0,Aisha_Hinds_vid05_clip02_2secs_fr027,2.25,,1.75,Female,Black,Black,Brown,No


In [27]:
import re
base = "/data1/famface01/analysis/encoding/12_Features"
demos = pd.read_csv('%s/demographics_unfam_df.csv' % base)
vnames0 = demos.ix[:,0]
demos = demos.ix[:,1:]
vnames = demos.video # typo
vnames = [ re.sub("_fr[0-9]{3}", "", vname) for vname in vnames ]
vnames = np.array(vnames)
vnames[:10]

array(['Aaron_Sorkin_vid03_clip01_2secs', 'Abdullah_vid02_clip01_2secs',
       'Agnes_Bruckner_vid02_clip05_2secs', 'Abba_Eban_vid03_clip18_2secs',
       'Aisha_Hinds_vid05_clip02_2secs',
       'Alejandro_Toledo_vid03_clip01_2secs',
       'Ali_Abbas_vid03_clip01_2secs', 'Ali_Landry_vid06_clip05_2secs',
       'Sylvia_Jeffreys', 'wall_of_faces_vid93'], 
      dtype='|S47')

In [29]:
(vnames == 'Lynne_McGranger').nonzero()

(array([249]),)

In [8]:
print r.dat.rx2('features').rx2('demos').names

[1] "age"         "facial_hair" "makeup"      "gender"      "race"       
[6] "hair"        "eye"         "glasses"    



In [96]:
pandas2ri.ri2py(r.dat.rx2('basics').rx2('timing')).head()

Unnamed: 0,run,onset,local.onset,duration,question,video
1,1,0.001657,0.001657,2.0,none,Lynne_McGranger
2,1,2.025683,2.025683,2.0,none,Larry_Hagman_U215_vid05_clip01_2secs
3,1,4.035705,4.035705,2.0,none,manyfaces01_vid03_clip01_face08_2secs
4,1,10.200527,10.200527,2.0,none,Jacques_Rogge_U089_vid02_clip06_2secs
5,1,12.210857,12.210857,2.0,none,Steevy_Boulay


In [95]:
pandas2ri.ri2py(r.dat.rx2('features').rx2('demos')).head() # so these will be re-ordered by the video

Unnamed: 0,age,facial_hair,makeup,gender,race,hair,eye,glasses
788,3.666667,,1.333333,Female,White,Black,Hazel,No
250,1.0,,0.2,Male,White,Blonde,Brown,Yes
360,0.2,,0.0,Male,White,Black,Brown,No
530,2.0,Goatee and moustache,0.2,Male,Black,Bald,Brown,No
645,2.0,Stubble,0.2,Male,Hispanic/Latino,Black,Brown,No


In [74]:
ro_demos = pandas2ri.ri2py(r.dat.rx2('features').rx2('demos'))

In [75]:
# Select the measures that we want
ro_subdemos = ro_demos.loc[:,["age","gender","makeup","glasses"]]
ro_subdemos.head()

Unnamed: 0,age,gender,makeup,glasses
788,3.666667,Female,1.333333,No
250,1.0,Male,0.2,Yes
360,0.2,Male,0.0,No
530,2.0,Male,0.2,No
645,2.0,Male,0.2,No


In [76]:
# Make the factors into numbers
ro_subdemos.loc[:,'gender'] = (ro_subdemos.gender == 'Female')*1 - 0.5 # 0.5 = female
ro_subdemos.loc[:,'glasses'] = (ro_subdemos.glasses == 'Yes')*1 - 0.5 # 0.5 = glasses

# Remove the mean from the age
ro_subdemos.loc[:,'age'] = scale(ro_subdemos.age, with_std=False)
ro_subdemos.loc[:,'makeup'] = scale(ro_subdemos.makeup, with_std=False)

ro_subdemos.head()

Unnamed: 0,age,gender,makeup,glasses
788,1.248129,0.5,0.686349,-0.5
250,-1.418538,-0.5,-0.446984,0.5
360,-2.218538,-0.5,-0.646984,-0.5
530,-0.418538,-0.5,-0.446984,-0.5
645,-0.418538,-0.5,-0.446984,-0.5


In [77]:
ro_subdemos_cols = ro_subdemos.columns
ro_subdemos = ro_subdemos.as_matrix()

In [78]:
np.corrcoef(ro_subdemos.T) # make-up and gender are highly correlated

array([[ 1.        , -0.20614244, -0.09256538,  0.14383106],
       [-0.20614244,  1.        ,  0.81037275, -0.19042234],
       [-0.09256538,  0.81037275,  1.        , -0.20958931],
       [ 0.14383106, -0.19042234, -0.20958931,  1.        ]])

In [56]:
###
# FACE ACTIVITY
###

nruns = uruns.shape[0]
afni_facemat = []

for ri in range(nruns):
    run_inds = runs == uruns[ri]
    n = np.sum(run_inds)
    
    ovec = onsets[run_inds].astype('float32').round(4)
    row = [ '%.5f' % ovec[i] for i in range(n) ]
    row = " ".join(row)

    afni_facemat.append(row)

afni_facemat = np.array(afni_facemat)
afni_facemat[:2]

array([ '0.00170 2.02570 4.03570 10.20050 12.21090 14.22120 16.23130 18.24150 20.25180 24.33920 26.38280 28.39310 30.40330 36.00060 40.03560 44.13900 46.20010 48.21050 50.22070 52.23090 54.24110 58.29510 60.33880 62.38210 64.39230 66.40250 68.41270 70.42290 78.02870 80.08910 82.13280 84.14290 86.15330 90.17360 92.18390 96.23790 98.29820 100.32510 102.33550 108.36620 110.37640 114.48060 116.52400 118.53440 120.54470 122.55490 124.56500 128.61900 134.73340 136.74361 138.93810 140.94839 142.99210 152.17210 154.21581 156.22591 158.23610 160.24640 166.27699 168.32080 172.39120 174.58600 176.59621 178.60629 180.61659 182.66029 184.70360 189.02600 191.03619 195.27440 197.31810 199.36180 201.37199 203.38220 210.16660 212.17680 214.18700 220.00060 222.02699 228.15810 230.16830 232.17860 234.18880 238.20920 240.23610 242.27991 248.36079 254.39140 256.40161 258.44501 260.50580 262.51590 268.71420 270.72440 272.76770 274.81180 276.87219 278.88251 282.90289 284.91321 286.92340 296.03629 298.09680 3

In [79]:
###
# Demographics
###

nruns = uruns.shape[0]
afni_demomats = []

for ci in range(ro_subdemos.shape[1]):
    afni_demomat = []
    for ri in range(nruns):
        run_inds = runs == uruns[ri]
        n        = np.sum(run_inds)

        ovecs= onsets[run_inds].astype('float32').round(4)
        dvecs= ro_subdemos[run_inds,ci]
        row = [ '%.5f*%f' % (ovecs[i],dvecs[i]) for i in range(n) ]
        row = " ".join(row)

        afni_demomat.append(row)
    afni_demomats.append(np.array(afni_demomat))

afni_demomats[0][:2]

array([ '0.00170*1.248129 2.02570*-1.418538 4.03570*-2.218538 10.20050*-0.418538 12.21090*-0.418538 14.22120*-1.751871 16.23130*1.181462 18.24150*-0.018538 20.25180*1.381462 24.33920*2.581462 26.38280*2.081462 28.39310*-2.307427 30.40330*-1.418538 36.00060*-0.618538 40.03560*-1.618538 44.13900*-0.818538 46.20010*0.181462 48.21050*-0.618538 50.22070*0.581462 52.23090*-1.218538 54.24110*1.781462 58.29510*-0.085204 60.33880*-0.418538 62.38210*1.248129 64.39230*2.381462 66.40250*-1.018538 68.41270*1.781462 70.42290*0.381462 78.02870*0.981462 80.08910*-0.618538 82.13280*1.581462 84.14290*0.781462 86.15330*-0.418538 90.17360*1.581462 92.18390*-1.218538 96.23790*-1.018538 98.29820*-0.018538 100.32510*-1.418538 102.33550*-0.018538 108.36620*-0.618538 110.37640*-1.018538 114.48060*-0.818538 116.52400*2.981462 118.53440*-0.585204 120.54470*-0.018538 122.55490*2.181462 124.56500*-0.618538 128.61900*1.781462 134.73340*0.581462 136.74361*-0.018538 138.93810*-0.418538 140.94839*-0.618538 142.99210*-

In [37]:
###
# QUESTIONS
###

q_regressor = (questions != 'none') * 1
print(r.table(q_regressor))

## note: this will need to be saved separately

afni_qmat = []
nruns = uruns.shape[0]
for ri in range(nruns):
    run_inds = runs == uruns[ri]
    n = np.sum(run_inds)

    qvec = q_regressor[run_inds]
    ovec = onsets[run_inds].astype('float32').round(4)
    row  = np.array([ '%.5f' % ovec[i] for i,touse in enumerate(qvec) if touse == 1 ])
    if len(row) == 0:
        row = '*'
    else:
        row = " ".join(row)
    
    afni_qmat.append(row)

afni_qmat[:2]


   0    1 
1632   96 



['30.40 70.42 142.99 203.38 214.19 286.92',
 '356.63 396.36 432.36 517.36 532.17 590.80']

In [41]:
###
# MOTION
###

funcdir = "/data1/famface01/analysis/preprocessed/%s/func" % subj
df_paths = pd.read_table("%s/df_paths.txt" % funcdir, sep=" ")

inds = df_paths.inindex[df_paths.name == 'fam_vids']
motion_fpaths = [ "%s/mc/func_run%02i_dfile.1D" % (funcdir, ind) for ind in inds ]

from sklearn.preprocessing import scale
motion_mats = []
for fpath in motion_fpaths:
    x = np.loadtxt(fpath)
    x = scale(x, with_std=False, with_mean=True)
    motion_mats.append(x)
motion_mat = np.vstack(motion_mats)
motion_mat[:4,:].round(4)

array([[-0.1902, -0.1482, -0.1249,  0.0603, -0.1349, -0.0231],
       [-0.1712, -0.0821, -0.1478,  0.0443, -0.125 ,  0.064 ],
       [-0.1563, -0.0612, -0.1005, -0.0062, -0.1183,  0.0553],
       [-0.1791, -0.1469, -0.138 ,  0.0557, -0.1137, -0.0185]])

In [80]:
###
# SAVE
###

base = "/data1/famface01/command/misc/face_representations"
outbase = "%s/300_task_activity/150_face_basics_unfam/timings" % base
outdir = "%s/%s" % (outbase, subj)
print outdir
if not os.path.exists(outdir):
    os.mkdir(outdir)

# Faces
ofname = '%s/stim_faces.txt' % outdir
np.savetxt(ofname, afni_facemat, fmt='%s')

# Demos
for i,amat in enumerate(afni_demomats):
    cname = ro_subdemos_cols[i]
    ofname = '%s/stimam_%s.txt' % (outdir, cname)
    np.savetxt(ofname, amat, fmt='%s')

# Questions
ofname = '%s/stim_questions.txt' % outdir
np.savetxt(ofname, afni_qmat, fmt='%s')

# MOTION
ofname = '%s/motion.1D' % outdir
np.savetxt(ofname, motion_mat, fmt='%f')

/data1/famface01/command/misc/face_representations/300_task_activity/150_face_basics_unfam/timings/sub03


## Traits

In [10]:
print r.dat.rx2('features').names

[1] "face"       "shape"      "appearance" "demos"      "openface"  
[6] "traits"     "featconfig"



In [11]:
pandas2ri.ri2py(r.dat.rx2('features').rx2('traits')).head() # so these were re-ordered by the video

Unnamed: 0,attractive,extraverted,trustworthy,intelligent,mean,competent,typical,unemotional,memorable,humble
476,4.5,4.916667,4.416667,4.909091,4.6,6.083333,4.818182,5.5,6.0,3.8
426,5.833333,4.833333,5.916667,6.333333,3.272727,6.076923,5.833333,4.909091,5.0,5.083333
492,5.333333,7.0,5.75,5.363636,3.4,5.416667,6.363636,4.833333,4.363636,4.1
343,6.25,3.75,5.333333,5.5,4.636364,5.307692,5.083333,5.727273,5.076923,5.583333
648,5.75,7.083333,5.384615,4.916667,2.833333,4.833333,5.583333,3.090909,5.0,5.363636


In [12]:
base         = "/data1/famface01/analysis/encoding/12_Features"
traits_df    = pd.read_csv('%s/personality_traits_grpave.csv' % base)
trait_vnames = traits_df.ix[:,0]
trait_vnames[:10]

0        Aaron_Sorkin_vid03_clip01_2secs
1           Abba_Eban_vid03_clip18_2secs
2            Abdullah_vid02_clip01_2secs
3                          Ada_Nicodemou
4            Adam_Ant_vid02_clip05_2secs
5      Adel_Al_Jubeir_vid01_clip01_2secs
6                           Adele_Haenel
7    Adrian_Fernandez_vid01_clip01_2secs
8    Adrienne_Barbeau_vid06_clip01_2secs
9     Adrienne_Frantz_vid05_clip03_2secs
Name: Unnamed: 0, dtype: object

In [13]:
(trait_vnames == 'Lynne_McGranger').nonzero()

(array([475]),)

In [14]:
traits_df = pandas2ri.ri2py(r.dat.rx2('features').rx2('traits'))

In [15]:
traits_df.head()

Unnamed: 0,attractive,extraverted,trustworthy,intelligent,mean,competent,typical,unemotional,memorable,humble
476,4.5,4.916667,4.416667,4.909091,4.6,6.083333,4.818182,5.5,6.0,3.8
426,5.833333,4.833333,5.916667,6.333333,3.272727,6.076923,5.833333,4.909091,5.0,5.083333
492,5.333333,7.0,5.75,5.363636,3.4,5.416667,6.363636,4.833333,4.363636,4.1
343,6.25,3.75,5.333333,5.5,4.636364,5.307692,5.083333,5.727273,5.076923,5.583333
648,5.75,7.083333,5.384615,4.916667,2.833333,4.833333,5.583333,3.090909,5.0,5.363636


In [16]:
from sklearn.decomposition import PCA, FastICA

pca = PCA()
pca.fit(traits_df.as_matrix())


ica = FastICA(max_iter=1000)
ica.fit(traits_df.as_matrix())



FastICA(algorithm='parallel', fun='logcosh', fun_args=None, max_iter=1000,
    n_components=None, random_state=None, tol=0.0001, w_init=None,
    whiten=True)

In [17]:
print pca.explained_variance_ratio_.round(3)
print np.cumsum(pca.explained_variance_ratio_).round(3)

[ 0.346  0.174  0.13   0.113  0.08   0.052  0.034  0.03   0.025  0.015]
[ 0.346  0.52   0.65   0.763  0.844  0.896  0.93   0.96   0.985  1.   ]


In [18]:
# rows give each component
pca.components_[0,:].round(4)

array([ 0.4222,  0.5318,  0.1179, -0.1417, -0.3655, -0.1269,  0.0167,
       -0.5738,  0.1416, -0.0761])

In [147]:
pca.fit_transform(traits_df.as_matrix())

(1728, 10)

In [116]:
np.corrcoef(traits_df.T).round(2)

array([[ 1.  ,  0.4 ,  0.29, -0.15, -0.3 , -0.13,  0.22, -0.27,  0.14,
        -0.12],
       [ 0.4 ,  1.  ,  0.11, -0.15, -0.34, -0.14,  0.05, -0.7 ,  0.2 ,
        -0.31],
       [ 0.29,  0.11,  1.  ,  0.27, -0.47,  0.24,  0.2 , -0.15, -0.08,
         0.44],
       [-0.15, -0.15,  0.27,  1.  ,  0.09,  0.75,  0.18,  0.23, -0.27,
         0.16],
       [-0.3 , -0.34, -0.47,  0.09,  1.  ,  0.11, -0.06,  0.43, -0.08,
        -0.28],
       [-0.13, -0.14,  0.24,  0.75,  0.11,  1.  ,  0.21,  0.23, -0.2 ,
         0.12],
       [ 0.22,  0.05,  0.2 ,  0.18, -0.06,  0.21,  1.  ,  0.04, -0.47,
        -0.05],
       [-0.27, -0.7 , -0.15,  0.23,  0.43,  0.23,  0.04,  1.  , -0.18,
         0.08],
       [ 0.14,  0.2 , -0.08, -0.27, -0.08, -0.2 , -0.47, -0.18,  1.  ,
        -0.16],
       [-0.12, -0.31,  0.44,  0.16, -0.28,  0.12, -0.05,  0.08, -0.16,  1.  ]])

In [148]:
from sklearn.decomposition import PCA, FastICA
traits_df = pandas2ri.ri2py(r.dat.rx2('features').rx2('traits'))
pca = PCA()
pca.fit(traits_df.as_matrix())
traits_pca = pca.fit_transform(traits_df.as_matrix())

In [150]:
traits_pca.shape

(1728, 10)

## Shape onto Age and Gender

In [7]:
df = pd.read_csv("../../120_features/demo+traits_raw+pred+resids.csv")
feat_vnames = df.ix[:,0]

# We want to reorder the dataframe above based on the timing info
timing = pandas2ri.ri2py(r.dat.rx2('basics').rx2('timing'))
inds   = [ (x == feat_vnames).nonzero()[0][0] for x in timing.video ]

print( np.all(feat_vnames[inds] == timing.video) )

df.ix[inds,:].head()

True


Unnamed: 0.1,Unnamed: 0,age.raw,age.pred,age.resid,makeup.raw,makeup.pred,makeup.resid,trait01.raw,trait01.pred,trait01.resid,...,glasses.raw,glasses.pred,glasses.resid,glasses.probs,glasses.resid.probs,gender.raw,gender.pred,gender.resid,gender.probs,gender.resid.probs
482,Lynne_McGranger,3.666667,2.944083,0.691593,1.333333,1.112445,0.190051,0.479636,0.261258,0.206155,...,0,0,-1.028497,0.939604,-1.029707,0,0,-1.05665,0.983952,-1.009668
433,Larry_Hagman_U215_vid05_clip01_2secs,1.0,2.377406,-1.386633,0.2,0.203751,0.028911,0.239675,0.568067,-0.348985,...,1,1,1.111111,0.00156,1.024216,1,1,1.06357,0.039662,1.012489
499,manyfaces01_vid03_clip01_face08_2secs,0.2,1.183398,-0.946768,0.0,0.013805,0.032132,-0.929445,-0.159406,-0.770787,...,0,0,-1.028497,0.99636,-1.019173,1,1,1.06357,0.038282,1.012326
350,Jacques_Rogge_U089_vid02_clip06_2secs,2.0,1.861826,0.148749,0.2,0.427965,-0.210971,1.244768,0.270533,0.961759,...,0,0,-1.028497,0.999998,-1.018642,1,1,1.06357,0.180085,1.047773
655,Steevy_Boulay,2.0,1.367899,0.661645,0.2,0.412224,-0.194129,-1.50119,-0.741633,-0.744423,...,0,0,-1.028497,0.999942,-1.01865,1,1,1.06357,0.143207,1.033587


In [99]:
df2 = pd.read_csv("z_demos_givenshape.csv")
feat_vnames = df2.ix[:,0]
df2.head()

Unnamed: 0.1,Unnamed: 0,shape.gender,gender,shape.age,age
0,Ahmed_Chalabi_U000_vid06_clip05_2secs,1.0,Male,3.275345,4.6
1,Aidan_Quinn_vid01_clip03_2secs,1.0,Male,3.343797,3.111111
2,Aisha_Hinds_vid05_clip02_2secs,5.029532e-05,Female,2.129432,2.25
3,Aishwarya_Rai_vid05_clip06_2secs,1.31381e-08,Female,1.717749,2.2
4,AJ_Cook_vid03_clip01_2secs,0.999998,Male,3.436241,2.777778


In [24]:
# We want to reorder the dataframe above based on the timing info
timing = pandas2ri.ri2py(r.dat.rx2('basics').rx2('timing'))
inds   = [ (x == feat_vnames).nonzero()[0][0] for x in timing.video ]

In [101]:
np.all(feat_vnames[inds] == timing.video)

True

In [102]:
df2 = df2.loc[inds,:]

In [103]:
df2.head()

Unnamed: 0.1,Unnamed: 0,shape.gender,gender,shape.age,age
482,Lynne_McGranger,1.819804e-09,Female,2.519954,3.666667
433,Larry_Hagman_U215_vid05_clip01_2secs,0.7735571,Male,1.920023,1.0
499,manyfaces01_vid03_clip01_face08_2secs,0.9876172,Male,1.366866,0.2
350,Jacques_Rogge_U089_vid02_clip06_2secs,0.9999587,Male,1.885887,2.0
655,Steevy_Boulay,0.9999994,Male,1.989886,2.0


In [104]:
df2.loc[:,'gender'] = (df2.gender == 'Female')*1 - 0.5

In [105]:
np.array(df2['gender'])[:,None].shape

(1728, 1)

In [106]:
fit = r.lm('gender ~ shape.gender', data=df2)
df2.loc[:,'resid.gender'] = np.array(fit.rx2('residuals'))

fit = r.lm('age ~ shape.age', data=df2)
df2.loc[:,'resid.age'] = np.array(fit.rx2('residuals'))

In [110]:
df3 = df2.ix[:,['shape.gender', 'resid.gender', 'shape.age', 'resid.age']]

In [108]:
np.corrcoef(df2.ix[:,1:].T).round(4)

array([[ 1.    , -0.6504,  0.3959,  0.2463,  0.    ,  0.0251],
       [-0.6504,  1.    , -0.2997, -0.2061,  0.7596, -0.043 ],
       [ 0.3959, -0.2997,  1.    ,  0.57  , -0.0556,  0.    ],
       [ 0.2463, -0.2061,  0.57  ,  1.    , -0.0605,  0.8216],
       [ 0.    ,  0.7596, -0.0556, -0.0605,  1.    , -0.0351],
       [ 0.0251, -0.043 ,  0.    ,  0.8216, -0.0351,  1.    ]])

In [111]:
np.corrcoef(df3.T).round(4)

array([[ 1.    ,  0.    ,  0.3959,  0.0251],
       [ 0.    ,  1.    , -0.0556, -0.0351],
       [ 0.3959, -0.0556,  1.    ,  0.    ],
       [ 0.0251, -0.0351,  0.    ,  1.    ]])

In [112]:
import pandas.rpy.common as com
#df3.columns = ['shape.gender', 'resid.gender', 'shape.age', 'resid.age']
df3.index = range(df3.shape[0])
r_dataframe = com.convert_to_r_dataframe(df3)
print r.round(r.cor(r_dataframe), 4)

             shape.gender resid.gender shape.age resid.age
shape.gender       1.0000       0.0000    0.3959    0.0251
resid.gender       0.0000       1.0000   -0.0556   -0.0351
shape.age          0.3959      -0.0556    1.0000    0.0000
resid.age          0.0251      -0.0351    0.0000    1.0000



In [94]:
import pandas.rpy.common as com
df2.index = range(df2.shape[0])
r_dataframe = com.convert_to_r_dataframe(df2.ix[:,1:])
print r.round(r.cor(r_dataframe), 4)

                   shape.gender  gender shape.age     age resid.gender
shape.gender             1.0000 -0.6504    0.3959  0.2463       0.0000
gender                  -0.6504  1.0000   -0.2997 -0.2061       0.7596
shape.age                0.3959 -0.2997    1.0000  0.5700      -0.0556
age                      0.2463 -0.2061    0.5700  1.0000      -0.0605
resid.gender             0.0000  0.7596   -0.0556 -0.0605       1.0000
resid.age                0.0251 -0.0430    0.0000  0.8216      -0.0351
shape                    0.7610 -0.5248    0.8970  0.5213      -0.0393
resid.shape.gender       0.6488 -0.3870   -0.4420 -0.2318       0.0461
resid.shape.age         -0.6488  0.3870    0.4420  0.2318      -0.0461
                   resid.age   shape resid.shape.gender resid.shape.age
shape.gender          0.0251  0.7610             0.6488         -0.6488
gender               -0.0430 -0.5248            -0.3870          0.3870
shape.age             0.0000  0.8970            -0.4420          0.4420
ag

In [None]:
def load_demos_givenshapes(df2):
    
    # Make gender into numbers
    df2.loc[:,'gender'] = (df2.gender == 'Female')*1 - 0.5
    
    # Regress out shape info from gender
    fit = r.lm('gender ~ shape.gender', data=df2)
    df2.loc[:,'resid.gender'] = np.array(fit.rx2('residuals'))
    
    # Do same for age
    fit = r.lm('age ~ shape.age', data=df2)
    df2.loc[:,'resid.age'] = np.array(fit.rx2('residuals'))
    
    # Now get the average shape info from gender and age
    df2.loc[:,'shape'] = (df2['shape.gender'] + df2['shape.age'])/2.
    
    # Then regress that average out from each of the individuals
    fit = r.lm('shape.gender ~ shape', data=df2)
    df2.loc[:,'resid.shape.gender'] = np.array(fit.rx2('residuals'))
    fit = r.lm('shape.age ~ shape', data=df2)
    df2.loc[:,'resid.shape.age'] = np.array(fit.rx2('residuals'))
    
    # Select the columns that you want
    df3 = df2.ix[:,['shape', 'resid.shape.gender', 'resid.gender', 'resid.shape.age', 'resid.age']]
    omat = df3.as_matrix()
    
    # Save the column names
    cnames = ['shape', 'shape_gender', 'gender', 'shape_age', 'age']
    
    return (cnames, omat)

In [25]:
df2 = pd.read_csv("z_traitsfa_givenshape.csv")
feat_vnames = df2.ix[:,0]
inds   = [ (x == feat_vnames).nonzero()[0][0] for x in timing.video ]
df2 = df2.loc[inds,:]

# Select the columns that you want
cnames = []
for i in range(6):
    cnames.append('shape.trait%i' % (i+1))
    cnames.append('resid.trait%i' % (i+1))
df3 = df2.ix[:,cnames]
omat = df3.as_matrix()

In [27]:
df2.head()

Unnamed: 0.1,Unnamed: 0,shape.trait1,resid.trait1,trait1,shape.trait2,resid.trait2,trait2,shape.trait3,resid.trait3,trait3,shape.trait4,resid.trait4,trait4,shape.trait5,resid.trait5,trait5,shape.trait6,resid.trait6,trait6
482,Lynne_McGranger,0.37739,0.077966,0.453085,0.276889,-0.479171,-0.212623,-0.144517,-0.982691,-1.123844,0.117481,0.898306,1.088175,-0.253472,-0.564142,-0.60835,0.141007,-0.576507,-0.444232
433,Larry_Hagman_U215_vid05_clip01_2secs,0.426403,-0.219128,0.204205,0.447822,0.158243,0.588582,-0.424644,1.012946,0.5873,0.077353,0.120756,0.245249,0.032783,0.338315,0.342729,0.017972,0.221338,0.23081
499,manyfaces01_vid03_clip01_face08_2secs,-0.12935,-0.73665,-0.860008,0.257415,-0.491373,-0.243485,0.348265,-0.728829,-0.369517,-0.179699,-0.463558,-0.757848,0.330098,0.829982,0.884896,-0.286467,0.906438,0.612043
350,Jacques_Rogge_U089_vid02_clip06_2secs,0.178798,1.043214,1.222979,0.046511,-0.570278,-0.524481,-0.178893,0.619117,0.443052,0.058906,-0.029306,0.065134,0.125046,-0.596559,-0.576475,0.508256,-0.37963,0.119204
655,Steevy_Boulay,-0.639422,-0.877662,-1.502776,-0.324196,-0.67732,-0.986738,0.182827,0.249862,0.441156,0.101871,-0.402218,-0.237781,0.152435,0.116587,0.141324,0.311457,-0.369885,-0.06748


In [36]:
load_traitfas_givenshapes(timing)


## Save the column names
#cnames = ['shape_gender', 'resid_gender', 'shape_age', 'resid_age']

#return (cnames, omat)

(['shape.trait1',
  'resid.trait1',
  'shape.trait2',
  'resid.trait2',
  'shape.trait3',
  'resid.trait3',
  'shape.trait4',
  'resid.trait4',
  'shape.trait5',
  'resid.trait5',
  'shape.trait6',
  'resid.trait6'],
 array([[ 0.61998941,  0.22938528, -0.27617159, ...,  0.65660999,
          0.45431506, -0.23090589],
        [ 0.13864001,  0.52905331, -0.02439548, ...,  0.49908887,
          0.47132391,  0.17759158],
        [ 0.59446093,  0.6154013 ,  0.42817143, ..., -0.0665591 ,
         -0.60392204,  0.38502804],
        ..., 
        [-0.29111518,  1.24690195,  0.32044439, ...,  1.09169953,
         -0.17577261, -0.00733365],
        [ 0.04849344, -0.48560833, -0.08958192, ...,  1.54786802,
          0.31461865,  0.82482352],
        [ 0.29334646,  0.30760277,  0.11737209, ...,  1.05420262,
          0.41571903,  0.46755991]]))

In [35]:
df3.head()

AttributeError: 'numpy.ndarray' object has no attribute 'head'