In [1]:
# clear all the variables & load the necessary modules
%reset -f
import numpy as np
import sys
import turicreate as tc
import turicreate.aggregate as agg
import os
from os import walk
import fnmatch
import scipy.io
import pandas as pd
import chart_studio.plotly as py
import plotly.graph_objs as go
import plotly.figure_factory as ff
from scipy.stats import norm
from statsmodels.formula.api import ols
from scipy.stats.mstats import zscore
import scipy.stats as st
import random
from plotly import tools
# Directory that holds the per-subject MOT JSON exports
myPath = '../data/processed_files/MOT_json_files/'
sys.path.append(myPath)
# Walk myPath (recursively) and keep every file whose lower-cased name contains
# "behavior", leaving out any file whose name contains "TCent".
file_list = [
    filename
    for _root, _dirs, files in walk(myPath)
    for filename in files
    if fnmatch.fnmatch(filename.lower(), "*behavior*") and "TCent" not in filename
]

In [None]:
def _load_subject_frame(json_name):
    """Read one JSON export from myPath and tag every row with its subject id.

    The subject id is the integer parsed from the last three characters of
    the file name.
    """
    frame = tc.SFrame.read_json(myPath + json_name, orient='records')
    frame['Subject ID'] = int(json_name[-3:])
    return frame

# Stack all subjects into a single SFrame, in file_list order.
trajectory_and_eye_data = _load_subject_frame(file_list[0])
for file in file_list[1:]:
    trajectory_and_eye_data = trajectory_and_eye_data.append(_load_subject_frame(file))

In [6]:
# Order rows by subject, then trial, then frame number.
trajectory_and_eye_data = trajectory_and_eye_data.sort(['Subject ID','Trial number', 'Frame number'])

In [7]:
# Spot-check: pull subject 105 and open the interactive explorer on two of
# that subject's trials (trial numbers 11 and 100).
sub_data = trajectory_and_eye_data[trajectory_and_eye_data['Subject ID']==105]
trial_data1 = sub_data[sub_data['Trial number']==11]
trial_data2 = sub_data[sub_data['Trial number']==100]
trial_data1.explore()
trial_data2.explore()

In [8]:
# Keep every third row (rows 0, 3, 6, ...) of the sorted table.
# NOTE(review): the result is bound to the same name, so re-running this cell
# decimates the already-decimated data again.
trajectory_and_eye_data = trajectory_and_eye_data[0:len(trajectory_and_eye_data):3]
#trajectory_and_eye_data.show()

In [9]:
def compute_centroid(numtargets,x_columns,df,target=True):
    """Average one coordinate over a contiguous slice of object columns.

    Parameters
    ----------
    numtargets : int
        Length of the slice of `x_columns` to average over.
    x_columns : list of str
        Column names holding one coordinate axis.
    df : mapping
        A single row; values are looked up with ``df.get(name)``.
    target : bool, default True
        When truthy the first `numtargets` names are used; otherwise the
        following `numtargets` names (positions numtargets .. 2*numtargets-1).

    Returns
    -------
    The ``np.mean`` of the selected values.
    """
    first = 0 if target else numtargets
    last = numtargets if target else 2*numtargets
    selected_names = x_columns[first:last]
    return np.mean([df.get(name) for name in selected_names])

def compute_euc_dist(column1,column2,column3,column4,eccentricity,scale,x):
    """Euclidean distance between two points of row `x`, divided by `eccentricity`.

    The first point is ``(x[column1], x[column2])`` and the second is
    ``(x[column3], x[column4])``.

    NOTE(review): `scale` is accepted but never used in the computation.
    """
    first_point = [x[column1], x[column2]]
    second_point = [x[column3], x[column4]]
    return tc.distances.euclidean(first_point, second_point) / eccentricity
def compute_euc_dist_origin(column1,column2,eccentricity,scale,x):
    """Euclidean distance of ``(x[column1], x[column2])`` from (0, 0), divided by `eccentricity`.

    NOTE(review): `scale` is accepted but never used in the computation.
    """
    point = [x[column1], x[column2]]
    return tc.distances.euclidean(point, [0, 0]) / eccentricity

In [10]:
# Split the table's columns into x- and y-coordinate name lists.
# NOTE(review): this is a substring test, so every column whose name contains
# ".x" / ".y" anywhere is selected (including any 'fixation.x' / 'fixation.y'
# column present at this point) — confirm that the resulting order and
# membership are what compute_centroid's positional slicing expects.
column_names = trajectory_and_eye_data.column_names()
x_columns = [col for col in column_names if ".x" in col]
y_columns = [col for col in column_names if ".y" in col]

In [11]:
eccentricity = 33.6

def _centroid_series(frame, axis_columns, count_multiplier, use_targets):
    """Apply compute_centroid to every row of `frame` for one coordinate axis.

    The slice length handed to compute_centroid is
    ``count_multiplier * row['Number of Targets']``.
    """
    def _row_centroid(row):
        return compute_centroid(count_multiplier * row['Number of Targets'],
                                axis_columns, row, target=use_targets)
    return frame.apply(_row_centroid)

# With N = the row's 'Number of Targets':
#   T_Centroid   -> mean of the first N object columns
#   D_Centroid   -> mean of the next N object columns
#   All_Centroid -> mean of the first 2N object columns
# Columns are added in the order T.x, T.y, D.x, D.y, All.x, All.y.
for centroid_name, count_multiplier, use_targets in (('T_Centroid', 1, True),
                                                     ('D_Centroid', 1, False),
                                                     ('All_Centroid', 2, True)):
    for axis_suffix, axis_columns in (('.x', x_columns), ('.y', y_columns)):
        trajectory_and_eye_data[centroid_name + axis_suffix] = _centroid_series(
            trajectory_and_eye_data, axis_columns, count_multiplier, use_targets)

In [12]:
eccentricity = 33.6
scale = 1600

def _fixation_distance_series(frame, centroid_name):
    """Row-wise compute_euc_dist between the named centroid and the fixation point."""
    def _row_distance(row):
        return compute_euc_dist(centroid_name + '.x', centroid_name + '.y',
                                'fixation.x', 'fixation.y',
                                eccentricity, scale, row)
    return frame.apply(_row_distance)

# Distance of the fixation point from (0, 0).
trajectory_and_eye_data['distOrigin'] = trajectory_and_eye_data.apply(
    lambda row: compute_euc_dist_origin('fixation.x', 'fixation.y', eccentricity, scale, row))

# Distance of the fixation point from each of the three centroids.
for centroid_name, distance_column in (('T_Centroid', 'TCent_fix_dist'),
                                       ('D_Centroid', 'DCent_fix_dist'),
                                       ('All_Centroid', 'AllCent_fix_dist')):
    trajectory_and_eye_data[distance_column] = _fixation_distance_series(
        trajectory_and_eye_data, centroid_name)

In [13]:
threshold = 2

def _within_threshold_series(frame, distance_column):
    """Row-wise 0/1 indicator: 1 when the distance is at most `threshold`, else 0."""
    def _row_flag(row):
        return 1 if row[distance_column] <= threshold else 0
    return frame.apply(_row_flag)

# One indicator column per centroid distance column.
for centroid_tag in ('TCent', 'DCent', 'AllCent'):
    trajectory_and_eye_data[centroid_tag + '_fix_counts'] = _within_threshold_series(
        trajectory_and_eye_data, centroid_tag + '_fix_dist')

In [14]:
# Open the interactive explorer to inspect the newly added centroid, distance
# and indicator columns.
trajectory_and_eye_data.explore()

In [15]:
# Load the behavioural results table; column types are inferred by read_csv.
# NOTE(review): this path is rooted at './data' while myPath is rooted at
# '../data' — confirm both resolve from the same working directory.
behavior_data = tc.SFrame.read_csv('./data/MOT_csv_files/behavior_data/' + 'behavior_data.csv')

------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[int,int,int,int,int,int,int,float,int,int,int,int,float,int,int,int,int,int,int,int,int,int,int,int,int,int,int,int,int]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


In [16]:
# Mean accuracy per subject, ordered best-first so that rank 1 is the highest
# average accuracy.
individual_ranking = (
    behavior_data
    .groupby('subject_Num', operations={'avg_accuracy': agg.MEAN('accuracy')})
    .sort('subject_Num')
    .sort('avg_accuracy', ascending=False)
)
individual_ranking['rank'] = np.arange(1, individual_ranking.num_rows() + 1)
# Return to subject order for display and for later per-subject lookups.
individual_ranking = individual_ranking.sort('subject_Num')
individual_ranking

subject_Num,avg_accuracy,rank
101,0.7477083333333335,16
102,0.8009250000000003,2
103,0.7633583333333335,10
104,0.7680833333333337,8
105,0.6633083333333334,43
106,0.7353749999999998,23
107,0.7586166666666667,12
108,0.7548249999999999,14
109,0.7060833333333331,30
110,0.6245749999999994,48


In [17]:
# Open the interactive explorer on the full eye/trajectory table.
trajectory_and_eye_data.explore()

In [18]:
# Per-subject mean of each 0/1 indicator column, i.e. the fraction of retained
# rows in which the fixation distance was within the threshold.
centroid_count_means = {
    'avg_TCent_counts': agg.MEAN('TCent_fix_counts'),
    'avg_DCent_counts': agg.MEAN('DCent_fix_counts'),
    'avg_AllCent_counts': agg.MEAN('AllCent_fix_counts'),
}
sub_wise_centroid_counts = (
    trajectory_and_eye_data
    .groupby(['Subject ID'], operations=centroid_count_means)
    .sort(['Subject ID'])
)
sub_wise_centroid_counts.explore()

In [19]:
# Attach the per-subject centroid-looking averages to the ranking table.
#
# The columns are copied positionally between two separate SFrames, so both
# must list the same subjects in the same order. This cell ends by sorting
# individual_ranking by rank, which means a plain re-run would otherwise pair
# rank-ordered rows with subject-ordered values. Both tables are therefore
# re-sorted by subject id here and the ids are checked before any assignment.
ranking_by_subject = individual_ranking.sort('subject_Num')
counts_by_subject = sub_wise_centroid_counts.sort('Subject ID')
if list(ranking_by_subject['subject_Num']) != list(counts_by_subject['Subject ID']):
    raise ValueError(
        "Subject ids in individual_ranking and sub_wise_centroid_counts differ; "
        "refusing to copy columns positionally.")

for source_column, ranking_column in (('avg_TCent_counts', 'avg_TCent_Counts'),
                                      ('avg_DCent_counts', 'avg_DCent_Counts'),
                                      ('avg_AllCent_counts', 'avg_AllCent_Counts')):
    ranking_by_subject[ranking_column] = counts_by_subject[source_column]

# Downstream cells expect the table ordered by rank.
individual_ranking = ranking_by_subject.sort('rank')

In [20]:
# Ordinary least squares fit of avg_TCent_Counts on rank, using a pandas copy
# of the ranking table; `res` is reused by the plotting cell that follows.
hsb = individual_ranking.to_dataframe()

mod = ols("avg_TCent_Counts ~ rank", data=hsb)
res = mod.fit()
print(res.summary())

                            OLS Regression Results                            
Dep. Variable:       avg_TCent_Counts   R-squared:                       0.140
Model:                            OLS   Adj. R-squared:                  0.122
Method:                 Least Squares   F-statistic:                     7.839
Date:                Tue, 27 Aug 2019   Prob (F-statistic):            0.00734
Time:                        13:01:02   Log-Likelihood:                 116.65
No. Observations:                  50   AIC:                            -229.3
Df Residuals:                      48   BIC:                            -225.5
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.0953      0.007     13.860      0.0

In [34]:
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from plotly.graph_objs import *
import colorlover as cl
init_notebook_mode(connected=True)

# Scatter of participant rank against avg_TCent_Counts with the fitted OLS
# line from `res` overlaid; the figure is shown and written as PNG and SVG.

# NOTE(review): machine-specific absolute location; kept in one place so it
# can be pointed elsewhere without touching the write calls.
results_dir = "/Users/supadhy6/Dropbox/Shared with Adi/Results_images"
figure_stem = "centroid_looking_as_a_function_of_individual_rank"

trace = []
x = individual_ranking['rank'].to_numpy()
y = individual_ranking['avg_TCent_Counts'].to_numpy()
trace.append(go.Scatter(x = x,
                       y = y,
                       mode='markers',
                       marker = dict(size = 14, opacity = 0.5, color = 'gray', line = dict(width = 1.5, color='black')), showlegend=False))

# res.params is a label-indexed Series; .iloc makes the positional lookup
# explicit (position 0 is the intercept, position 1 the slope) instead of
# relying on integer keys being treated as positions.
x_lin_space = np.linspace(x.min(), x.max(),100)
y_lin_space = res.params.iloc[0] + res.params.iloc[1]*x_lin_space
# A plain dict replaces the deprecated `Line` class that was only reachable
# through the star import above.
trace.append(go.Scatter(x = x_lin_space, y = y_lin_space,
                       mode='lines',line=dict(color='gray', dash = 'dash', width = 3.5), showlegend=False,
                       ))

# The annotation values are derived from the fit and the data so they cannot
# drift from what is plotted.
# NOTE(review): the trailing "*" is kept as in the original text; confirm it
# still applies whenever the fit changes.
annotations=[
        dict(
            x=10,
            y=.2,
            xref='x1',
            yref='y1',
            text='$r^{2} : %.2f^{*}$' % res.rsquared,
            showarrow=False,
            ax=0,
            ay=-40
        ),
        dict(
        x = 35,
        y = .2,
        xref='x1',
        yref = 'y1',
        text = '$n = %d$' % len(x),
        showarrow=False,
        ax = 0,
        ay = -40,
        )
    ]


fig = go.Figure(data=trace)
fig['layout'].update(height=500, width=500, font = dict(size=14))
fig['layout']['annotations'] = annotations

fig['layout']['xaxis'].update(title='Overall participant rank <br> in the experiment')
fig['layout']['yaxis'].update(title='Avg. % time spent looking at the <br> target centroid')
fig['layout'].update(paper_bgcolor='rgba(0,0,0,0)',plot_bgcolor ='rgba(0,0,0,0)', font = dict(size = 12, color = 'black'))

fig.update_xaxes(showgrid=True, gridwidth=1.5, gridcolor='#DFDFDF', zeroline = True, zerolinecolor = '#DFDFDF', showline=True, linecolor = '#AFAFAF', linewidth = 2.5, nticks = 6)
fig.update_yaxes(showgrid=True, gridwidth=1.5, gridcolor='#DFDFDF', showline=True, linecolor = '#AFAFAF', linewidth = 2.5, nticks = 6)
fig.show()
for image_extension in (".png", ".svg"):
    fig.write_image(os.path.join(results_dir, figure_stem + image_extension))
#py.iplot(fig, filename='model-vs-human-accuracy-ranked-correlation')

In [30]:
def _top_subject_for_trial(trial_id):
    """Return the subject number on the first row after sorting one trial's rows by accuracy, descending."""
    rows_for_trial = behavior_data[behavior_data['trial_Num_Original'] == trial_id]
    return rows_for_trial.sort('accuracy', ascending=False)[0]['subject_Num']

# One entry per trial, in sorted trial order.
best_sub = [_top_subject_for_trial(trial_id)
            for trial_id in behavior_data['trial_Num_Original'].unique().sort()]

In [31]:
# Sorted unique subject numbers present in the behavioural table.
sub_info = behavior_data['subject_Num'].unique().sort()

In [32]:
# pearsonr is imported from the public scipy.stats namespace; the
# scipy.stats.stats module path is deprecated.
from scipy.stats import pearsonr

# For every trial, correlate each other subject's distOrigin series with that
# of the trial's best-performing subject (best_sub[i]).
#
# Ordering contract: values are appended trial by trial (sorted trial
# numbers) and, within a trial, in the iteration order of
# set(sub_info) - set([best_sub[i]]). That order is NOT rank order, and the
# excluded subject changes from trial to trial, so any code that labels these
# values must reproduce the same iteration.
#
# NOTE(review): best_sub is indexed by position, so this assumes the sorted
# 'Trial number' values line up one-to-one with the sorted trial ids used to
# build best_sub. pearsonr returns NaN (with a runtime warning) when either
# series is constant.
corr = []
pval = []
for i,trial in enumerate(trajectory_and_eye_data['Trial number'].unique().sort()):
    population_trial_data = trajectory_and_eye_data[trajectory_and_eye_data['Trial number']==trial]
    best_eye_data = population_trial_data[population_trial_data['Subject ID']==best_sub[i]]['distOrigin'].to_numpy()
    for sub in list(set(sub_info) - set([best_sub[i]])):
        sub_eye_data = population_trial_data[population_trial_data['Subject ID']==sub]['distOrigin'].to_numpy()
        r, p = pearsonr(sub_eye_data, best_eye_data)
        corr.append(r)
        pval.append(p)


invalid value encountered in double_scalars



In [33]:
# Ad-hoc inspection: show the second entry of best_sub wrapped in a set.
set([best_sub[1]])

{101}

In [34]:
# Assemble one row per (trial, compared subject) correlation.
#
# `corr` and `pval` were appended trial by trial and, within a trial, in the
# iteration order of set(sub_info) - set([best_sub[i]]). Tiling the ranks
# 2..N in rank order does not match that order, and the excluded subject is
# the per-trial best rather than the overall rank-1 subject. The row labels
# are therefore rebuilt by replaying exactly the same iteration and looking
# each subject's rank up by id.
# (Rebuilding the same set expression from the same inputs in the same
# process yields the same iteration order as the correlation loop.)
ranks = individual_ranking[individual_ranking['rank']!=1]['rank'].to_numpy()  # retained for any later use
rank_by_subject = dict(zip(individual_ranking['subject_Num'], individual_ranking['rank']))

target_data = behavior_data.groupby('trial_Num_Original',operations ={'num_targets':agg.MEAN('num_targets'), 'speed':agg.MEAN('speed (deg/sec)')})
target_data = target_data.sort('trial_Num_Original')
# Looked up by trial position, matching how best_sub is indexed.
num_targets_by_position = list(target_data['num_targets'])
speed_by_position = list(target_data['speed'])

row_ranking = []
row_trial = []
row_num_targets = []
row_speed = []
for i, trial in enumerate(trajectory_and_eye_data['Trial number'].unique().sort()):
    for sub in list(set(sub_info) - set([best_sub[i]])):
        row_ranking.append(rank_by_subject[sub])
        row_trial.append(trial)
        row_num_targets.append(num_targets_by_position[i])
        row_speed.append(speed_by_position[i])

if len(row_ranking) != len(corr) or len(corr) != len(pval):
    raise ValueError(
        "Row labels (%d) do not match the number of correlations (%d) / p-values (%d)."
        % (len(row_ranking), len(corr), len(pval)))

eye_movement_similarity = tc.SFrame()
eye_movement_similarity['correlation'] = corr
eye_movement_similarity['pval'] = pval
eye_movement_similarity['ranking'] = row_ranking
eye_movement_similarity['Trial number'] = row_trial
eye_movement_similarity['Target number'] = row_num_targets
eye_movement_similarity['speed'] = row_speed

In [35]:
# Order the similarity rows by trial, then by ranking.
eye_movement_similarity = eye_movement_similarity.sort(['Trial number','ranking'])

In [36]:
# Plain assignment: `temp` refers to the same SFrame object, not a copy.
temp = eye_movement_similarity
def fillnan(x):
    """Return NaN when `x` is 0.05 or larger; otherwise return `x` unchanged.

    A NaN input compares false against 0.05 and is therefore passed through
    as-is.
    """
    return np.nan if x >= 0.05 else x
# Replace every p-value of 0.05 or more with NaN (via fillnan).
temp['pval'] = temp['pval'].apply(lambda x: fillnan(x))

In [37]:
# Bind the masked table back to its main name; the JSON export below is
# disabled.
eye_movement_similarity = temp
#eye_movement_similarity.export_json('eye_movement_similarity_best_performer_within_trial.json',orient='records')

In [44]:
#eye_movement_similarity = tc.SFrame.read_json('eye_movement_similarity_best_performer_within_trial.json',orient='records')

In [46]:
# plotly.tools.make_subplots is deprecated in favour of
# plotly.subplots.make_subplots (which does not print the grid layout by
# default).
from plotly.subplots import make_subplots

# Drop rows with missing values (this includes rows whose p-value was
# replaced by NaN earlier).
eye_movement_similarity = eye_movement_similarity.dropna()

# One subplot per target load: mean correlation per rank plus its OLS line.
N_GRID_COLS = 3
sec2_trace = []
sec3_trace = []
annotations = []
for i,target in enumerate(eye_movement_similarity['Target number'].unique().sort()):
    target_load_eye_similarity = eye_movement_similarity[eye_movement_similarity['Target number']==target]
    target_load_eye_similarity = target_load_eye_similarity.groupby('ranking',operations={'avg_correlation':agg.MEAN('correlation')})
    target_load_eye_similarity = target_load_eye_similarity.sort('ranking')
    sec2_trace.append(go.Scatter(x = target_load_eye_similarity['ranking'].to_numpy(),
                           y = target_load_eye_similarity['avg_correlation'].to_numpy(),
                           mode='markers',marker = dict(opacity = 0.3),
                           line=dict(width = 2),
                           name = 'Target load : '+ str(target)))
    hsb = target_load_eye_similarity
    hsb2 = hsb.to_dataframe()

    mod = ols("avg_correlation ~ ranking", data=hsb2)
    res = mod.fit()
    annotations.append(dict(x = 20, y = 0.2, xref = 'x'+str(i+1), yref = 'y'+str(i+1), text = 'rsq :'+str(round(res.rsquared,3)),
                           showarrow = False))

    # Coefficients are looked up by label rather than by integer position.
    rank_grid = np.linspace(target_load_eye_similarity['ranking'].min(),
                            target_load_eye_similarity['ranking'].max(), 100)
    sec3_trace.append(go.Scatter(x = rank_grid,
                                 y = rank_grid*res.params['ranking'] + res.params['Intercept'],
                                 mode='lines', line = dict(color='gray'), showlegend=False))

# Size the grid from the number of target loads instead of assuming six.
n_panels = len(sec2_trace)
n_rows = max(1, -(-n_panels // N_GRID_COLS))  # ceiling division
fig = make_subplots(rows=n_rows, cols=N_GRID_COLS)

# All scatter traces are added before any fit line so that trace order (and
# with it legend order and default marker colours) is unchanged.
for panel_traces in (sec2_trace, sec3_trace):
    for panel_index, panel_trace in enumerate(panel_traces):
        grid_row, grid_col = divmod(panel_index, N_GRID_COLS)
        fig.append_trace(panel_trace, grid_row + 1, grid_col + 1)

fig['layout'].update(height=700, width=1000, font = dict(size=15))
fig['layout']['legend'].update(traceorder='normal')
fig['layout']['annotations'] = annotations

fig['layout']['yaxis'].update(title='avg Eye movement similarity index')
# The x-axis title goes on the middle subplot of the bottom row (axis 5 in a
# 2x3 grid).
bottom_middle_axis = (n_rows - 1) * N_GRID_COLS + 2
fig['layout']['xaxis' + str(bottom_middle_axis)].update(title='overall individual rank in the experiment')


iplot(fig, filename='subejct ranks vs. eye movement similarity')    

This is the format of your plot grid:
[ (1,1) x1,y1 ]  [ (1,2) x2,y2 ]  [ (1,3) x3,y3 ]
[ (2,1) x4,y4 ]  [ (2,2) x5,y5 ]  [ (2,3) x6,y6 ]



In [69]:
# Mean correlation per rank across all retained rows, with an OLS line.
average_eye_similarity = eye_movement_similarity.groupby('ranking',operations={'avg_correlation':agg.MEAN('correlation')})
average_eye_similarity = average_eye_similarity.sort('ranking')

trace = []
trace.append(go.Scatter(x = average_eye_similarity['ranking'].to_numpy(),
                           y = average_eye_similarity['avg_correlation'].to_numpy(),
                           mode='markers',marker = dict(opacity = 0.3, size = 15),
                           line=dict(width = 2)))
hsb = average_eye_similarity
hsb2 = hsb.to_dataframe()

mod = ols("avg_correlation ~ ranking", data=hsb2)
res = mod.fit()
annotations = []
annotations.append(dict(x = 20, y = 0.2, xref = 'x', yref = 'y', text = 'rsq :'+str(round(res.rsquared,3)),
                       showarrow = False))

# The x grid is computed once, and the coefficients are looked up by label
# rather than by integer position on the label-indexed params Series.
rank_grid = np.linspace(average_eye_similarity['ranking'].min(),
                        average_eye_similarity['ranking'].max(), 100)
trace.append(go.Scatter(x = rank_grid,
                        y = rank_grid*res.params['ranking'] + res.params['Intercept'],
                        mode='lines', line = dict(color='gray'), showlegend=False))

fig = go.Figure(data=trace)
#fig['layout'].update(height=700, width=1000, margin = dict(l=130, b=130, r=80, t=150), font = dict(size=15))
fig['layout']['annotations'] = annotations

fig['layout']['xaxis'].update(title='Overall participant rank in the experiment')
fig['layout']['yaxis'].update(title='Avg Eye movement similarity with the <br>best participant - at trial level')

iplot(fig, filename='subejct ranks vs. eye movement similarity')    