In [39]:
import sqlite3
import pandas as pd
import numpy as np
import datetime
import matplotlib.pyplot as plt
import seaborn as sns
from ast import literal_eval
from urllib.request import urlopen
from json import load
sns.set_style('white')
sns.set_context('notebook')

# 1. Import Data

In [49]:
# ASC Cohort
conn = sqlite3.connect('./Data/knwlg_blf.db')
#dat1 = pd.read_sql("SELECT * FROM trials A INNER JOIN subjects B ON A.prolific_id = B.prolific_id WHERE B.completion_code LIKE '%XXX%'", con=conn)
dat1 = pd.read_sql("SELECT * FROM trials A INNER JOIN subjects B ON A.prolific_id = B.prolific_id "
                   "WHERE B.block1_complete=TRUE "
                   "AND B.block2_complete=TRUE "
                   "AND B.block3_complete=TRUE "
                   "AND B.completion_code LIKE '%XXX%'", con=conn)
dat1 = dat1.loc[:,~dat1.columns.duplicated()].copy() # prolific id from both tables will be in df, remove duplicate
# REMOVE DISTRACTORS
dat1 = dat1.loc[dat1.trial_type == 'test'].reset_index(drop=True)
dat1['group'] = 'Autism'

# TD cohort
conn2 = sqlite3.connect('./Data/knwlg_blf_controls.db')
dat2 = pd.read_sql('SELECT * FROM trials A INNER JOIN subjects B ON A.prolific_id = B.prolific_id WHERE B.completion_code="548DA3BD"', con=conn2)
dat2 = dat2.loc[:,~dat2.columns.duplicated()].copy() # prolific id from both tables will be in df, remove duplicate
# REMOVE DISTRACTORS
dat2 = dat2.loc[dat2.trial_type == 'test'].reset_index(drop=True)
dat2['email'] = 'N/A'
dat2['group'] = 'Neurotypical'

# Combine into one big dataframe
dat = pd.concat(objs=[dat1,dat2], ignore_index=True)
#
dat['email'] = dat.email.str.lower().str.strip()

# data saved for testing purposes has id as float instead of alphanumeric so only keep alphanumeric IDs
dat = dat[dat.prolific_id.str.isalnum()].reset_index(drop=True)
dat['response_key'] = dat.response_key.apply(literal_eval)

In [58]:
dat.loc[dat.email =='harrisonstephh@outlook.com']
#dat.email.sort_values( axis=0).unique()

Unnamed: 0,id,trial_num,correct,trial_type,scenario,belief_type,ascription_type,correct_answer,target_onset,response_onset,...,block3_complete,completion_code,feedback,completion_time,group,rt,rt_ms,study_duration,accepted_answer,timeout
624,1705,1,False,test,12,IG,Thinks,f,2022-08-31 07:39:36.584,2022-08-31 07:39:38.088,...,1,XXXY,The game is nice.,NaT,Autism,0 days 00:00:01.504000,1504,,True,False
625,1708,4,False,test,11,FB,Thinks,f,2022-08-31 07:40:56.711,2022-08-31 07:41:01.380,...,1,XXXY,The game is nice.,NaT,Autism,0 days 00:00:04.669000,4669,,True,False
626,1709,5,False,test,6,FB,Knows,f,2022-08-31 07:41:40.085,2022-08-31 07:41:45.184,...,1,XXXY,The game is nice.,NaT,Autism,0 days 00:00:05.099000,5099,,True,True
627,1712,8,False,test,9,TB,Thinks,j,2022-08-31 07:43:05.262,2022-08-31 07:43:08.388,...,1,XXXY,The game is nice.,NaT,Autism,0 days 00:00:03.126000,3126,,True,False
628,1713,9,False,test,5,IG,Thinks,f,2022-08-31 07:43:32.583,2022-08-31 07:43:36.033,...,1,XXXY,The game is nice.,NaT,Autism,0 days 00:00:03.450000,3450,,True,False
629,1714,10,False,test,7,FB,Thinks,f,2022-08-31 07:43:58.745,2022-08-31 07:44:03.845,...,1,XXXY,The game is nice.,NaT,Autism,0 days 00:00:05.100000,5100,,True,True
630,1718,14,True,test,4,TB,Knows,j,2022-08-31 07:45:52.835,2022-08-31 07:45:54.386,...,1,XXXY,The game is nice.,NaT,Autism,0 days 00:00:01.551000,1551,,True,False
631,1720,16,False,test,10,IG,Knows,f,2022-08-31 07:46:49.176,2022-08-31 07:46:50.756,...,1,XXXY,The game is nice.,NaT,Autism,0 days 00:00:01.580000,1580,,True,False
632,1721,17,False,test,3,TB,Knows,j,2022-08-31 07:47:14.384,2022-08-31 07:47:19.483,...,1,XXXY,The game is nice.,NaT,Autism,0 days 00:00:05.099000,5099,,True,True
633,1723,19,False,test,1,FB,Knows,f,2022-08-31 07:48:09.709,2022-08-31 07:48:15.748,...,1,XXXY,The game is nice.,NaT,Autism,0 days 00:00:06.039000,6039,,True,True


## Cleaning

## Convert SQL strings to python datetime object and compute timedeltas

In [51]:
def convert_time(x):
    if type(x) == str:
        return datetime.datetime.strptime(x, '%Y-%m-%d %H:%M:%S.%f')

dat[['response_onset','target_onset','participation_date','completion_time']] = dat[['response_onset','target_onset','participation_date','completion_time']].applymap(convert_time)

dat['rt'] = dat.apply(lambda row: row['response_onset'] - row['target_onset'], axis=1)
dat['rt_ms'] = dat.apply(lambda row: int(round(row['rt'].total_seconds() * 1e3)), axis=1)

## Time to Complete

In [52]:
#dat.iloc[2]['completion_time']
dat[~dat.completion_time.isnull()].completion_time.values[0] - dat[~dat.completion_time.isnull()].participation_date.values[0]

a = dat[~dat.completion_time.isnull()].iloc[0].completion_time#.groupby('prolific_id').completion_time.count()
b = dat[~dat.completion_time.isnull()].iloc[0].participation_date#.groupby('prolific_id').completion_time.count()
b

Timestamp('2022-09-06 22:32:59.570970')

In [53]:
idx = dat[~dat.completion_time.isnull()].index
dat['study_duration'] = None
dat.loc[idx, 'study_duration'] = dat.loc[idx].completion_time - dat.loc[idx].participation_date
#dat.loc[idx].groupby('prolific_id').mean()

# 2. Accuracy

### Note where subjects entered invalid responses

In [54]:
accepted_answers = ['f', 'j', '999']
dat['accepted_answer'] = None
for i, response in enumerate(dat.response_key):
    if sum([key.lower() in accepted_answers for key in response]) >= 1:  # == len(response):
        dat.loc[i, 'accepted_answer'] = True
    else:
        dat.loc[i, 'accepted_answer'] = False

print(f"{len(dat.loc[dat.accepted_answer == False].prolific_id.unique())} participants responded at least one trial with invalid keys. \n"
      f"{len(dat.loc[(dat.accepted_answer == False)&(dat.group == 'Neurotypical')].prolific_id.unique())} of these are from the neurotypical group. \n"
      f" {len(dat.loc[(dat.accepted_answer == False)&(dat.group == 'Autism')].prolific_id.unique())} of these are from the autism group.")

73 participants responded at least one trial with invalid keys. 
16 of these are from the neurotypical group. 
 57 of these are from the autism group.


### Note trials where subjects timed-out after the 5-seconds

In [55]:
dat['timeout'] = False
# Here are timeouts without a response at all
dat.loc[dat.apply(lambda row: '999' in row['response_key'], axis=1), 'timeout'] = True
# Here is where pps pressed a key after 5 seconds, but before the experiment redirected
timeouts = dat.loc[(dat.response_key.apply(len)>1) & (dat.apply(lambda row: '999' in row['response_key'], axis=1))].index
dat.loc[timeouts, 'timeout'] = True

print(f"{len(dat.loc[dat.timeout == True].prolific_id.unique())} participants timed out during at least one trial. \n"
      f"{len(dat.loc[(dat.timeout == True)&(dat.group == 'Neurotypical')].prolific_id.unique())} of these are from the neurotypical group. \n"
      f" {len(dat.loc[(dat.timeout == True)&(dat.group == 'Autism')].prolific_id.unique())} of these are from the autism group.")

429 participants timed out during at least one trial. 
176 of these are from the neurotypical group. 
 253 of these are from the autism group.


In [56]:
# If subjects hit more than one key, use only the last one
dat['response_key'] = [resp[-1].lower() for resp in dat.response_key]

In [57]:
#  correct answers
dat.loc[dat.belief_type == 'TB', 'correct_answer'] = 'j'
dat.loc[dat.belief_type != 'TB', 'correct_answer'] = 'f'
# update correct column based on new answer key
dat['correct'] = dat.apply(lambda row: row['response_key'] == row['correct_answer'], axis=1)

# timeouts are considered incorrect
dat.loc[dat.timeout == True, 'correct'] = False

## Exclude participants with mean accuracy < .6

In [None]:
dat.loc[dat.group == 'Autism'].groupby('prolific_id').correct.mean().hist()

In [None]:
print(f'N autism cohort before exclusion: {len(dat.loc[dat.group == "Autism"].prolific_id.unique())}')
## Exclude participants with mean accuracy < .6
xx = dat.groupby('prolific_id').correct.mean() > .6
dat = dat.loc[dat.prolific_id.isin(xx[xx == True].index.tolist())]

In [None]:
print(f'N autism cohort after exclusion: {len(dat.loc[dat.group == "Autism"].prolific_id.unique())}')

In [None]:
v_dat = dat#.loc[(dat.accepted_answer == True)]
acc = pd.DataFrame(columns=['cohort', 'agent_state', 'ascription', 'pct_correct', 'pct_incorrect'])
for gro in dat['group'].unique():
    for bel in dat.belief_type.unique():
        for ascrip in dat.ascription_type.unique():
            dd = v_dat.loc[(v_dat.group == gro)&(v_dat.belief_type==bel)&(v_dat.ascription_type==ascrip)]
            pc = len(dd.loc[dd.correct== True])/len(dd)
            ndf = pd.DataFrame({'cohort':[gro], 'agent_state':[bel], 'ascription':[ascrip], 'pct_correct':[pc], 'pct_incorrect':[1-pc]})
            acc = pd.concat([acc,ndf ], axis=0)#.reset_index(drop=True)
#sns.barplot(x='belief_type', row='group', x='ascription_type', hue='correct', kind='bar', data=)
#sns.histplot(data=v_dat, x='belief_type', hue='correct', multiple='stack', element='bars', stat='percent')

In [None]:
import matplotlib.patches as mpatches
acc['total'] = 1
acc.melt(id_vars=['cohort', 'agent_state', 'ascription'], value_vars=['pct_correct', 'total'], value_name='Percent', var_name='Accuracy')
#sns.catplot(col='belief_type', row='group', x='correct', hue='ascription_type', kind='count', data=dat)
#dat.groupby('group').prolific_id.describe()
# bar chart 1 -> top bars (group of 'smoker=No')
#f, ax = plt.subplots(1, 2)
#bar1 = sns.barplot(x="agent_state",  y="total", hue='ascription', data=acc.loc[acc.cohort == 'Neurotypical'], color='darkblue', ax=ax[0])
#bar1a = sns.barplot(x="agent_state",  y="total", hue='ascription', data=acc.loc[acc.cohort == 'Autism'],  color='darkblue', ax=ax[1])

# bar chart 2 -> bottom bars (group of 'smoker=Yes')
#bar2 = sns.barplot(x="agent_state",  y="pct_correct", hue='ascription', data=acc.loc[acc.cohort == 'Neurotypical'], color='darkblue', ax=ax[0])
#bar2a = sns.barplot(x="agent_state",  y="pct_correct", hue='ascription', data=acc.loc[acc.cohort == 'Autism'], color='darkblue', ax=ax[1])

bar2 = sns.catplot(x="agent_state", y="pct_correct", hue='ascription', data=acc,col='cohort', kind='bar')
# add legend


In [None]:
total = v_dat.groupby(['belief_type', 'ascription_type'])['accepted_answer'].sum().reset_index()
correct = v_dat[v_dat.correct == True].groupby(['belief_type', 'ascription_type'])['accepted_answer'].sum().reset_index()

correct['pct'] = [i / j * 100 for i,j in zip(len(correct), len(total))]
total['pct'] = [i / j * 100 for i,j in zip(len(total), len(total))]

# bar chart 1 -> top bars (group of 'smoker=No')
bar1 = sns.barplot(x="belief_type",  y="pct", data=total, color='darkblue')

# bar chart 2 -> bottom bars (group of 'smoker=Yes')
bar2 = sns.barplot(x="belief_type", y="pct", data=correct, color='lightblue')

# add legend
top_bar = mpatches.Patch(color='darkblue', label='incorrect')
bottom_bar = mpatches.Patch(color='lightblue', label='correct')
plt.legend(handles=[top_bar, bottom_bar])


In [None]:
len(dat.loc[dat.group == 'Autism'].prolific_id.unique())

In [None]:
len(dat.loc[dat.group == 'Autism'].prolific_id.unique())


In [None]:
sns.catplot(col='belief_type', x='correct', hue='ascription_type', kind='count', data=dat)


## Check to make sure all responses came AFTER the target onset

In [None]:
sum(dat.apply(lambda row: row['response_onset'] > row['target_onset'], axis=1)) == len(dat)


## Exclude subjects with mean RTs less than 1500ms or more than 4000ms

In [None]:
xx = dat.groupby('prolific_id').rt_ms.mean() < 1500
dat = dat.loc[dat.prolific_id.isin(xx[xx == False].index.tolist())]
yy = dat.groupby('prolific_id').rt_ms.mean() > 4000
dat = dat.loc[dat.prolific_id.isin(yy[yy == False].index.tolist())]

for old, new in {'IG': 'Ignorance', 'TB': 'True Info', 'FB': 'False Info'}.items():
    dat.belief_type.replace(old, new, inplace=True)

# 3. Location

In [34]:
adat = dat.loc[dat.group == 'Autism']
adat[['city', 'region', 'country', 'loc', 'org', 'postal', 'timezone']] = None

for subj in adat.prolific_id.unique():
    addr = adat.loc[adat.prolific_id == subj, 'ip_addy'].values[0]
    url = 'https://ipinfo.io/' + addr + '/json'
    res = urlopen(url)
    #response from url(if res==None then check connection)
    data = load(res)
    try:
        adat.loc[adat.prolific_id == subj, ['city', 'region', 'country', 'loc', 'org', 'postal', 'timezone', 'email', 'participation_date', 'study_duration']] = [data['city'], data['region'], data['country'], data['loc'], data['org'], data['postal'], data['timezone'], adat.loc[adat.prolific_id == subj, 'email'].values[0], adat.loc[adat.prolific_id == subj, 'participation_date'].values[0], adat.loc[adat.prolific_id == subj, 'study_duration'].values[0]]
    except:
        print(f'issue with id: {subj}')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  adat[['city', 'region', 'country', 'loc', 'org', 'postal', 'timezone']] = None
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, v, pi)


issue with id: hkaDfNtDY7Vic6Q2
issue with id: KcsjMENf2Rb52asn
issue with id: QFgA4FniYFQUJiLD
issue with id: cd1W9YvW5FmGdjut
issue with id: skxn7ToatEqrJCMk
issue with id: fAq7JhFiusgsyHBu


In [19]:
addr = adat.loc[adat.prolific_id == 'QngbaGvRLhEOyfRf', 'ip_addy'].values[0]
url = 'https://ipinfo.io/' + addr + '/json'
res = urlopen(url)
#response from url(if res==None then check connection)
data = load(res)
data

{'ip': '75.117.243.66',
 'hostname': 'h66.243.117.75.dynamic.ip.windstream.net',
 'city': 'Concord',
 'region': 'North Carolina',
 'country': 'US',
 'loc': '35.4089,-80.5816',
 'org': 'AS7029 Windstream Communications LLC',
 'postal': '28027',
 'timezone': 'America/New_York',
 'readme': 'https://ipinfo.io/missingauth'}

In [35]:
xx = adat.groupby(['prolific_id','participation_date', 'email', 'ip_addy','study_duration', 'city', 'region', 'country', 'loc', 'org', 'postal', 'timezone']).correct.mean()#.sort_values('correct')
yy = xx.reset_index().sort_values('correct', axis=0)
yy

Unnamed: 0,prolific_id,participation_date,email,ip_addy,study_duration,city,region,country,loc,org,postal,timezone,correct
34,8WSVThF1sSOcQQYr,2022-09-08 03:34:07.612076,giftkevin728@gmail.com,5.62.59.55,0 days 01:33:45.883202,Raleigh,North Carolina,US,"35.7721,-78.6386","AS29889 Fast Serv Networks, LLC",27601,America/New_York,0.000000
126,dk2kUNMlfjAmlDVC,2022-09-08 14:22:00.203119,gm0766838@gmail.com,156.146.36.110,0 days 00:27:39.253389,New York City,New York,US,"40.7143,-74.0060",AS60068 Datacamp Limited,10004,America/New_York,0.000000
163,p5JLgP3WMn82fe7K,2022-09-08 15:15:51.130380,deanmichelle362@gmail.com,102.89.34.100,0 days 00:51:26.307833,Santa Clara,California,US,"37.3924,-121.9623","AS14061 DigitalOcean, LLC",95054,America/Los_Angeles,0.000000
164,p5JLgP3WMn82fe7K,2022-09-08 15:15:51.130380,deanmichelle362@gmail.com,143.198.78.59,0 days 00:51:26.307833,Santa Clara,California,US,"37.3924,-121.9623","AS14061 DigitalOcean, LLC",95054,America/Los_Angeles,0.000000
165,p5JLgP3WMn82fe7K,2022-09-08 15:15:51.130380,deanmichelle362@gmail.com,5.161.77.55,0 days 00:51:26.307833,Santa Clara,California,US,"37.3924,-121.9623","AS14061 DigitalOcean, LLC",95054,America/Los_Angeles,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...
161,ojRZ1hEX6NkvzFR2,2022-09-08 07:53:26.880706,ladylondon38@gmail.com,198.7.56.244,0 days 00:25:20.510651,Washington,"Washington, D.C.",US,"38.8951,-77.0364","AS30633 Leaseweb USA, Inc.",20004,America/New_York,0.666667
159,oLy8v6Y1zTAoTCn5,2022-09-08 10:07:30.324559,soonwow041@gmail.com,149.57.16.179,0 days 00:29:17.908553,Wilmington,Delaware,US,"39.7460,-75.5466",AS212238 Datacamp Limited,19801,America/New_York,0.666667
188,tnGByjBjbUzRJwyf,2022-09-07 11:11:25.458222,dave@trahans.net,191.96.227.102,0 days 00:27:41.962866,New York City,New York,US,"40.7143,-74.0060",AS174 Cogent Communications,10004,America/New_York,0.833333
148,jcOXPqMioLJNLZ3D,2022-09-06 22:32:59.570970,johnnyldudley@gmail.com,174.105.249.50,0 days 00:27:16.157435,Columbus,Ohio,US,"39.9444,-82.8903",AS10796 Charter Communications Inc,43227,America/New_York,0.916667


# 2. Response Times

In [None]:
sns.set_context('notebook', font_scale=2)
fig, ax = plt.subplots(figsize=(15, 12))
g = sns.violinplot(x='belief_type', y='rt_ms', hue='ascription_type', hue_order=['Thinks', 'Knows'], data=dat.loc[(dat.trial_type=='test') & (dat.correct==True)], cut=0, bw=0.25, ax=ax, inner=None, linewidth=1, saturation=1)
g = sns.stripplot(x='belief_type', y='rt_ms', hue='ascription_type', hue_order=['Thinks', 'Knows'], data=dat.loc[(dat.trial_type=='test') & (dat.correct==True)],
                  color='k', dodge=True, ax=ax, jitter=.2, linewidth=1, alpha=.2)
g= sns.pointplot(x='belief_type', y='rt_ms', hue='ascription_type', hue_order=['Thinks', 'Knows'], data=dat.loc[(dat.trial_type=='test') & (dat.correct==True)], join=False, dodge=.4, capsize=.1, ax=ax, color='k',)

ax.legend(g.get_legend_handles_labels()[0][:2], g.get_legend_handles_labels()[1][:2])
g.yaxis.set_label_text("response time (ms)")
g.xaxis.set_label_text("Agent state")
ax.xaxis.labelpad = 20

#fig.savefig('proposal_S1_control_result.png')

In [None]:
len(dat.loc[(dat.trial_type=='test') & (dat.correct==True)].prolific_id.unique())

# 3. Felicity Judgements

In [None]:
f_dat = pd.read_sql('SELECT * FROM felicities A INNER JOIN subjects B ON A.prolific_id = B.prolific_id WHERE B.completion_code="XXXX"', con=conn)
f_dat = f_dat.loc[:,~f_dat.columns.duplicated()].copy() # prolific id from both tables will be in df, remove duplicate
# 1= "Sounds very weird"
# 7 = "Sounds very normal"
f_dat['Infelicity Rating'] = 8 - f_dat.felicity_rating

In [None]:
sns.set_context('paper', font_scale=2)
fig, ax = plt.subplots(figsize=(12,12))
g = sns.violinplot(x='fel_belief_type', y='Infelicity Rating', hue='fel_ascription_type', data=f_dat, cut=0, bw=0.25, ax=ax, inner=None, linewidth=1, saturation=1, order=['TB', 'IG', 'FB'])

def jitter(values,j):
    return values + np.random.normal(j,0.15,values.shape)

g = sns.stripplot(x='fel_belief_type', y=jitter(f_dat['Infelicity Rating'], 0), hue='fel_ascription_type', data=f_dat,
                  color='k', dodge=True, ax=ax, jitter=.45, linewidth=1.5, alpha=.1, order=['TB', 'IG', 'FB'])
g= sns.pointplot(x='fel_belief_type', y='Infelicity Rating', hue='fel_ascription_type', data=f_dat, join=False, dodge=.4, capsize=.1, ax=ax, color='k',order=['TB', 'IG', 'FB'])

ax.legend(g.get_legend_handles_labels()[0][:2], g.get_legend_handles_labels()[1][:2])

# 4. AQ-10

In [None]:

a_dat = pd.read_sql('SELECT * FROM autism_scores A INNER JOIN subjects B ON A.prolific_id = B.prolific_id WHERE B.completion_code="XXXX"', con=conn)
a_dat = a_dat.loc[:,~a_dat.columns.duplicated()].copy() # prolific id from both tables will be in df, remove duplicate

#a_dat = a_dat[a_dat.prolific_id.isin(dat.prolific_id.unique()).values] # only use subjects in trial analysis
a_dat.describe()

In [None]:
a_dat[a_dat.prolific_id.isin(dat.prolific_id.unique().tolist())]

In [None]:
## Scoring
agree = ['AQ_rating_1', 'AQ_rating_7', 'AQ_rating_8', 'AQ_rating_10'] # score of 3 or 4 get a point
disagree = ['AQ_rating_2','AQ_rating_3','AQ_rating_4','AQ_rating_5','AQ_rating_6','AQ_rating_9', ] # score of 1 or 2 get a point]
a = a_dat[agree] >= 3
b = a_dat[disagree] < 3
a_dat['AQ_score'] = a.sum(axis=1) + b.sum(axis=1)
a_dat = a_dat[a_dat.prolific_id.isin(dat.prolific_id.unique().tolist())]

fg, ax = plt.subplots(figsize=(10, 8))
sns.countplot(data=a_dat, x='AQ_score', ax=ax);

In [None]:
e_dat.groupby('diag').describe()

In [None]:
cdat = dat.loc[dat.correct ==True]
for subject in dat.prolific_id.unique():
    # calculate mean RT for belief ascription
    a_dat.loc[a_dat.prolific_id == subject, 'avg_ThinkRT'] = cdat.loc[(cdat.prolific_id == subject) & (cdat.ascription_type == 'Thinks')].rt_ms.mean()
    a_dat.loc[a_dat.prolific_id == subject, 'avg_KnowRT'] = cdat.loc[(cdat.prolific_id == subject) & (cdat.ascription_type == 'Knows')].rt_ms.mean()

a_dat['RT_diff'] = a_dat.apply(lambda row: row['avg_ThinkRT'] - row['avg_KnowRT'], axis=1)


In [None]:
sns.lmplot(x='AQ_score', y='RT_diff', data=a_dat);
