In [1]:
import os 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import cross_validate
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn import metrics
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.metrics import make_scorer
from sklearn.model_selection import permutation_test_score
%matplotlib inline

In [2]:
def score_func(y, y_pred):
    score = roc_auc_score(y, y_pred, multi_class="ovo")
    return score

In [3]:
my_scores = make_scorer(score_func=score_func, greater_is_better=True, needs_proba=True, needs_threshold=False)

In [4]:
os.getcwd()

'/Users/zhengyuanrui/FastSame-Sequence/2_Analysis/2_Result'

In [5]:
os.chdir("../../1_Data/2_Postpro_data")

In [6]:
df = pd.read_csv("Exp2_RT.csv")

In [7]:
column = ['subj_idx', 'gender', 'year','education', 'condition', 'valence', 'matchness', 'rt']

In [8]:
df = df[column]

In [9]:
df["valence"] = df["valence"].map(dict(Neutral=0, Bad=1, Good=2))

In [10]:
df["condition"] = df["condition"].map(dict(simultaneous=0, word_first=1, image_first=2))

In [11]:
df["matchness"] = df["matchness"].map(dict(mismatch=0, match=1))

In [12]:
df_sim = df.query("condition == 1")

In [13]:
df_sim

Unnamed: 0,subj_idx,gender,year,education,condition,valence,matchness,rt
239,1,0,2000,5,1,0,0,761
240,1,0,2000,5,1,2,0,745
241,1,0,2000,5,1,1,1,639
242,1,0,2000,5,1,2,1,566
243,1,0,2000,5,1,2,1,510
...,...,...,...,...,...,...,...,...
12636,9,0,2003,4,1,0,1,527
12637,9,0,2003,4,1,2,1,692
12638,9,0,2003,4,1,1,1,571
12639,9,0,2003,4,1,1,0,903


In [14]:
df_img = df.query("condition == 2")

In [15]:
df_word = df.query("condition == 1")

In [16]:
sub_score = []
sub_pval = []

In [17]:
for sub, df_sub in df_img.groupby("subj_idx"):
    X = df_sub['rt'].values
    X_log = np.log(X)
    y = df_sub['valence'].values
    pipe_lr = make_pipeline(
    MinMaxScaler(), 
    LogisticRegression(solver='saga', multi_class="multinomial", max_iter=1000, random_state=123))
    cv = StratifiedShuffleSplit(n_splits=100, test_size=0.2, random_state=123)
    scores = cross_validate(pipe_lr, X_log.reshape(-1, 1), y, cv=cv, scoring=my_scores)
    sub_score.append(scores)
    _,_,pval = permutation_test_score(pipe_lr, X_log.reshape(-1, 1), y, cv=cv, n_permutations=1000, n_jobs=-1, random_state=123, verbose=1, scoring=my_scores, fit_params=None)
    sub_pval.append(pval)

    

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:   10.2s
[Parallel(n_jobs=-1)]: Done 184 tasks      | elapsed:   32.4s
[Parallel(n_jobs=-1)]: Done 434 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 784 tasks      | elapsed:  2.0min
[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:  2.7min finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    5.9s
[Parallel(n_jobs=-1)]: Done 184 tasks      | elapsed:   31.6s
[Parallel(n_jobs=-1)]: Done 434 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 784 tasks      | elapsed:  2.2min
[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:  2.8min finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    6.8s
[Parallel(n_jobs=-1)]: Done 184 tasks      | elapsed:   33.3s
[Paral

In [18]:
mean_score = []

In [19]:
for test in sub_score:
    mean = np.mean(test['test_score'])
    mean_score.append(mean)

In [20]:
mean_score

[0.6098893229166665,
 0.5266921768707482,
 0.5723194444444445,
 0.5575680272108844,
 0.471030257936508,
 0.5532760416666666,
 0.5834691840277777,
 0.4742596371882088,
 0.5670846354166666,
 0.5266736111111111,
 0.6194205729166667,
 0.562296485260771,
 0.6256712689020382,
 0.4778152531229454,
 0.4777137188208617,
 0.4697032407407409,
 0.5783730158730159,
 0.5813652421652421,
 0.6039166666666667,
 0.5151832217261905]

In [21]:
p_correct = np.array(sub_pval)*20

In [22]:
sub_pval

[0.000999000999000999,
 0.2137862137862138,
 0.006993006993006993,
 0.03896103896103896,
 0.7782217782217782,
 0.060939060939060936,
 0.003996003996003996,
 0.7212787212787213,
 0.02097902097902098,
 0.20679320679320679,
 0.000999000999000999,
 0.028971028971028972,
 0.001998001998001998,
 0.7212787212787213,
 0.7222777222777222,
 0.7912087912087912,
 0.007992007992007992,
 0.004995004995004995,
 0.001998001998001998,
 0.32367632367632365]

In [23]:
np.sum(np.array(sub_pval)<(0.05/20))

4

In [24]:
score_id = []

In [25]:
for id in sub_score:
    score_id.append(id['test_score'])

In [26]:
pd.DataFrame(score_id).T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,0.551432,0.522534,0.542907,0.530726,0.468056,0.482899,0.609722,0.425397,0.576215,0.577083,0.647786,0.579535,0.638462,0.361867,0.424830,0.468750,0.579138,0.525584,0.653401,0.354167
1,0.584635,0.558036,0.688294,0.523526,0.417212,0.509028,0.648698,0.540873,0.620747,0.616667,0.569661,0.496032,0.596581,0.458974,0.430159,0.433426,0.644161,0.581481,0.592857,0.509394
2,0.576172,0.513287,0.506101,0.616440,0.466865,0.589583,0.597222,0.480952,0.567231,0.604167,0.606771,0.553345,0.654241,0.515911,0.538889,0.530602,0.575397,0.534872,0.498073,0.548270
3,0.591797,0.502232,0.519593,0.530385,0.512103,0.562370,0.558333,0.437868,0.558420,0.520093,0.685547,0.557653,0.517094,0.461538,0.521429,0.458750,0.543707,0.638006,0.596655,0.567708
4,0.606120,0.550170,0.606845,0.594785,0.462897,0.586111,0.606380,0.449660,0.540191,0.547222,0.593099,0.566440,0.572650,0.405983,0.514286,0.554861,0.571088,0.587293,0.548469,0.589565
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,0.631510,0.566327,0.576042,0.646825,0.418056,0.610460,0.665972,0.470635,0.439497,0.516944,0.641927,0.557880,0.647272,0.488889,0.484127,0.462870,0.597902,0.566268,0.600680,0.552920
96,0.591797,0.511905,0.488046,0.581916,0.504018,0.558333,0.643750,0.425170,0.678385,0.469444,0.654948,0.642914,0.555556,0.417094,0.519048,0.504028,0.531746,0.726553,0.565306,0.497768
97,0.608073,0.427827,0.603125,0.479705,0.466171,0.589540,0.579861,0.390476,0.517622,0.473611,0.685547,0.602664,0.678764,0.445562,0.510091,0.486065,0.553968,0.574872,0.659354,0.535621
98,0.609375,0.572173,0.567907,0.609864,0.370883,0.588889,0.610417,0.498413,0.583594,0.441667,0.569661,0.561281,0.683892,0.501578,0.383333,0.453241,0.619444,0.637322,0.509410,0.573754
