In [1]:
import os 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import cross_validate
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn import metrics
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.metrics import make_scorer
from sklearn.model_selection import permutation_test_score
%matplotlib inline

In [2]:
def score_func(y, y_pred):
    score = roc_auc_score(y, y_pred, multi_class="ovo")
    return score

In [3]:
my_scores = make_scorer(score_func=score_func, greater_is_better=True, needs_proba=True, needs_threshold=False)

In [4]:
os.getcwd()

'/Users/zhengyuanrui/FastSame-Sequence/2_Analysis/2_Result'

In [5]:
os.chdir("../../1_Data/2_Postpro_data")

In [6]:
df = pd.read_csv("Exp2_RT.csv")

In [7]:
column = ['subj_idx', 'gender', 'year','education', 'condition', 'valence', 'matchness', 'rt']

In [8]:
df = df[column]

In [9]:
df["valence"] = df["valence"].map(dict(Neutral=0, Bad=1, Good=2))

In [10]:
df["condition"] = df["condition"].map(dict(simultaneous=0, word_first=1, image_first=2))

In [11]:
df["matchness"] = df["matchness"].map(dict(mismatch=0, match=1))

In [12]:
df_sim = df.query("condition == 1")

In [13]:
df_sim

Unnamed: 0,subj_idx,gender,year,education,condition,valence,matchness,rt
239,1,0,2000,5,1,0,0,761
240,1,0,2000,5,1,2,0,745
241,1,0,2000,5,1,1,1,639
242,1,0,2000,5,1,2,1,566
243,1,0,2000,5,1,2,1,510
...,...,...,...,...,...,...,...,...
12636,9,0,2003,4,1,0,1,527
12637,9,0,2003,4,1,2,1,692
12638,9,0,2003,4,1,1,1,571
12639,9,0,2003,4,1,1,0,903


In [14]:
df_img = df.query("condition == 2")

In [15]:
df_word = df.query("condition == 1")

In [16]:
sub_score = []
sub_pval = []

In [17]:
for sub, df_sub in df_img.groupby("subj_idx"):
    X = df_sub['rt'].values
    X_log = np.log(X)
    y = df_sub['valence'].values
    pipe_lr = make_pipeline(
    MinMaxScaler(), 
    LogisticRegression(solver='saga', multi_class="multinomial", max_iter=1000, random_state=123))
    cv = StratifiedShuffleSplit(n_splits=100, test_size=0.2, random_state=123)
    scores = cross_validate(pipe_lr, X_log.reshape(-1, 1), y, cv=cv, scoring=my_scores)
    sub_score.append(scores)
    _,_,pval = permutation_test_score(pipe_lr, X_log.reshape(-1, 1), y, cv=cv, n_permutations=1000, n_jobs=-1, random_state=123, verbose=1, scoring=my_scores, fit_params=None)
    sub_pval.append(pval)

    

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:   22.8s
[Parallel(n_jobs=-1)]: Done 184 tasks      | elapsed:   59.3s


KeyboardInterrupt: 

In [None]:
mean_score = []

In [None]:
for test in sub_score:
    mean = np.mean(test['test_score'])
    mean_score.append(mean)

In [None]:
mean_score

[0.6007812499999999,
 0.5231186224489796,
 0.5893253968253969,
 0.5806575963718822,
 0.45452876984126983,
 0.5652213541666666,
 0.6090017361111111,
 0.4885260770975056,
 0.5917361111111111,
 0.5278148148148148,
 0.6231119791666667,
 0.5295181405895691,
 0.6033793556870479,
 0.45072978303747535,
 0.5023922902494331,
 0.4800138888888889,
 0.5594784580498866,
 0.5533105413105412,
 0.5747392290249433,
 0.5089192708333334]

In [None]:
p_correct = np.array(sub_pval)*20

In [None]:
sub_pval

[0.001996007984031936,
 0.2694610778443114,
 0.001996007984031936,
 0.017964071856287425,
 0.8562874251497006,
 0.041916167664670656,
 0.001996007984031936,
 0.6307385229540918,
 0.003992015968063872,
 0.21756487025948104,
 0.001996007984031936,
 0.17365269461077845,
 0.013972055888223553,
 0.9181636726546906,
 0.5109780439121756,
 0.6626746506986028,
 0.06187624750499002,
 0.08582834331337326,
 0.027944111776447105,
 0.4151696606786427]

In [None]:
np.sum(np.array(sub_pval)<(0.05/20))

4

In [None]:
score_id = []

In [None]:
for id in sub_score:
    score_id.append(id['test_score'])

In [None]:
pd.DataFrame(score_id).T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,0.551432,0.522534,0.542907,0.530726,0.468056,0.482899,0.609722,0.425397,0.576215,0.577083,0.647786,0.579535,0.638462,0.361867,0.42483,0.46875,0.579138,0.525584,0.653401,0.354167
1,0.584635,0.558036,0.688294,0.523526,0.417212,0.509028,0.648698,0.540873,0.620747,0.616667,0.569661,0.496032,0.596581,0.458974,0.430159,0.433426,0.644161,0.581481,0.592857,0.509394
2,0.576172,0.513287,0.506101,0.61644,0.466865,0.589583,0.597222,0.480952,0.567231,0.604167,0.606771,0.553345,0.654241,0.515911,0.538889,0.530602,0.575397,0.534872,0.498073,0.54827
3,0.591797,0.502232,0.519593,0.530385,0.512103,0.56237,0.558333,0.437868,0.55842,0.520093,0.685547,0.557653,0.517094,0.461538,0.521429,0.45875,0.543707,0.638006,0.596655,0.567708
4,0.60612,0.55017,0.606845,0.594785,0.462897,0.586111,0.60638,0.44966,0.540191,0.547222,0.593099,0.56644,0.57265,0.405983,0.514286,0.554861,0.571088,0.587293,0.548469,0.589565
5,0.565104,0.493729,0.658036,0.585884,0.408383,0.635417,0.6125,0.572902,0.58533,0.579861,0.620443,0.521939,0.655753,0.548192,0.591156,0.492546,0.480952,0.566211,0.619558,0.463542
6,0.603516,0.449405,0.578869,0.584184,0.445635,0.61059,0.646528,0.477438,0.663281,0.405278,0.70638,0.533787,0.587179,0.360552,0.453968,0.477546,0.510714,0.525698,0.53458,0.443452
7,0.658203,0.552083,0.639335,0.653968,0.399206,0.545573,0.601389,0.549206,0.618663,0.466667,0.609375,0.404082,0.604274,0.462853,0.577778,0.392778,0.629819,0.53886,0.620295,0.585193
8,0.652344,0.576956,0.550893,0.661905,0.514484,0.560503,0.640278,0.482596,0.561285,0.481944,0.544922,0.540476,0.593162,0.441683,0.438889,0.479861,0.540476,0.51453,0.613605,0.541109
9,0.61849,0.512755,0.602381,0.524773,0.450446,0.570139,0.568967,0.468367,0.625998,0.479167,0.647135,0.541893,0.614398,0.489744,0.53254,0.511019,0.519331,0.52057,0.469898,0.486793
