In [1]:
import pandas as pd

In [2]:
df = pd.read_csv("ISTM_all_data.csv")
df.head()

Unnamed: 0,category_of_pics,choose_sample_time,confidence,final_choice_x,final_choice_time,global_choose_sample,global_final_choice_time,global_time,global_time_onset_trial,majority_cat,...,total_trial_time,trial_no,trial_number,accuracy,prob_success,trajectory,subj_mean,subj_std,surprise_factor,numb_samples
0,lnc,1.77063,3.0,living,0.392477,14.431899,25.93448,222.592592,9.59995,living,...,16.333807,1,49.0,correct,0.6875,"[1,0,1]",0.834295,0.091179,3.0,3.0
1,lnc,1.365391,1.0,living,0.392477,18.071608,25.93448,120.365108,9.59995,living,...,16.333807,1,23.0,correct,0.6875,"[1,0,1]",0.834295,0.091179,3.0,3.0
2,lnc,0.919521,2.0,living,0.392477,21.263842,25.93448,368.881009,9.59995,living,...,16.333807,1,87.0,correct,0.6875,"[1,0,1]",0.834295,0.091179,3.0,3.0
3,ioc,1.053649,2.0,outdoor,0.359486,32.063873,42.309299,227.170421,27.962291,outdoor,...,14.346315,2,50.0,correct,0.6875,"[0,1,1]",0.834295,0.091179,3.0,3.0
4,ioc,0.588231,1.0,outdoor,0.359486,34.935504,42.309299,62.400627,27.962291,outdoor,...,14.346315,2,10.0,correct,0.6875,"[0,1,1]",0.834295,0.091179,3.0,3.0


## Preprocessing

In [3]:
df = df[["majority_cat", "final_choice_x", "pic_name", "prob_success", "surprise_factor", "reward_type", "old_new_judge"]]

In [4]:
df.head()

Unnamed: 0,majority_cat,final_choice_x,pic_name,prob_success,surprise_factor,reward_type,old_new_judge
0,living,living,living_81.jpg,0.6875,3.0,1,old
1,living,living,nonliving_572.jpg,0.6875,3.0,1,new
2,living,living,living_157.jpg,0.6875,3.0,1,old
3,outdoor,outdoor,indoor_192.jpg,0.6875,3.0,5,old
4,outdoor,outdoor,outdoor_78.jpg,0.6875,3.0,5,new


In [22]:
df.surprise_factor.unique()

array([ 3.,  2.,  1.])

In [5]:
df["success"] = df.apply(lambda row: row.majority_cat == row.final_choice_x, axis=1)

In [6]:
df["belong_to_major"] = df.apply(lambda row: row.majority_cat == row.pic_name.split("_")[0], axis=1)

In [7]:
df = df.drop(["majority_cat", "pic_name", "final_choice_x"], axis=1)

In [8]:
df.head()

Unnamed: 0,prob_success,surprise_factor,reward_type,old_new_judge,success,belong_to_major
0,0.6875,3.0,1,old,True,True
1,0.6875,3.0,1,new,True,False
2,0.6875,3.0,1,old,True,True
3,0.6875,3.0,5,old,True,False
4,0.6875,3.0,5,new,True,True


In [9]:
df.old_new_judge = df.old_new_judge.map({"old": 0, "new":1})
df.reward_type = df.reward_type.map({1:0, 5:1})
df = df.dropna()

## Prepare data

Usually, we split data 7:2:1 as train:development:test

In [10]:
from sklearn.model_selection import train_test_split

In [11]:
X = df.drop(["old_new_judge"], axis=1).as_matrix()
y = df["old_new_judge"].as_matrix()

In [12]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, shuffle=True)

## Logistic regression

In [13]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

In [14]:
model = LogisticRegression()
model.fit(X_train, y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [15]:
model.coef_

array([[-0.17711688, -0.00818163, -0.02475383,  0.00632661,  0.03812808]])

### get development data

In [16]:
X_dev, X_test, y_dev, y_test = train_test_split(X_test, y_test, test_size=0.333, shuffle=True)

In [17]:
model.score(X_dev, y_dev)

0.54939759036144575

In [18]:
from sklearn.neural_network import MLPClassifier

In [19]:
net = MLPClassifier(solver='adam', hidden_layer_sizes=(14, 10, 5), alpha=0.1, verbose=True,
                   max_iter=10000, tol=0.00000001)

In [20]:
net.fit(X_train, y_train)

Iteration 1, loss = 0.69608767
Iteration 2, loss = 0.69228981
Iteration 3, loss = 0.69123266
Iteration 4, loss = 0.69056103
Iteration 5, loss = 0.69008894
Iteration 6, loss = 0.68981826
Iteration 7, loss = 0.68961983
Iteration 8, loss = 0.68950158
Iteration 9, loss = 0.68933986
Iteration 10, loss = 0.68926015
Iteration 11, loss = 0.68913754
Iteration 12, loss = 0.68908019
Iteration 13, loss = 0.68899490
Iteration 14, loss = 0.68892630
Iteration 15, loss = 0.68888991
Iteration 16, loss = 0.68885697
Iteration 17, loss = 0.68880747
Iteration 18, loss = 0.68878008
Iteration 19, loss = 0.68872388
Iteration 20, loss = 0.68869478
Iteration 21, loss = 0.68870445
Iteration 22, loss = 0.68864938
Iteration 23, loss = 0.68863042
Iteration 24, loss = 0.68864427
Iteration 25, loss = 0.68863276
Iteration 26, loss = 0.68857712
Iteration 27, loss = 0.68856508
Iteration 28, loss = 0.68854994
Iteration 29, loss = 0.68855017
Iteration 30, loss = 0.68853385
Iteration 31, loss = 0.68853751
Iteration 32, los

MLPClassifier(activation='relu', alpha=0.1, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(14, 10, 5), learning_rate='constant',
       learning_rate_init=0.001, max_iter=10000, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=1e-08, validation_fraction=0.1,
       verbose=True, warm_start=False)

In [21]:
net.score(X_train, y_train)

0.54869945709423873