In [1]:
import numpy as np
from scipy.io import loadmat
from sklearn.model_selection import KFold, RepeatedKFold
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge, RidgeCV, Lasso
from sklearn.neural_network import MLPRegressor
from scipy.stats import pearsonr
from scipy import spatial
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

In [2]:
def pairwise_accuracy(actual, predicted):
    """Return the fraction of item pairs that pass the pairwise matching test.

    Every unordered pair ``(i, j)`` with ``i < j`` is examined. The pair counts
    as correct when the summed cosine distance of the matching assignment
    (``actual[i]`` vs ``predicted[i]`` and ``actual[j]`` vs ``predicted[j]``)
    is strictly smaller than that of the swapped assignment
    (``actual[i]`` vs ``predicted[j]`` and ``actual[j]`` vs ``predicted[i]``).

    Args:
        actual: indexable collection of reference vectors.
        predicted: indexable collection of predicted vectors, aligned with
            ``actual`` by position.

    Returns:
        Number of correct pairs divided by the number of pairs examined.

    Raises:
        ZeroDivisionError: if ``actual`` holds fewer than two items, because
            no pair exists to evaluate.
    """
    cosine = spatial.distance.cosine
    n_items = len(actual)
    n_pairs = 0
    n_correct = 0

    for first in range(n_items):
        for second in range(first + 1, n_items):
            n_pairs += 1

            # Cost of pairing each prediction with its own target ...
            matched_cost = (cosine(actual[first], predicted[first])
                            + cosine(actual[second], predicted[second]))
            # ... versus pairing it with the other item's target.
            swapped_cost = (cosine(actual[first], predicted[second])
                            + cosine(actual[second], predicted[first]))

            if matched_cost < swapped_cost:
                n_correct += 1

    return n_correct / n_pairs

In [3]:
def pearcorr(actual, predicted):
    """Average the row-wise Pearson correlation and its square.

    For each index ``i`` the Pearson correlation between ``actual[i]`` and
    ``predicted[i]`` is taken from ``np.corrcoef``. The coefficient is computed
    once per row and reused for the squared value.

    Args:
        actual: indexable collection of 1-D reference vectors.
        predicted: indexable collection of 1-D predicted vectors, aligned with
            ``actual`` by position.

    Returns:
        Tuple ``(mean_corr, mean_squared_corr)``: the mean of the per-row
        correlation coefficients and the mean of their squares.
    """
    corr = []
    for i in range(len(actual)):
        # Off-diagonal entry of the 2x2 correlation matrix.
        corr.append(np.corrcoef(actual[i], predicted[i])[0][1])
    r2 = [coefficient ** 2 for coefficient in corr]
    return np.mean(corr), np.mean(r2)

In [4]:
def mse_r2(actual, predicted):
    """Return the mean of the row-wise mean absolute errors.

    NOTE: despite its name, this function computes neither MSE nor R^2; it
    returns a single value, the average of ``mean_absolute_error`` over rows.

    Args:
        actual: indexable collection of reference rows.
        predicted: indexable collection of predicted rows, aligned with
            ``actual`` by position.

    Returns:
        The mean of ``mean_absolute_error(actual[i], predicted[i])`` over all
        ``i`` in ``range(len(actual))``.
    """
    mae = [
        mean_absolute_error(actual[i], predicted[i])
        for i in range(len(actual))
    ]
    return np.mean(mae)

In [5]:
# Shared cross-validation splitter: 10 folds, repeated 3 times, fixed seed.
# Because the folds are repeated, every sample ends up in a test fold once
# per repeat (three times in total).
kf = RepeatedKFold(
    n_splits=10,
    n_repeats=3,
    random_state=1,
)

In [10]:
def train(vectors, voxels):
    """Fit a ridge regression per cross-validation fold and collect predictions.

    The module-level splitter ``kf`` partitions the rows; for each fold a fresh
    ``Ridge(alpha=1.0)`` is fitted on the training rows and used to predict the
    held-out rows.

    Note on naming: ``voxels`` is used as the regression *input* and
    ``vectors`` as the regression *target*. At the call site visible in this
    notebook the fMRI array is passed as ``vectors`` and the stimulus features
    as ``voxels``, i.e. the names are the reverse of what they suggest.

    Args:
        vectors: array-like of regression targets, one row per sample.
        voxels: array-like of regression inputs, one row per sample.

    Returns:
        Tuple ``(predicted, indices)``:
        ``predicted`` stacks the held-out predictions of all folds in the order
        the folds were produced; ``indices`` is a list with the original row
        index of every row of ``predicted``. When the splitter repeats the
        folds, each sample appears once per repeat, and ``predicted`` is NOT
        in the original sample order.
    """
    dataset_X = np.array(voxels)
    dataset_Y = np.array(vectors)

    fold_predictions = []
    indices = []
    for train_index, test_index in kf.split(dataset_X):
        X_train, X_test = dataset_X[train_index], dataset_X[test_index]
        y_train = dataset_Y[train_index]

        model = Ridge(alpha=1.0)
        model.fit(X_train, y_train)

        fold_predictions.append(model.predict(X_test))
        indices.extend(test_index)

    # The empty float64 seed takes its trailing shape from the targets instead
    # of a hard-coded width, and keeps the float64 result the previous
    # incremental concatenation produced.
    seed = np.empty((0,) + dataset_Y.shape[1:])
    predicted = np.concatenate([seed] + fold_predictions, axis=0)
    return predicted, indices

In [6]:
# The .npy file stores a single pickled Python object; .item() unwraps it.
# NOTE(review): allow_pickle=True can execute arbitrary code while loading,
# so only use it on files from a trusted source.
data = np.load('./pieman_data_paraphrase.npy', allow_pickle=True).item()

In [8]:
# Region-of-interest identifiers used to build the per-ROI file names.
# The _L / _R suffixes presumably denote left/right hemisphere - TODO confirm.
ROIS = [
    'eac_L', 'eac_R',
    'aac_L', 'aac_R',
    'pmc_L', 'pmc_R',
    'tpoj_L', 'tpoj_R',
    'LAN_L', 'LAN_R',
]

In [9]:
# Load every ROI, keep only the TRs listed in data['non_empty_tr_indices'],
# and stack the ROIs along the last axis. The arrays are collected first and
# concatenated once, so the leading dimensions come from the files themselves
# rather than from a hard-coded seed shape, and no intermediate copies are
# made per ROI.
roi_arrays = []
n_voxels_so_far = 0
for roi in ROIS:
    data_voxels = np.load('../../afni-nosmooth/pieman_'+roi+'_avg.npy')
    data_voxels = data_voxels[:,data['non_empty_tr_indices'],:]
    print(data_voxels.shape)
    print(roi)
    print()
    roi_arrays.append(data_voxels)
    n_voxels_so_far += data_voxels.shape[2]
    # Running shape of the stacked array after adding this ROI.
    print(data_voxels.shape[:2] + (n_voxels_so_far,))

# Cast to float64 to match the dtype produced by the previous approach of
# concatenating onto an empty float64 array.
roi_voxels = np.concatenate(roi_arrays, axis=2).astype(np.float64, copy=False)

(82, 259, 808)
eac_L

(82, 259, 808)
(82, 259, 638)
eac_R

(82, 259, 1446)
(82, 259, 1420)
aac_L

(82, 259, 2866)
(82, 259, 1493)
aac_R

(82, 259, 4359)
(82, 259, 1198)
pmc_L

(82, 259, 5557)
(82, 259, 1204)
pmc_R

(82, 259, 6761)
(82, 259, 847)
tpoj_L

(82, 259, 7608)
(82, 259, 1188)
tpoj_R

(82, 259, 8796)
(82, 259, 1061)
LAN_L

(82, 259, 9857)
(82, 259, 875)
LAN_R

(82, 259, 10732)


In [11]:
# Only the first subject is processed: the previous loop over subjects broke
# out unconditionally after its first iteration, which is made explicit here.
i = 0
roi_fmri = roi_voxels[i]
predicted, indices = train(roi_fmri, data['tr_features'])

# Row k of `predicted` belongs to sample indices[k] (fold order, not sample
# order). Indexing `predicted[indices]` would apply that permutation a second
# time instead of undoing it, so the rows are scattered back to their original
# positions instead. With a repeated splitter each sample is predicted several
# times; those predictions are averaged.
test_rows = np.asarray(indices, dtype=int)
n_samples = roi_fmri.shape[0]
aa = np.zeros((n_samples,) + predicted.shape[1:])
np.add.at(aa, test_rows, predicted)  # unbuffered add handles repeated indices
times_tested = np.bincount(test_rows, minlength=n_samples)
aa /= times_tested.reshape((-1,) + (1,) * (predicted.ndim - 1))

print(aa.shape)
np.save('paraphrase_pred',aa)

(259, 10732)


In [9]:
# Feature-set identifiers; presumably one per NLP task whose representations
# are available ('paraphrase' matches the data file loaded in this notebook)
# - TODO confirm.
methods = [
    'coreference',
    'ner',
    'nli',
    'sa',
    'shallosyntax',
    'srl',
    'paraphrase',
    'summary',
    'wsd',
    'qa',
    'el',
]
# Region-of-interest identifiers (same values as the earlier ROIS cell).
ROIS = [
    'eac_L', 'eac_R',
    'aac_L', 'aac_R',
    'pmc_L', 'pmc_R',
    'tpoj_L', 'tpoj_R',
    'LAN_L', 'LAN_R',
]