In [8]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import KFold
from sklearn.metrics import make_scorer
from scipy.stats import pearsonr
import random

In [9]:
# 获取特征和目标变量
def get_xy(data,ymark):

    X = data.iloc[:, data.columns.get_loc('A8m_L'):]

    y = data[ymark]
    return X, y

In [12]:
def linear_regression_cv(X, y, n_splits=10, n_runs=101):
    all_r_scores = []
    for _ in range(n_runs):
        kf = KFold(n_splits=n_splits, shuffle=True, random_state=None)
        r_scores = []
        for train_index, test_index in kf.split(X):
            X_train, X_test = X.iloc[train_index], X.iloc[test_index]
            y_train, y_test = y.iloc[train_index], y.iloc[test_index]

            # 训练模型
            model = LinearRegression()
            model.fit(X_train, y_train)

            # 预测
            y_pred = model.predict(X_test)

            # 计算皮尔逊相关系数
            r, _ = pearsonr(y_test, y_pred)
            r_scores.append(r)

        all_r_scores.append(np.mean(r_scores))
    return all_r_scores
def permutation_test(X, y, res, n_permutations=1000):

    # 存储置换后的r值
    permutation_r_values = []

    for _ in range(n_permutations):
        # 随机打乱目标变量 y
        y_permuted = y.sample(frac=1).reset_index(drop=True)

        # 对置换数据进行一次交叉验证
        permuted_r = linear_regression_cv(X, y_permuted, n_runs=1)
        permutation_r_values.append(permuted_r)

    # 计算p值：原始r值大于等于置换r值的比例
    p_value = np.mean(np.array(permutation_r_values) >= res)

    return p_value
file_path = '/Volumes/QC/INT/INT_BN246_HC135BP_allMDD/Results/HAMD/MDD_IND_HAMD0w52w.csv'
data = pd.read_csv(file_path)
X, y = get_xy(data,'respond')

all_r_scores = linear_regression_cv(X, y)

res = np.median(all_r_scores)
print(res)
pvalue = permutation_test(X, y, res)
print('pvalue:', pvalue)

-0.029756927874254203
pvalue: 0.596


In [13]:
from sklearn.svm import LinearSVR

def SVR(X, y, n_splits=10, n_runs=101):
    all_r_scores = []
    for i in range(n_runs):
        kf = KFold(n_splits=n_splits, shuffle=True, random_state=None)
        r_scores = []
        k = 0
        for train_index, test_index in kf.split(X):
            k = k+1
            X_train, X_test = X.iloc[train_index], X.iloc[test_index]
            y_train, y_test = y.iloc[train_index], y.iloc[test_index]

            # 训练模型
            model = LinearSVR()
            model.fit(X_train, y_train)

            # 预测
            y_pred = model.predict(X_test)

            data = np.column_stack((y_pred, y_test))
            weight = model.coef_

            # 保存预测结果和权重
            # np.savetxt('/Volumes/QCI/NormativeModel/Results/Result_GrayVol246_HBR_HCMDD_1129/StaResults/Longitudinal/Predict/SVR_Resp/''pre_'+str(i)+'_'+str(k)+'.csv', data, delimiter=',', header='y_pred,y_test', comments='')
            # np.savetxt('/Volumes/QCI/NormativeModel/Results/Result_GrayVol246_HBR_HCMDD_1129/StaResults/Longitudinal/Predict/SVR_Resp/''weight_'+str(i)+'_'+str(k)+'.csv', weight, delimiter=',', header='weight', comments='')

            # 计算皮尔逊相关系数
            r, _ = pearsonr(y_test, y_pred)
            r_scores.append(r)

        all_r_scores.append(np.mean(r_scores))
    return all_r_scores
def SVRpermutation_test(X, y, res, n_permutations=1000):

    # 存储置换后的r值
    permutation_r_values = []

    for _ in range(n_permutations):
        # 随机打乱目标变量 y
        y_permuted = y.sample(frac=1).reset_index(drop=True)

        # 对置换数据进行一次交叉验证
        permuted_r = SVR(X, y_permuted, n_runs=1)
        permutation_r_values.append(permuted_r)

    # 计算p值：原始r值大于等于置换r值的比例
    p_value = np.mean(np.array(permutation_r_values) >= res)

    return p_value

file_path = '/Volumes/QC/INT/INT_BN246_HC135BP_allMDD/Results/HAMD/MDD_IND_HAMD0w52w.csv'
data = pd.read_csv(file_path)
X, y = get_xy(data,'diff')

all_r_scores = SVR(X, y)
# np.savetxt('/Volumes/QCI/NormativeModel/Results/Result_GrayVol246_HBR_HCMDD_1129/StaResults/Longitudinal/Predict/SVR_Resp/rvalue.csv', all_r_scores, delimiter=',',  comments='')

res = np.median(all_r_scores)
index = all_r_scores.index(res)
print('index:',index)
print('r-value:',res)
pvalue = SVRpermutation_test(X, y, res)
print('permutation-pvalue:', pvalue)



index: 46
r-value: -0.01943020499950192




permutation-pvalue: 0.544


