In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL']='3'
import openpyxl
import numpy as np
import pandas as pd
import tensorflow as tf
import scipy.stats as stats
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.model_selection import train_test_split
from datetime import datetime

In [2]:
# 将各类变量放在一个位置集中定义，十分有利于机器学习等变量较多的代码
MyModelPath="./model_save/" # 确定每一次训练所得模型保存的位置
MyDataPath="./data/image data/mdALFF matrix excel/mdALFF.csv" # 确定输入数据的位置
MyResultSavePath=f"./model_save/{datetime.now().strftime('%D %H:%M:%S')} - Result.xlsx" # 确定模型精度结果（RMSE等）与模型参数保存的位置
TestSize=0.2 # 确定数据中测试集所占比例
RandomSeed=np.random.randint(low=24,high=25) # 确定划分训练集与测试集的随机数种子
OptMethod='Adam' # 确定模型所用的优化方法
LearningRate=0.001 # 确定学习率
DecayStep=200 # 确定学习率下降的步数
DecayRate=0.96 # 确定学习率下降比率
HiddenLayer=[50,100] # 确定隐藏层数量与每一层对应的神经元数量
ActFun='tf.nn.relu' # 确定激活函数
Dropout=0.5 # 确定Dropout的值
LossReduction='tf.compat.v1.ReductionV2.SUM_OVER_BATCH_SIZE' # 指定每个批次训练误差的减小方法
BatchNorm='False' # 确定是否使用Batch Normalizing
TrainBatchSize=20 # 确定训练数据一个Batch的大小
TrainStep=3000 # 确定训练数据的Step数量
EvalBatchSize=1 # 确定验证数据一个Batch的大小
PredictBatchSize=1 # 确定预测数据（即测试集）一个Batch的大小

In [3]:
# LoadData函数，加载全部数据
def LoadData(DataPath):
    MyData=pd.read_csv(DataPath) # 加载DataPath路径所指定的数据，names中的内容为各列的名称

    MyData = MyData[MyData.columns[1::]]
    return MyData

# 初始数据处理
AllX=LoadData(MyDataPath) # 调用LoadData函数，获取数据
clinical_data = pd.read_csv('./data/clinical data/clinical_data.csv')[['subj', 'AD8主觀認知障礙', 'MOCA客觀認知測驗分數', 'HADS_A焦慮程度', 'HADS_D憂鬱程度']]
patient_value_index = clinical_data['MOCA客觀認知測驗分數']



# 划分数据训练集与测试集
TrainX,TestX,TrainY,TestY=train_test_split(AllX,
                                           patient_value_index,
                                           test_size=TestSize, # 指定数据中测试集所占比例
                                           random_state=RandomSeed # 指定划分训练集与测试集的随机数种子
                                           )

In [4]:
FeatureColumn=[] # 定义一个新的“Feature columns”对象
for key in AllX.keys():
    FeatureColumn.append(tf.feature_column.numeric_column(key=key)) # 将全部因变量数据（需要均为连续变量）导入

In [5]:
# Optimizer=OptMethod # 优化方法选用OptMethod所指定的方法
Optimizer=lambda:tf.keras.optimizers.Adam(
    learning_rate=tf.compat.v1.train.exponential_decay(learning_rate=LearningRate, # 初始学习率
                                                       global_step=tf.compat.v1.train.get_global_step(),
                                                       # 全局步数，用以计算已经衰减后的学习率
                                                       # get_global_step()函数自动获取当前的已经执行的步数
                                                       decay_steps=DecayStep, # 学习率下降完成的指定步数
                                                       decay_rate=DecayRate # 衰减率
                                                       ) # 选用基于学习率指数下降的Adam方法，此举有助于降低过拟合风险
                                                         # 这一函数返回每次对应的学习率
    )

In [6]:
from tensorflow.python.profiler import trace
# 基于DNNRegressor构建深度学习模型
DNNModel=tf.estimator.DNNRegressor(feature_columns=FeatureColumn, # 指定模型所用的“Feature columns”对象
                                   hidden_units=HiddenLayer, # 指定隐藏层数量与每一层对应的神经元数量
                                   optimizer=Optimizer, # 指定模型所用的优化方法                                  
                                   activation_fn=eval(ActFun), # 指定激活函数
                                   dropout=Dropout, # 指定Dropout的值
                                   label_dimension=1, # 输出数据的维度，即因变量的个数
                                   model_dir=MyModelPath, # 指定每一次训练所得模型保存的位置
                                   # loss_reduction=eval(LossReduction), # 指定每个批次训练误差的减小方法
                                   batch_norm=eval(BatchNorm) # 指定是否使用Batch Normalizing
                                   )

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': './model_save/', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_checkpoint_save_graph_def': True, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [7]:
# InputFun函数，训练数据与验证数据所用的Input函数
def InputFun(Features,Labels,Training,BatchSize):
    Datasets=tf.data.Dataset.from_tensor_slices((dict(Features),Labels)) # 对数据加以加载
    if Training:
        Datasets=Datasets.shuffle(1000).repeat() # 对于训练数据，需要打乱（shuffle）、重复（repeat）
    return Datasets.batch(BatchSize) # 将经过上述处理后的数据以每次BatchSize个输出

# 基于训练数据训练模型
DNNModel.train(input_fn=lambda:InputFun(TrainX,
                                        TrainY,
                                        True,
                                        TrainBatchSize
                                        ), # 调用InputFun函数；InputFun函数返回“tf.data.Dataset”对象，这个对象才可以被
                                           # train函数识别并带入模型；由于InputFun函数每次返回BatchSize大小的数据个数，
                                           # 因此需要多次执行，前面需要加lambda
               steps=TrainStep # 指定模型训练的步数
               )

Instructions for updating:
Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 0...
INFO:tensorflow:Saving checkpoints for 0 into ./model_save/model.ckpt.
INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 0...
INFO:tensorflow:loss = 674.3258, step = 0
INFO:tensorflow:global_step/sec: 63.3819
INFO:tensorflow:loss = 71.856544, step = 100 (1.579 sec)
INFO:tensorflow:global_step/sec: 94.7996
INFO:tensorflow:loss = 66.29317, step = 200 (1.055 sec)
INFO:tensorflow:global_step/sec: 92.7462
INFO:tensorflow:loss = 43.441273, step = 300 (1.079 sec)
INFO:tensorflow:global_step/sec: 88.2287
INFO:tensorflow:

<tensorflow_estimator.python.estimator.canned.dnn.DNNRegressorV2 at 0x7f41bbdab750>

In [8]:
# InputFunPredict函数，测试数据所用的Input函数
def InputFunPredict(Features,BatchSize):
    return tf.data.Dataset.from_tensor_slices(dict(Features)).batch(BatchSize) # 对数据加以加载,以每次BatchSize个输出    

# 验证模型并保存验证结果
EvalResult=DNNModel.evaluate(input_fn=lambda:InputFun(TestX,
                                                      TestY,
                                                      False,
                                                      EvalBatchSize
                                                      )
                             )
# 打印验证结果
print('ev:{}'.format(EvalResult))

# 基于测试数据测试模型精度结果
PredictValues=DNNModel.predict(input_fn=lambda:InputFunPredict(TestX,
                                                               PredictBatchSize
                                                               )
                               )

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2022-06-28T11:46:27
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./model_save/model.ckpt-3000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Inference Time : 1.25740s
INFO:tensorflow:Finished evaluation at 2022-06-28-11:46:28
INFO:tensorflow:Saving dict for global step 3000: average_loss = 16.93486, global_step = 3000, label/mean = 24.235294, loss = 16.93486, prediction/mean = 24.88824
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 3000: ./model_save/model.ckpt-3000
ev:{'average_loss': 16.93486, 'label/mean': 24.235294, 'loss': 16.93486, 'prediction/mean': 24.88824, 'global_step': 3000}


In [9]:
# AccuracyVerification函数，进行精度验证指标的计算与绘图
def AccuracyVerification(PredictLabels,TestLabels):
    value=0
    PredictValuesList=[]
    for k in PredictLabels:
        value=k.get('predictions')[0]
        print(value)
        PredictValuesList.append(value)
    TestLabels=TestLabels.values.tolist()
    print(TestLabels, PredictValuesList)
    TestYList=sum(TestLabels,[])
    # 以上为获取测试数据的因变量与模型预测所得的因变量
    Pearsonr=stats.pearsonr(TestYList,PredictValuesList) # 计算皮尔逊相关系数
    R2=metrics.r2_score(TestYList,PredictValuesList) # 计算R方
    RMSE=metrics.mean_squared_error(TestYList,PredictValuesList)**0.5 # 计算RMSE
    plt.cla()
    plt.plot(TestYList,PredictValuesList,'r*')
    plt.xlabel('Actual Values')
    plt.ylabel('Predicted Values')
    # 以上为绘制拟合图像
    print('Pearson correlation coefficient is {0}, and RMSE is {1}.'.format(Pearsonr[0],RMSE))
    return (Pearsonr[0],R2,RMSE,PredictValuesList)

# 调用AccuracyVerification函数，进行精度验证指标的计算与绘图
AccuracyResult=AccuracyVerification(PredictValues,TestY)
PearsonR,R2,RMSE,PredictY=AccuracyResult[0],AccuracyResult[1],AccuracyResult[2],AccuracyResult[3]


INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./model_save/model.ckpt-3000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
24.447832
25.685457
23.968506
24.93865
24.560299
25.95132
27.829668
21.868242
24.69504
24.807089
22.39534
25.75075
26.379555
26.218996
25.424747
23.91822
24.260405
[30, 26, 18, 25, 21, 19, 24, 25, 21, 21, 17, 26, 29, 30, 30, 29, 21] [24.447832, 25.685457, 23.968506, 24.93865, 24.560299, 25.95132, 27.829668, 21.868242, 24.69504, 24.807089, 22.39534, 25.75075, 26.379555, 26.218996, 25.424747, 23.91822, 24.260405]


TypeError: can only concatenate list (not "int") to list