## Model: MSCAP

In [None]:
# -*- coding: utf8 -*-
import numpy as np
import pandas as pd
import keras
import tensorflow as tf
from keras.models import Sequential
from tensorflow.python.keras.utils.vis_utils import plot_model
from keras.utils import np_utils,plot_model
from sklearn.model_selection import cross_val_score,train_test_split
from keras.layers import Dense, Dropout,Flatten,Conv1D,MaxPooling1D,AveragePooling1D
from keras.models import model_from_json
import matplotlib.pyplot as plt
from keras import backend as K
from keras.models import Model
from keras.layers import Input
from sklearn import preprocessing

### Input

In [None]:
%%time
# Read the full dataset from CSV into df_raw; the %%time cell magic above
# reports how long this cell takes to run.
df_raw = pd.read_csv('../datasets/all_healthy_data.csv')

In [None]:
# Show the (rows, columns) dimensions of the freshly loaded table.
df_raw.shape

In [None]:
# Display the table exactly as loaded, with the 'Unnamed: 0' column still present.
df_raw

In [None]:
# Save the row labels and the column names.
row_indices = df_raw['Unnamed: 0'].tolist()
# Captured while 'Unnamed: 0' is still a column, so that name is included here.
column_names = df_raw.columns.values


In [None]:
# Delete the first column (its values were saved in row_indices).
# NOTE: this mutates df_raw in place, so running the cell a second time
# raises KeyError because the column is already gone.
del df_raw['Unnamed: 0']
# Use the sample names as the row index.
df_raw.index = row_indices
df_raw

In [None]:
# Feature matrix: every column except the last, cast to float, with a
# trailing length-1 axis appended so the array is 3-D.
X = df_raw.iloc[:, :-1].values.astype(float)[:, :, np.newaxis]
# Label vector: the last column.
y = df_raw.iloc[:, -1].values
X.shape, y.shape

### Divide the dataset

In [None]:
# Split into training and test sets: 20% of the samples are held out, and the
# fixed random_state makes the split reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size=0.2, random_state=42)

### Model

In [None]:
def Block(input, dropout_rate=0.2):
    """Build one multi-scale convolution block on top of ``input``.

    Four parallel branches are applied to the same input tensor. Every branch
    starts with a kernel-size-1 ``Conv1D`` (one filter, ReLU). Branches 2, 3
    and 4 then stack one, two and three further single-filter convolutions
    with kernel sizes 8, 16 and 32 respectively. Each branch ends with a
    ``Dropout`` layer, and the four branch outputs are concatenated along
    axis 2.

    All convolutions use ``padding='same'``, so the output length equals the
    input length.

    Unlike the previous version, this function no longer creates an unused
    ``Input`` tensor on every call and therefore no longer depends on the
    global ``X_train``.

    Args:
        input: Keras tensor to convolve. The name shadows the ``input``
            builtin but is kept for backward compatibility with callers.
        dropout_rate: Drop fraction used by every branch's ``Dropout`` layer.
            Defaults to 0.2, the value that was previously hard-coded.

    Returns:
        The Keras tensor produced by concatenating the four branch outputs
        along axis 2 (one channel per branch).
    """
    # (kernel size of the wide convolutions, how many of them) for each branch.
    # The first branch is the 1x1 convolution alone.
    branch_specs = ((None, 0), (8, 1), (16, 2), (32, 3))

    branch_outputs = []
    for kernel_size, depth in branch_specs:
        # Every branch starts from the block input with a 1x1 convolution.
        branch = Conv1D(1, 1, padding='same', activation='relu')(input)
        for _ in range(depth):
            branch = Conv1D(1, kernel_size, padding='same', activation='relu')(branch)
        branch_outputs.append(Dropout(dropout_rate)(branch))

    return keras.layers.concatenate(branch_outputs, axis=2)

In [None]:
# Input tensor: one step per feature column, a single channel.
input_shape = Input(shape=(X_train.shape[1],1))
# Fully connected layers.
# NOTE(review): Dense acts on the last axis of its input, so with a
# (features, 1) input fc2 has 25789 units per step -- confirm this very large
# intermediate tensor is intended.
fc1 = Dense(512, activation='relu')(input_shape)
fc2 = Dense(25789, activation='linear')(fc1)
# Call Block repeatedly to assemble the MSCNN module.
# First column
block1 = Block(fc2)
block2 = Block(fc2)
block3 = Block(fc2)
block4 = Block(fc2)
# Second column
block5 = Block(block2)
block6 = Block(block3)
block7 = Block(block4)
# Third column
block8 = Block(block6)
block9 = Block(block7)
# Fourth column
block10 = Block(block9)
# Merge the last block of each chain along the channel axis.
# (Fixed: the original line used a full-width comma and was missing the
# closing bracket of the list, which is a SyntaxError.)
MSCNN = keras.layers.concatenate([block1, block5, block8, block10], axis=2)

# Regression head: flatten, small dense layer with dropout, single linear output.
flat_1 = Flatten()(MSCNN)
output1 = Dense(16, activation='relu')(flat_1)
dropout = Dropout(0.2)(output1)
output = Dense(1, activation='linear')(dropout)

model = Model([input_shape], output)
model.summary()
# NOTE(review): `adam_v2` is a version-specific module path; other Keras
# releases may expose the optimizer as keras.optimizers.Adam -- confirm
# against the installed version.
adam = keras.optimizers.adam_v2.Adam(learning_rate=0.0001)
model.compile(optimizer=adam, loss='mean_squared_error', metrics=['mae'])

### batch_size过大或者其他原因可能会导致占用内存过大，因此训练模型前可以提前释放一下服务器上的内存

In [None]:
from keras.backend import set_session
from keras.backend import clear_session
from keras.backend import get_session
import tensorflow as tf
import gc
 
# Reset Keras Session
def reset_keras():
    """Release Keras/TensorFlow session state and install a fresh session.

    Steps, in order:
      1. Fetch the current backend session, clear Keras' global state and
         close that session.
      2. Drop the notebook-level ``classifier`` object if one exists, so the
         garbage collector can reclaim it.
      3. Run the garbage collector and print how many objects it collected.
      4. Create a new TF1-compat session restricted to GPU "0" with the
         per-process memory fraction set to 1, and register it with Keras.

    Returns:
        None.
    """
    sess = get_session()
    clear_session()
    sess.close()

    # The previous `del classifier` ran inside this function, so Python
    # treated `classifier` as an unbound local: the statement always raised
    # and a bare `except:` hid it, leaving the global untouched. Remove the
    # name from the module namespace instead; a missing name is a no-op.
    # Change the key if the object to release has a different name.
    globals().pop('classifier', None)

    print(gc.collect())  # if it does something you should see a number as output

    # Use the same config as was used to create the original session.
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 1
    config.gpu_options.visible_device_list = "0"
    set_session(tf.compat.v1.Session(config=config))

In [None]:
# Release stale session state before training starts.
reset_keras()
# Fit the model: inputs and targets are cast to float32, and the held-out
# split is used as validation data after every epoch.
history = model.fit(
    x=X_train.astype(np.float32),
    y=Y_train.astype(np.float32),
    validation_data=(X_test.astype(np.float32), Y_test.astype(np.float32)),
    epochs=200,
    batch_size=128,
)

In [None]:
# Train the model.
# NOTE(review): this is the same fit call as in the previous cell, applied to
# the same `model` object, and it reassigns `history`. Running both cells
# therefore keeps only this run's loss curves for plotting -- confirm the
# duplicate cell is intended.
history = model.fit(X_train.astype(np.float32),Y_train.astype(np.float32), validation_data=(X_test.astype(np.float32), Y_test.astype(np.float32)),epochs=200, batch_size=128)

### Loss函数

In [None]:
# Draw the per-epoch training loss and validation loss on the same axes.
for curve in ('loss', 'val_loss'):
    plt.plot(history.history[curve])
plt.title('Model loss')
plt.xlabel('Epoch')
plt.ylabel('LOSS')
plt.legend(['Train', 'Test'], loc='upper left')
plt.show()

In [None]:
import keras
from keras.models import load_model
# Save the trained model to an HDF5 file.
model.save('../model/save_model/model.h5')  
# To reload a previously saved model instead, uncomment:
# model = load_model('../model/save_model/model_selu.h5')

In [None]:
# Fetch the model's weights.
weights = model.get_weights() 
# Assign weights to the model. The arrays in the list must have the same
# shapes as the weights returned by get_weights().
model.set_weights(weights)

In [None]:
import seaborn as sns
# Training set: predicted age against true age.
predicted = model.predict(X_train)
# The model emits one value per sample in a column; take that column as 1-D.
y_pred = np.asarray(predicted)[:, 0]
# x/y are passed by keyword: recent seaborn releases reject positional x/y.
sns.scatterplot(x=Y_train, y=y_pred, color='black', s=3, label="Pred Age")
# Draw the identity line that the legend entry "y = x" refers to.
limits = [min(Y_train.min(), y_pred.min()), max(Y_train.max(), y_pred.max())]
plt.plot(limits, limits, color='red', linewidth=1, label="y = x")
plt.legend()
plt.xlabel('True Age')
plt.ylabel('Predicted Age')
plt.show()


In [None]:
import seaborn as sns

# Test set: predicted age against true age.
predicted = model.predict(X_test)
# The model emits one value per sample in a column; take that column as 1-D.
# y_pred is reused by the metrics cell that follows.
y_pred = np.asarray(predicted)[:, 0]
# x/y are passed by keyword: recent seaborn releases reject positional x/y.
sns.scatterplot(x=Y_test, y=y_pred, color='black', s=3, label="Pred Age")
# Draw the identity line that the legend entry "y = x" refers to.
limits = [min(Y_test.min(), y_pred.min()), max(Y_test.max(), y_pred.max())]
plt.plot(limits, limits, color='red', linewidth=1, label="y = x")
plt.legend()
plt.xlabel('True Age')
plt.ylabel('Predicted Age')
plt.show()


In [None]:
#Metrics
from sklearn import metrics
from scipy.stats import pearsonr# R square way2
# Test-set metrics. Uses y_pred from the test-set prediction cell, so that
# cell must have been run last.
errors = np.asarray(y_pred) - Y_test
# Median absolute error.
mad = round(np.median(np.abs(errors)), 2)
mse_exact = np.mean(errors ** 2)
mse = round(mse_exact, 2)
# Take the root of the unrounded MSE so rounding error is not compounded.
rmse = round(np.sqrt(mse_exact), 2)
# Index [0] is the correlation coefficient; this works on SciPy versions
# both with and without the `.statistic` attribute.
pearsonR = round(pearsonr(Y_test, y_pred)[0], 2)
pearsonR,mad,mse,rmse