In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten,MaxPooling1D
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Conv1D
import tensorflow as tf
from tensorflow.keras import layers, models


In [None]:
# Load the three input CSV tables used by the cells below.
drugRDKitMatrix, TargetCount, durgTargetNum = map(
    pd.read_csv,
    (
        "R_09_drugRDKitMatrix.csv",
        "R_09_TargetCount.csv",
        "R_09_durgTargetNum.csv",
    ),
)

In [None]:
def merged_target(first_target, target_table=None, feature_table=None):
    """Build the labelled feature table for a single target.

    Rows of the drug/target table whose ``targetNum`` equals ``first_target``
    are relabelled 100 and every other row 0. Exact duplicate rows are then
    removed, the result is inner-joined with the feature table on ``ids``,
    rows containing any missing value are dropped, and the first column of
    the joined frame is removed.

    Parameters
    ----------
    first_target
        Value compared against the ``targetNum`` column.
    target_table : pandas.DataFrame, optional
        Frame with at least the columns ``ids`` and ``targetNum``. Defaults to
        the notebook-level ``durgTargetNum`` frame. The frame is not modified.
    feature_table : pandas.DataFrame, optional
        Frame with an ``ids`` column plus the feature columns. Defaults to the
        notebook-level ``drugRDKitMatrix`` frame.

    Returns
    -------
    pandas.DataFrame
        The joined frame without its first column; ``targetNum`` holds the
        0/100 label.
    """
    # Fall back to the notebook globals so existing one-argument calls keep working.
    if target_table is None:
        target_table = durgTargetNum
    if feature_table is None:
        feature_table = drugRDKitMatrix

    # assign() returns a new frame, so the caller's table is left untouched.
    labelled = target_table.assign(
        targetNum=np.where(target_table['targetNum'] == first_target, 100, 0)
    ).drop_duplicates()
    # NOTE(review): if one `ids` value can appear with several targets, it may
    # survive here with both a 100 row and a 0 row (conflicting labels) --
    # confirm against the input schema.

    # Inner join: keep only ids present in both tables, then drop incomplete rows.
    merged_df = pd.merge(labelled, feature_table, on='ids', how='inner').dropna()

    # Drops whatever column comes first (presumably the `ids` key -- verify
    # against the CSV layout) so it is not treated as a feature downstream.
    return merged_df.drop(merged_df.columns[0], axis=1)


def get_input_gradients(input_data, class_index=None, network=None):
    """Return the gradient of one class score with respect to the input.

    ``tape.gradient`` applied to a non-scalar target differentiates the *sum*
    of its elements. For a softmax output the class scores always sum to 1,
    so differentiating the whole prediction vector yields (numerically) zero
    gradients. A single class score is therefore selected before
    differentiating.

    Parameters
    ----------
    input_data : tf.Tensor
        Input batch to differentiate with respect to.
    class_index : int, optional
        Index of the output unit to explain. When omitted, the highest score
        of each sample (its predicted class) is used.
    network : callable, optional
        Model to evaluate. Defaults to the notebook-level ``model`` variable.

    Returns
    -------
    tf.Tensor
        Gradient tensor with the same shape as ``input_data``.
    """
    # Fall back to the notebook global so existing one-argument calls keep working.
    net = model if network is None else network
    with tf.GradientTape() as tape:
        # Plain tensors are not tracked automatically; watch the input explicitly.
        tape.watch(input_data)
        predictions = net(input_data)
        if class_index is None:
            # Score of the top-ranked class for every sample in the batch.
            score = tf.reduce_max(predictions, axis=-1)
        else:
            score = predictions[..., class_index]
    gradients = tape.gradient(score, input_data)
    return gradients

In [None]:
# Number of training epochs; also written into the results log.
epochsNum = 10

# Create the tab-separated results log and write its header row.
# The file is opened in append mode so earlier results are preserved; the
# header is written only when the file is still empty, so re-running this
# cell does not insert duplicate header lines.
with open('06_cnncla_RDKit.txt', 'a', encoding="utf-8") as results_log:
    if results_log.tell() == 0:
        header = '\t'.join(["ID", "epochsNum", "acc_", "prec_", "reca_", "f1_"])
        results_log.write(header + '\n')

In [None]:
# Train, evaluate and explain one binary CNN classifier per selected target.
# The positional slice [1:3] picks the second and third entries of `Var1`.
for target in TargetCount['Var1'][1:3]:
    print(target)

    # Drop Keras state left over from the previous iteration's model so that
    # memory does not accumulate across targets.
    tf.keras.backend.clear_session()

    data = merged_target(target)

    # --- Pre-processing ---
    # Feature width is taken from the data instead of a hard-coded constant.
    features = data.drop('targetNum', axis=1).values.astype(np.float32)
    n_features = features.shape[1]
    X = features.reshape(-1, n_features, 1)
    # merged_target marks positives with 100 and negatives with 0. Passing
    # those raw values to to_categorical would infer max+1 = 101 classes, so
    # the labels are collapsed to 0/1 and the class count is fixed at 2.
    y = data['targetNum'].values
    y = to_categorical((y != 0).astype(int), num_classes=2)

    # --- Train/test split ---
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # --- Build the 1-D CNN ---
    # NOTE: `model` must stay a notebook-level name; get_input_gradients reads it.
    model = Sequential()
    model.add(Conv1D(32, kernel_size=2, activation='relu', input_shape=(n_features, 1)))
    model.add(MaxPooling1D(pool_size=20, padding='same'))
    model.add(Flatten())
    model.add(Dense(y.shape[1], activation='softmax'))  # one output unit per class
    # Loss, optimizer and the metric reported during training.
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    # --- Train ---
    history = model.fit(X_train, y_train, validation_split=0.1, epochs=epochsNum)

    # --- Persist the full model and its weights ---
    run_prefix = '_'.join(["06_cnncla_RDKit", str(target), str(epochsNum)])
    modelName = run_prefix + "_model.h5"
    weightName = run_prefix + "_weight.h5"
    # NOTE(review): newer Keras releases may require weight files to end in
    # ".weights.h5" -- confirm against the installed version.
    model.save(modelName)
    model.save_weights(weightName)

    # --- Predict on the held-out test set ---
    y_pred = model.predict(X_test)
    y_pred_classes = np.argmax(y_pred, axis=1)
    y_true = np.argmax(y_test, axis=1)

    # Confusion matrix; not written to disk, kept for interactive inspection.
    cm = confusion_matrix(y_true, y_pred_classes)

    # --- Evaluation metrics (support-weighted averages over the classes) ---
    acc_ = accuracy_score(y_true, y_pred_classes)
    prec_ = precision_score(y_true, y_pred_classes, average='weighted')
    reca_ = recall_score(y_true, y_pred_classes, average='weighted')
    f1_ = f1_score(y_true, y_pred_classes, average='weighted')
    print('acc_, prec_, reca_, f1_:', acc_, prec_, reca_, f1_)

    # Append one tab-separated result row; `with` guarantees the file is closed.
    with open('06_cnncla_RDKit.txt', 'a', encoding="utf-8") as results_log:
        row = '\t'.join(map(str, [target, epochsNum, acc_, prec_, reca_, f1_]))
        results_log.write(row + '\n')

    # --- Input-gradient explanation for one training sample ---
    sample_index = 0  # use the first training sample as the example
    # Slicing (rather than indexing) keeps the batch axis: shape (1, n_features, 1).
    x_sample = tf.convert_to_tensor(X_train[sample_index:sample_index + 1], dtype=tf.float32)
    gradients = get_input_gradients(x_sample)
    gradient_values = gradients.numpy()

    # One row per input position with its gradient value.
    flat_gradients = gradient_values.flatten()
    gradient_df = pd.DataFrame({'Feature Index': range(len(flat_gradients)),
                                'Gradient': flat_gradients})
    output_file = run_prefix + "_gradients.csv"
    gradient_df.to_csv(output_file, index=False)

    # L2 norm taken along axis 1 (the feature axis): this yields one overall
    # gradient magnitude per sample, not one value per feature.
    gradients_norm = np.linalg.norm(gradient_values, axis=1)
    print(gradients_norm)

    gradients_norm_df = pd.DataFrame({'Sample Index': range(len(gradients_norm)),
                                      'Gradient Norm': gradients_norm.flatten()})
    output_file2 = run_prefix + "_gradients_norm_df.csv"
    gradients_norm_df.to_csv(output_file2, index=False)

    
