In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten,MaxPooling1D
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Conv1D
import tensorflow as tf
from tensorflow.keras import layers, models


In [None]:
# Load the dataset from CSV into a DataFrame.
# Change `file_path` to point at your own copy of the file.
file_path = 'R_09_targetCNN_RDKit_singletargettest.csv'
data = pd.read_csv(filepath_or_buffer=file_path)

In [None]:
# Preprocessing: separate the feature columns from the 'targetNum' label column
# and bring both into the layout the 1-D CNN consumes.
feature_table = data.drop('targetNum', axis=1)

# Take the per-sample length from the table itself. A hard-coded length would
# silently merge or split rows whenever the CSV has a different (but evenly
# dividing) number of feature columns.
n_features = feature_table.shape[1]

# X: (n_samples, n_features, 1) float32 -- one channel per feature position.
X = feature_table.values.astype(np.float32).reshape(-1, n_features, 1)

# y: one-hot encoded class labels.
y = to_categorical(data['targetNum'].values)

In [None]:
# Split into training and test sets: 20 % of the samples are held out for
# testing, with a fixed seed so the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)


In [None]:
# Build the 1-D CNN classifier: convolution -> max pooling -> flatten -> softmax.
model = Sequential([
    # The input shape is read from the training tensor (sequence length,
    # channels) so it always matches the prepared data instead of repeating
    # the feature count as a magic number.
    Conv1D(32, kernel_size=2, activation='relu', input_shape=X_train.shape[1:]),
    # Downsample the sequence by a factor of 20; 'same' pads the tail so the
    # last partial window is kept.
    MaxPooling1D(pool_size=20, padding='same'),
    Flatten(),
    # Output layer: one softmax unit per class (width of the one-hot labels).
    Dense(y.shape[1], activation='softmax'),
])

In [None]:
# # 构建CNN神经网络
# model = Sequential()
# model.add(Conv1D(5, kernel_size=2, activation='relu', input_shape=(27018, 1)))
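# # Dropout (note: `Dropout` is not imported in this notebook; add it to the keras.layers import before enabling this cell)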
# model.add(Dropout(0.2))
# # Add another convolutional layer; padding='valid' means no padding, so the output is slightly shorter than the input
# model.add(Conv1D(5, kernel_size=2,activation ='relu',padding ='valid'))
# # 添加一个最大池化层
# model.add(MaxPooling1D(pool_size =(2)))
# # 展平
# model.add(Flatten())
# # Dense layer with 512 hidden units
# model.add(Dense(512, activation='relu'))
# # Dropout
# model.add(Dropout(0.3))
# #output 
# model.add(Dense(y.shape[1], activation='softmax'))  # 输出层数量 = 类别数


In [None]:
# # 构建CNN神经网络
# model = Sequential()
# model.add(Conv1D(5, kernel_size=2, activation='relu', input_shape=(27018, 1)))
# model.add(Flatten())
# model.add(Dense(y.shape[1], activation='softmax'))  # 输出层数量 = 类别数

In [None]:
# Configure training: categorical cross-entropy loss (labels are one-hot),
# the Adam optimizer, and accuracy as the reported metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)


In [None]:
# Train the model for 5 epochs; 10 % of the training data is set aside as a
# validation set. The returned History object keeps the per-epoch curves.
history = model.fit(
    x=X_train,
    y=y_train,
    epochs=5,
    validation_split=0.1,
)


In [None]:
# Persist the trained model: first the weights alone, then the full model.
# NOTE(review): newer Keras releases appear to require weight files to end in
# '.weights.h5' -- confirm against the installed version before upgrading.

model.save_weights(filepath='06_cnncla_RDKit_weight3.h5')
model.save(filepath='06_cnncla_RDKit_model3.h5')

In [None]:
# Visualise the loss per epoch for the training data and for the validation
# split that `fit` carved out of the training data.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(history.history['loss'], label='Train')
# 'val_loss' comes from the validation split, not from the held-out test set,
# so it is labelled accordingly.
ax.plot(history.history['val_loss'], label='Validation')
ax.set_title('Model Loss')
ax.set_ylabel('Loss')
ax.set_xlabel('Epoch')
ax.legend(loc='upper left')
plt.show()

In [None]:
# Predict on the test set, then collapse both the predicted probabilities and
# the one-hot ground truth to integer class indices.
y_pred = model.predict(X_test)
y_pred_classes = y_pred.argmax(axis=1)
y_true = y_test.argmax(axis=1)

In [None]:
# Confusion matrix of true vs. predicted class indices on the test set.
cm = confusion_matrix(y_true=y_true, y_pred=y_pred_classes)


In [None]:
# Model evaluation on the test set. Precision, recall and F1 are averaged
# over classes using the 'weighted' mode.
acc_ = accuracy_score(y_true, y_pred_classes)
prec_, reca_, f1_ = (
    scorer(y_true, y_pred_classes, average='weighted')
    for scorer in (precision_score, recall_score, f1_score)
)
print('acc_, prec_, reca_, f1_:', acc_, prec_, reca_, f1_)

In [None]:
# Bar chart of the four evaluation metrics; the figure is also written to
# 'Metrics.png' before being shown.
labels = ['Accuracy', 'Precision', 'Recall', 'F1 Score']
metrics = [acc_, prec_, reca_, f1_]
fig, ax = plt.subplots()
ax.bar(labels, metrics)
ax.set_title('Classification Metrics')
fig.savefig('Metrics.png')
plt.show()

In [None]:
# Heatmap of the confusion matrix with integer counts annotated in each cell.
fig, ax = plt.subplots(figsize=(6, 4))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
ax.set_ylabel('Actual')
ax.set_xlabel('Predicted')
ax.set_title('Confusion Matrix Heatmap')
plt.show()

In [None]:

# 获取模型的权重
weights = model.get_weights()

# 保存权重到文件
np.savez("model_weights.npz", weights)

In [None]:
# 加载保存的权重
loaded_weights = np.load("model_weights.npz", allow_pickle=True)

# 设置模型的权重
model.set_weights(loaded_weights['arr_0'])


In [None]:
# Print every weight array with a 1-based running number.
for position, layer_weights in enumerate(weights, start=1):
    print("Layer", position, "weights:")
    print(layer_weights)

In [None]:
# Assumes the trained model is available in the `model` variable.

# First weight array (the kernel) of the first layer.
first_layer_weights = model.layers[0].get_weights()[0]
print(first_layer_weights)

# Collapse the first two axes of the 3-D kernel so it becomes a 2-D table
# whose columns correspond to the last axis.
num_rows, num_cols, num_filters = first_layer_weights.shape
flattened_weights = first_layer_weights.reshape(num_rows * num_cols, num_filters)

# Write the 2-D table as a comma-separated file.
np.savetxt("first_layer_weights.csv", flattened_weights, delimiter=",")

In [None]:
# Gradient-based importance of the first convolutional layer's feature maps.
conv_layer = model.layers[0]

# Auxiliary model that returns both the conv feature maps and the final class
# probabilities. `model.inputs` is already a list, so it is passed as-is
# rather than wrapped in another list.
grad_model = tf.keras.models.Model(model.inputs, [conv_layer.output, model.output])

# Compute the gradients.
# NOTE(review): the whole training set goes through one forward pass here;
# consider a smaller batch if memory becomes a problem.
with tf.GradientTape() as tape:
    conv_output, predictions = grad_model(X_train)
    # Use the score of each sample's top predicted class. Averaging *all*
    # softmax outputs is always 1 / num_classes, a constant, which would make
    # every gradient (numerically) zero.
    top_class_score = tf.reduce_max(predictions, axis=1)
    loss = tf.reduce_mean(top_class_score)

grads = tape.gradient(loss, conv_output)

# Average the gradients over samples and sequence positions, leaving one
# weight per convolution filter.
pooled_grads = tf.reduce_mean(grads, axis=(0, 1))