# 加载数据集


In [44]:
from scipy.io import loadmat
import numpy as np

# Location of the MATLAB file that is read below.
file_path = 'dataset/ADNI.mat'

# Class names, listed in the same order as the integer labels in label_mapping.
labels = ['AD', 'MCI', 'MCIn', 'MCIp', 'NC']

# Class name -> integer label.
label_mapping = {
    "AD": 0,
    "MCI": 1,
    "MCIn": 2,
    "MCIp": 3,
    "NC": 4
}

# Read the .mat file with loadmat; the result is indexed by class name.
data = loadmat(file_path)

# Expected layout of `data` (documentation only, shapes written as
# array(rows, cols)). This used to be written as an executable assignment;
# it is kept as a comment because running it would raise NameError on the
# undefined name `array`, and would otherwise replace the loaded matrices.
#
#   data = {
#       'AD':   array(51, 186),
#       'MCI':  array(99, 186),
#       'MCIn': array(56, 186),
#       'MCIp': array(43, 186),
#       'NC':   array(52, 186),
#   }

In [45]:
from sklearn.preprocessing import MinMaxScaler
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd


# Per-class feature matrices, unpacked in label order
# (AD=0, MCI=1, MCIn=2, MCIp=3, NC=4).
ad, mci, mcin, mcip, nc = (data[key] for key in ('AD', 'MCI', 'MCIn', 'MCIp', 'NC'))
class_blocks = (ad, mci, mcin, mcip, nc)

# One float label per row: every row of the i-th block receives the label i.
# NOTE(review): the data-format description lists 99 MCI rows and 56 + 43
# MCIn/MCIp rows -- confirm MCI is not simply the union of the two, otherwise
# the same samples appear under two different labels.
all_labels = np.concatenate(
    [np.full(block.shape[0], float(label)) for label, block in enumerate(class_blocks)]
)

# Stack all rows into a single matrix, then min-max scale each column.
# NOTE(review): the scaler is fitted on every row, i.e. before any
# train/validation split is made.
data_stacked = np.vstack(class_blocks)
data_combined = MinMaxScaler().fit_transform(data_stacked)

# Same matrix as a DataFrame whose column names are the integers 0..n_cols-1.
df = pd.DataFrame(data_combined, columns=list(range(data_combined.shape[1])))

# autoencoder

In [None]:
# Conv1D / MaxPooling1D / UpSampling1D / Adam are not used by the dense model
# below; the import lines are kept unchanged.
from keras.layers import Input, Dense, Conv1D, MaxPooling1D, UpSampling1D
from keras.models import Model
from keras.optimizers import Adam

# Autoencoder input: the scaled feature matrix ONLY.
# The class labels must not be concatenated onto the input here: doing so
# lets the encoder copy the target into the reduced features, and any
# classifier trained on those features is then evaluated with leaked labels.
data_array = np.array(data_combined)

# Width of the input layer (number of feature columns) and of the bottleneck.
class_num = data_array.shape[1]
class_num_reduce = int(class_num / 2)

# Dense autoencoder: three encoding layers narrowing to the bottleneck ...
input_layer = Input(shape=(class_num,))
encoded = Dense(class_num_reduce * 4, activation='relu')(input_layer)
encoded = Dense(class_num_reduce * 2, activation='relu')(encoded)
encoded = Dense(class_num_reduce, activation='relu')(encoded)  # bottleneck

# ... and three decoding layers mirroring them back to the input width.
decoded = Dense(class_num_reduce * 2, activation='relu')(encoded)
decoded = Dense(class_num_reduce * 4, activation='relu')(decoded)
# The targets are min-max scaled into [0, 1], so a sigmoid output matches
# their range; a ReLU output unit that goes inactive reconstructs a constant 0.
decoded = Dense(class_num, activation='sigmoid')(decoded)

# Full autoencoder: input -> reconstruction.
autoencoder = Model(input_layer, decoded)

autoencoder.compile(optimizer='adam', loss='mean_squared_error')
autoencoder.summary()

# Train to reconstruct the features from themselves (no labels involved).
# NOTE(review): this fits on every row, including the rows that later end up
# in the validation split.
autoencoder.fit(data_array, data_array, epochs=200, batch_size=256, shuffle=True)

# Encoder half only: input -> bottleneck activations, used as reduced features.
encoder = Model(input_layer, encoded)
data_reduce = encoder.predict(data_array)

# Append the labels as the LAST column, after encoding, so that the split
# step can slice features ([:, :-1]) and labels ([:, -1]) apart again.
data_reduce = np.column_stack((data_reduce, all_labels))
print("合并后的数据形状:", data_reduce.shape)

# 分为训练和验证

In [47]:
from sklearn.model_selection import train_test_split
# Train/validation split on the autoencoder-reduced data: every column but the
# last is a feature, the last column holds the class label.
features_reduced = data_reduce[:, :-1]
labels_reduced = data_reduce[:, -1]

# stratify keeps the class proportions the same in both splits; with five
# classes of unequal size and a small sample count, a plain random split can
# leave a class under-represented in the validation set.
X_train, X_val, y_train, y_val = train_test_split(
    features_reduced,
    labels_reduced,
    test_size=0.2,
    random_state=42,
    stratify=labels_reduced,
)

# Alternative: split the original (scaled, un-reduced) data instead; remember
# to disable the PCA step below when switching.
# X_train, X_val, y_train, y_val = train_test_split(data_combined, all_labels, test_size=0.2, random_state=42, stratify=all_labels)

# PCA

In [None]:
# Optional PCA step, off by default. The code used to be parked inside a
# string literal, which does nothing except echo the string as cell output;
# a flag makes the step switchable without editing the code.
USE_PCA = False

if USE_PCA:
    from sklearn.decomposition import PCA

    # Fit the projection on the training split only, then apply the same
    # projection to the validation split.
    pca = PCA(n_components=37)
    X_train = pca.fit_transform(X_train)
    X_val = pca.transform(X_val)

# SVM

In [None]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Linear-kernel SVM fitted on the training split (fit returns the estimator).
model = SVC(kernel='linear').fit(X_train, y_train)

# Validation split: predict, score, report.
y_pred = model.predict(X_val)
accuracy = accuracy_score(y_val, y_pred)
print(f'valid accuracy: {accuracy}')

# Training split: scored the same way for comparison with the figure above.
# Accuracy is the fraction of matching entries, so argument order does not
# affect the value.
train_pred = model.predict(X_train)
accuracy = accuracy_score(y_train, train_pred)
print(f'train accuracy: {accuracy}')

# 卷积

In [50]:
import numpy as np

# Add a trailing length-1 axis to both splits.
X_train1 = X_train[..., np.newaxis]
X_val1 = X_val[..., np.newaxis]


In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models
# 1-D CNN classifier:
# Conv1D -> ReLU -> MaxPooling1D -> Dropout -> Flatten -> Dense(32) -> Dense(16) -> Dense(5).

# Input layer: one channel per feature position.
inputs = layers.Input(shape=(X_train1.shape[1], 1))

# Convolutional stage.
x = layers.Conv1D(filters=64, kernel_size=3)(inputs)
x = layers.ReLU()(x)
x = layers.MaxPooling1D(pool_size=2)(x)
x = layers.Dropout(0.5)(x)

# Flatten the feature maps for the dense head.
x = layers.Flatten()(x)

# Dense head. The 16-unit layer keeps its own name so it can be reused as a
# feature output.
x = layers.Dense(32, activation='relu')(x)
outputs_forword = layers.Dense(16, activation='relu')(x)

# Five output units, one per class. softmax is required here:
# sparse_categorical_crossentropy (default from_logits=False) expects a
# probability distribution per sample, which ReLU outputs are not -- they are
# unnormalised and can all be zero.
outputs = layers.Dense(5, activation='softmax')(outputs_forword)

# Assemble and compile the model.
model = models.Model(inputs=inputs, outputs=outputs)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train, tracking validation metrics each epoch, then predict on validation.
history = model.fit(X_train1, y_train, epochs=50, batch_size=256, validation_data=(X_val1, y_val))
predictions = model.predict(X_val1)

In [None]:
# Training vs. validation curves, one figure per metric.
# Each entry: (training key, training legend, validation key, validation legend, y-axis label).
curve_specs = [
    ('loss', 'Training Loss', 'val_loss', 'Validation Loss', 'Loss'),
    ('accuracy', 'Training Accuracy', 'val_accuracy', 'Validation Accuracy', 'Accuracy'),
]

for train_key, train_legend, val_key, val_legend, y_label in curve_specs:
    plt.plot(history.history[train_key], label=train_legend)
    plt.plot(history.history[val_key], label=val_legend)
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.legend()
    plt.show()

# 分类器换成svm

In [None]:
from tensorflow.keras import models
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Feature extractor: the trained CNN truncated at its 16-unit dense layer.
# Built with tensorflow.keras `models.Model`, the same API that created
# `inputs` and `outputs_forword`, instead of relying on a `Model` name imported
# from the standalone `keras` package in an earlier cell.
model_to_svm = models.Model(inputs=inputs, outputs=outputs_forword)

# Run both splits through the extractor to get the SVM inputs.
X_train_svm = model_to_svm.predict(X_train1)
X_val_svm = model_to_svm.predict(X_val1)

# Linear-kernel SVM on the extracted features.
svm = SVC(kernel='linear')
svm.fit(X_train_svm, y_train)

# Predictions on both splits.
y_pred = svm.predict(X_val_svm)
y_pred_train = svm.predict(X_train_svm)

# Report validation and training accuracy.
accuracy = accuracy_score(y_val, y_pred)
print(f'valid accuracy: {accuracy}')
accuracy = accuracy_score(y_train, y_pred_train)
print(f'train accuracy: {accuracy}')
