# 加载数据集&预处理


In [29]:
from scipy.io import loadmat
import numpy as np

# Location of the MATLAB file that holds the fMRI arrays for both groups.
file_path = 'dataset/FTD_90_200_fMRI.mat'

# Class names; a name's position in this list is its integer label.
labels = ['FTD', 'NC']
label_mapping = {name: index for index, name in enumerate(labels)}

# Read the .mat file with loadmat; later cells index the result by group name.
data = loadmat(file_path)

## data格式
data={  
    'FTD':array(95, 90, 200),  
    'NC':array(86, 90, 200),  
}  
各数组的维度依次为（样本数, 脑区数, 时间步数）。  
将两组样本沿样本维拼接后转换为  
data:(181,90,200)  
label:(181,)，其中 FTD 记为 0，NC 记为 1

尚未归一化  
后续尝试以下四种归一化方式：  
1.对于 同一样本的 同一脑区 的数据 进行归一化  
2.对于 不同样本间的 同一脑区的 同一时间步的数据 进行归一化  
3.对于 所有样本、所有脑区的 同一时间步的数据 进行归一化  
4.对于 同一样本的 不同脑区的 同一时间步的数据 进行归一化  


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler


# Per-group arrays taken from the loaded .mat dictionary.
ftd, nc = data['FTD'], data['NC']


# One label per sample: 0.0 for every FTD sample, then 1.0 for every NC sample.
all_labels = np.repeat([0.0, 1.0], [ftd.shape[0], nc.shape[0]])

# Join both groups along the first (sample) axis, FTD first.
data_combined = np.concatenate((ftd, nc), axis=0)

data_combined.shape

## 归一化

### 归一化1  
对于 同一样本的 同一脑区 的数据 进行归一化  
data_combined[i] 转置后形状为（时间步, 脑区），每一列是一个脑区随时间步变化的序列；MinMaxScaler 按列缩放，因此相当于对每个脑区的时间序列单独归一化

In [None]:
# Normalisation 1: min-max scale each region's time series within each sample.
data_norm1 = np.zeros(data_combined.shape, dtype=np.float64)
print(data_norm1.shape)
print(data_norm1[0].shape)
for i, sample_matrix in enumerate(data_combined):
    # MinMaxScaler scales column by column, so transpose to put every region's
    # series in its own column, scale, then transpose back.
    per_region_columns = sample_matrix.T
    data_norm1[i] = MinMaxScaler().fit_transform(per_region_columns).T

In [None]:
# Shape of one sample after transposing (the layout given to the scaler in normalisation 1).
np.transpose(data_combined[0]).shape

### 归一化2  
对于 不同样本间的 同一脑区的 同一时间步的数据 进行归一化  

In [33]:
# Normalisation 2: for every (region, time step) pair, min-max scale the
# values across all samples.
data_norm2 = np.zeros(data_combined.shape, dtype=np.float64)
# np.ndindex walks the pairs with the time step varying fastest; `region` and
# `time_step` stay bound to the last pair once the loop finishes.
for region, time_step in np.ndindex(*data_combined.shape[1:]):
    across_samples = data_combined[:, region, time_step].reshape(-1, 1)
    data_norm2[:, region, time_step] = MinMaxScaler().fit_transform(across_samples).ravel()

In [None]:
# Shape of the single-column scaler input for one (region, time step) pair.
data_combined[:, region, time_step][:, np.newaxis].shape

### 归一化3  
将样本维与脑区维展平后，对 所有样本、所有脑区的 同一时间步的数据 进行归一化

In [35]:
# Normalisation 3: flatten the sample and region axes into rows so that each
# column is one time step, min-max scale every column, then restore the
# original shape. The scaler output is assigned directly, so no zero-filled
# array is allocated beforehand.
rows_by_time_step = data_combined.reshape(-1, data_combined.shape[-1])
data_norm3 = MinMaxScaler().fit_transform(rows_by_time_step).reshape(data_combined.shape)

In [None]:
# Shape of the flattened matrix used by normalisation 3 (one column per time step).
np.reshape(data_combined, (-1, data_combined.shape[-1])).shape

### 归一化4  
每一个样本 不同脑区的同一时间步的数据 进行归一化

In [37]:
# Normalisation 4: within each sample, min-max scale across regions,
# separately for every time step.
data_norm4 = np.zeros(data_combined.shape, dtype=np.float64)
for sample in range(data_combined.shape[0]):
    # data_combined[sample] has one row per region and one column per time
    # step. MinMaxScaler scales each column independently, so a single fit per
    # sample produces the same values as fitting a separate scaler for every
    # time step, with far fewer scaler fits.
    data_norm4[sample] = MinMaxScaler().fit_transform(data_combined[sample])

In [None]:
# Shape of the column formed by all regions of sample 0 at time step 0.
data_combined[0, :, 0][:, np.newaxis].shape

## 可视化

### 散点图  
0号样本0号脑区随时间步变化的数据

In [None]:
# Time series of sample 0, region 0, before any normalisation.
d = data_combined[0][0]
time_axis = range(len(d))

# (title, series) for each panel, listed in row-major display order.
panels = [
    ('Original Data', d),
    ('Normalized Data 1', data_norm1[0][0]),
    ('Normalized Data 2', data_norm2[0][0]),
    ('Normalized Data 3', data_norm3[0][0]),
    ('Normalized Data 4', data_norm4[0][0]),
]

fig, axs = plt.subplots(3, 2, figsize=(20, 10))
flat_axes = axs.ravel()

for ax, (title, series) in zip(flat_axes, panels):
    ax.scatter(time_axis, series)
    ax.set_title(title)

# The 3x2 grid has one more slot than there are series; hide the leftover
# axes instead of drawing an empty frame.
for ax in flat_axes[len(panels):]:
    ax.set_visible(False)

plt.tight_layout()
plt.show()


In [None]:
# Disabled cell: the code below is wrapped in a bare string literal, so it is
# never executed. When enabled it would shuffle the sample indices, take the
# first 10, and show the region-0 time series of each as a 1x200 grayscale
# image while printing that sample's label.
# NOTE(review): the y-axis label says "Height (90)" although the array passed
# to imshow is reshaped to (1, 200) - fix the label before re-enabling.
"""import matplotlib.pyplot as plt
import random
import numpy as np

num_samples = data_combined.shape[0]  

random_sample_indices = list(range(num_samples))
random.shuffle(random_sample_indices)


for sample_index in random_sample_indices[:10]:
    l = all_labels[sample_index]
    sample = data_combined[sample_index][0]
    sample = sample.reshape(1,200)
    
    plt.figure(figsize=(80, 20))
    plt.imshow(sample, cmap='gray')
    plt.title(f"Sample {sample_index} Gray-Scale Image")
    plt.xlabel("Width (200)")
    plt.ylabel("Height (90)")
    print(l)
    
    plt.show()"""


## 转置

In [41]:
# Choose which normalised array feeds the following cells.
use_data = data_norm4

# Swap the last two axes of every sample and store the result as a new,
# independent float64 array (np.array copies, so data_T never aliases use_data).
data_T = np.array(np.swapaxes(use_data, 1, 2), dtype=np.float64, order='C')

## Autoencoder  
减少脑区数量或时间步

In [None]:
# Disabled cell: the code below is wrapped in a bare string literal, so it is
# never executed. When enabled it would build a stacked-GRU autoencoder on
# data_T, train it to reconstruct its own input for 50 epochs, and then use
# the 45-unit GRU output as a reduced representation.
# NOTE(review): the encoder ends in GRU(45, return_sequences=True), so its
# output presumably has the 45 features on the last axis; the final
# .reshape((181,45,200)) would then reinterpret the axes rather than transpose
# them - confirm (and likely use a transpose) before re-enabling.
"""

from tensorflow.keras.layers import Input,GRU
from tensorflow.keras.models import Model
import numpy as np
import tensorflow as tf

tf.random.set_seed(0)

use_data = data_T

# 定义Autoencoder模型
input_shape = (use_data.shape[1], use_data.shape[2])
input_layer = Input(shape=input_shape)
encoded = GRU(64, activation='relu', return_sequences=True)(input_layer)
encoded = GRU(45, activation='relu', return_sequences=True)(encoded)

decoded = GRU(45, activation='relu', return_sequences=True)(encoded)
decoded = GRU(64, activation='relu', return_sequences=True)(decoded)
decoded = GRU(use_data.shape[2], activation='sigmoid', return_sequences=True)(decoded)

autoencoder = Model(input_layer, decoded)
autoencoder.compile(optimizer='adam', loss='mse')
autoencoder.summary()

# 训练Autoencoder模型
autoencoder.fit(use_data, use_data, epochs=50, batch_size=32, shuffle=True)

# 生成新的数据
encoder = Model(input_layer, encoded)
data_reduce = encoder.predict(use_data).reshape((181,45,200))
print(data_reduce.shape)

"""

## 分为训练和验证  
归一化方式在上面「转置」一节通过 use_data 选择；这里直接使用转置后的 data_T 进行划分

In [43]:
from sklearn.model_selection import train_test_split

# Array fed to the models: the per-sample transposed data.
Use_data = data_T

# Hold out 20% of the samples for validation. `stratify` keeps the proportion
# of the two classes the same in both subsets, which matters for a data set
# this small; `random_state` fixes the shuffle so the split is repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    Use_data,
    all_labels,
    test_size=0.2,
    random_state=0,
    stratify=all_labels,
)

# 模型

## RNN

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, GRU, LSTM, Dense, Reshape, Bidirectional, Dropout, Activation
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import BatchNormalization
import tensorflow as tf

# Fix TensorFlow's global seed before any layer is created.
tf.random.set_seed(0)

# Length of the second and third axes of the training array.
n_steps, n_features = X_train.shape[1], X_train.shape[2]

# Two GRU layers followed by a bidirectional LSTM, with dropout after every
# recurrent layer and a 2-way softmax output.
model = Sequential([
    # The declared input carries a trailing singleton axis that this layer
    # reshapes away before the first GRU.
    Reshape((n_steps, n_features), input_shape=(n_steps, n_features, 1)),
    GRU(64, return_sequences=True),
    Dropout(0.3),
    BatchNormalization(),
    GRU(128, return_sequences=True),
    Dropout(0.5),
    BatchNormalization(),
    Bidirectional(LSTM(256)),
    Dropout(0.7),
    Dense(2, activation='softmax'),
])

# Compile the model.
optimizer = Adam(learning_rate=1e-6)
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()


In [45]:
# Learning-rate scheduler: monitors val_loss with factor 0.5, patience 5 and
# a lower bound of 1e-7 on the learning rate.
from tensorflow.keras.callbacks import ReduceLROnPlateau

reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=5,
    min_lr=1e-7,
)

In [None]:
from tensorflow.keras.utils import to_categorical


# One-hot encode both label vectors into two columns.
y_train_one_hot, y_val_one_hot = (
    to_categorical(targets, num_classes=2) for targets in (y_train, y_val)
)

# Train for 50 epochs in batches of 64, evaluating on the validation split
# after every epoch and passing the learning-rate scheduler as a callback.
validation_pair = (X_val, y_val_one_hot)
history = model.fit(
    X_train,
    y_train_one_hot,
    validation_data=validation_pair,
    epochs=50,
    batch_size=64,
    callbacks=[reduce_lr],
)

In [None]:
# Plot the training and validation curves, one figure per metric
# (loss first, then accuracy).
for metric_key, metric_name in (('loss', 'Loss'), ('accuracy', 'Accuracy')):
    plt.plot(history.history[metric_key], label=f'Training {metric_name}')
    plt.plot(history.history[f'val_{metric_key}'], label=f'Validation {metric_name}')
    plt.xlabel('Epoch')
    plt.ylabel(metric_name)
    plt.legend()
    plt.show()

## 