In [1]:
from scipy.io import loadmat
import numpy as np

# Path to the .mat file that holds the dataset
file_path = 'dataset/ADNI.mat'

# Class names; each one is used as a key into the loaded data
labels = ['AD', 'MCI', 'MCIn', 'MCIp', 'NC']

# Integer class index for every class name (position in `labels`)
label_mapping = {name: index for index, name in enumerate(labels)}

# Read the .mat file with loadmat
data = loadmat(file_path)

# label (样本，维度)
AD (51, 186)
MCI (99, 186)
MCIn (56, 186)
MCIp (43, 186)
NC (52, 186)


# 将186维度的向量 添加10个维度（补均值） reshape为14*14的矩阵

In [2]:
def init_data(data, labels, side=14):
    """Pad every feature vector with its own mean and reshape it to a square matrix.

    Args:
        data: Mapping from class name to a 2-D array of shape (samples, features).
        labels: Iterable of class names to read from ``data``.
        side: Edge length of the output matrices. Each vector is padded up to
            ``side * side`` entries (e.g. 186 features + 10 pad values for 14).

    Returns:
        dict mapping each class name to an array of shape (samples, side, side).

    Raises:
        ValueError: If a class array is not 2-D or has more than
            ``side * side`` features.
    """
    target_len = side * side
    data_res = {}

    for label in labels:
        samples = np.asarray(data[label])
        if samples.ndim != 2:
            raise ValueError(
                f"data[{label!r}] must be 2-D (samples, features), got shape {samples.shape}"
            )

        n_samples, n_features = samples.shape
        pad = target_len - n_features
        if pad < 0:
            raise ValueError(
                f"data[{label!r}] has {n_features} features, more than side*side={target_len}"
            )

        # Pad with the per-sample mean (not zeros) so the filler values do not
        # shift each sample's average.
        row_means = samples.mean(axis=1, keepdims=True)
        padded = np.concatenate([samples, np.repeat(row_means, pad, axis=1)], axis=1)

        # Explicit sample count keeps the (0, side, side) shape for an empty class,
        # so the result can still be concatenated with the other classes.
        data_res[label] = padded.reshape(n_samples, side, side)

    return data_res

# Build the reshaped per-class arrays from a fresh read of the .mat file.
# Passing `data` itself would make this cell non-idempotent: after the first
# run `data` is already the reshaped dict, so a re-run would fail.
data = init_data(loadmat(file_path), labels)


# 将数据集变形

In [3]:
import torch
from torch.utils.data import Dataset, DataLoader

# Merge the per-class arrays into one dataset with a matching label vector
class_ids = [label_mapping[name] for name in data]
class_sizes = [samples.shape[0] for samples in data.values()]

# Stack all samples along the first axis, in dict order
all_data = np.concatenate(list(data.values()), axis=0)

# Repeat each class index once per sample of that class (same order as above)
all_labels = np.repeat(class_ids, class_sizes)


class CustomDataset(Dataset):
    """In-memory dataset pairing sample arrays with integer class labels.

    Args:
        data: NumPy array of samples indexed along its first axis; stored as float32.
        labels: NumPy array with one label per sample; stored as int64.
        transform: Optional callable applied to each sample when it is fetched.

    Raises:
        ValueError: If ``data`` and ``labels`` differ in length.
    """

    def __init__(self, data, labels, transform=None):
        if len(data) != len(labels):
            raise ValueError(
                f"data and labels must have the same length, got {len(data)} and {len(labels)}"
            )
        self.data = data.astype(np.float32)
        self.labels = labels.astype(np.int64)
        self.transform = transform

    def __getitem__(self, idx):
        """Return ``(sample, label)`` as a float32 tensor and a long tensor."""
        sample = self.data[idx]
        label = self.labels[idx]

        if self.transform:
            sample = self.transform(sample)

        # A transform may already return a tensor; wrapping that in torch.tensor()
        # emits a copy-construct UserWarning on every item, so copy it the
        # recommended way instead.
        if isinstance(sample, torch.Tensor):
            sample = sample.detach().clone().to(torch.float32)
        else:
            sample = torch.tensor(sample, dtype=torch.float32)

        return sample, torch.tensor(label, dtype=torch.long)

    def __len__(self):
        """Number of samples in the dataset."""
        return len(self.data)



# 加载数据集到dataloader

In [None]:
from sklearn.model_selection import train_test_split
from torchvision import transforms

# Per-sample preprocessing: ToTensor followed by Normalize with mean 0.5 / std 0.5.
# NOTE(review): 0.5/0.5 are fixed constants, not statistics computed from this
# dataset - confirm they suit the actual feature range.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5]),
])

# Stratified 80/20 split into training and validation sets (fixed seed)
X_train, X_val, y_train, y_val = train_test_split(
    all_data,
    all_labels,
    test_size=0.2,
    random_state=42,
    stratify=all_labels,
)

# Dataset wrappers for both splits, sharing the same transform
train_dataset = CustomDataset(X_train, y_train, transform)
val_dataset = CustomDataset(X_val, y_val, transform)

# Only the training loader shuffles
train_loader = DataLoader(train_dataset, batch_size=256, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=256, shuffle=False)

dataloaders = dict(train=train_loader, valid=val_loader)

# Sanity check: print the first training batch only
data_batch, label_batch = next(iter(train_loader))
print(f"Data batch shape: {data_batch.shape}, Label batch: {label_batch}")

# 定义模型

In [5]:
import torch.nn as nn

class Net(nn.Module):
    """Small CNN classifier for single-channel 14x14 inputs.

    Three 3x3 convolutions (the second with stride 2), a 2x2 average pooling
    and three fully connected layers, with sigmoid activations throughout.

    Args:
        num_classes: Number of output logits (default 5).
    """

    def __init__(self, num_classes=5):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 16, 3, 1, 1)   # 1 -> 16 channels, stride 1, padding 1
        self.conv2 = nn.Conv2d(16, 16, 3, 2, 1)  # 16 -> 16 channels, stride 2 (14x14 -> 7x7)
        self.conv3 = nn.Conv2d(16, 32, 3, 1, 1)  # 16 -> 32 channels, stride 1, padding 1
        self.fc1 = nn.Linear(32 * 3 * 3, 120)    # 32 channels of 3x3 after pooling
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)    # output layer: one logit per class

        self.pool = nn.AvgPool2d(kernel_size=2, stride=2)  # average pooling (7x7 -> 3x3)

        self.act = nn.Sigmoid()

    def forward(self, x):
        """Map a batch of shape (N, 1, 14, 14) to logits of shape (N, num_classes)."""
        x = self.act(self.conv1(x))
        x = self.act(self.conv2(x))
        x = self.pool(self.act(self.conv3(x)))

        # Flatten per sample. Keeping the batch dimension fixed means an input of
        # the wrong spatial size fails in fc1 instead of silently being reshaped
        # into a different batch size.
        x = x.view(x.size(0), -1)

        x = self.act(self.fc1(x))
        x = self.act(self.fc2(x))
        return self.fc3(x)


model = Net()


# 训练

In [None]:
import time
import copy
import torch
import os
import torch.optim as optim

# --- Training configuration ---
num_epochs = 1000
log_every = 100  # print progress once every `log_every` epochs

device = 'cuda' if torch.cuda.is_available() else 'cpu'

optimizer = optim.Adam(model.parameters(), lr=0.0001)

criterion = nn.CrossEntropyLoss()

# Optional LR scheduler. When set, it is stepped with the validation loss,
# so it must accept a metric argument in step().
scheduler = None

filename = 'checkpoint.pth'

since = time.time()  # start time
best_acc = 0.0       # best validation accuracy so far (plain float)

model.to(device)

# Per-epoch history, used for plotting afterwards
val_acc_history = []
train_acc_history = []
train_losses = []
valid_losses = []

best_model_wts = copy.deepcopy(model.state_dict())

for epoch in range(num_epochs):
    verbose = epoch % log_every == 0
    if verbose:
        # Print the current epoch and a separator
        print('-' * 10)
        print(f'Epoch {epoch + 1}/{num_epochs}')

    # One training pass followed by one validation pass
    for phase in ['train', 'valid']:
        is_train = phase == 'train'
        model.train(is_train)  # train(False) is equivalent to eval()

        running_loss = 0.0
        running_corrects = 0

        # The loop variable is `targets`, not `labels`, so the class-name list
        # `labels` defined earlier in the notebook is not overwritten.
        for images, targets in dataloaders[phase]:
            images = images.to(device)
            targets = targets.to(device)

            if is_train:
                optimizer.zero_grad()

            # Gradients are only tracked and applied while training
            with torch.set_grad_enabled(is_train):
                outputs = model(images)
                loss = criterion(outputs, targets)
                preds = outputs.argmax(dim=1)
                if is_train:
                    loss.backward()
                    optimizer.step()

            # Accumulate as Python numbers; loss is weighted by the batch size
            running_loss += loss.item() * images.size(0)
            running_corrects += (preds == targets).sum().item()

        n_samples = len(dataloaders[phase].dataset)
        epoch_loss = running_loss / n_samples
        epoch_acc = running_corrects / n_samples

        time_elapsed = time.time() - since
        if verbose:
            print(f'Time elapsed {int(time_elapsed // 60)}m {int(time_elapsed % 60)}s')
            print(f'{phase} Loss:{round(epoch_loss, 4)} Acc:{round(epoch_acc, 4)}')

        # Keep and checkpoint the weights with the highest validation accuracy
        if phase == 'valid' and epoch_acc > best_acc:
            best_acc = epoch_acc
            best_model_wts = copy.deepcopy(model.state_dict())
            state = {
                'state_dict': model.state_dict(),
                'best_acc': best_acc,
                'optimizer': optimizer.state_dict(),
            }
            torch.save(state, filename)

        # Record history for plotting
        if phase == 'valid':
            val_acc_history.append(epoch_acc)
            valid_losses.append(epoch_loss)
            if scheduler:
                scheduler.step(epoch_loss)
        else:
            train_acc_history.append(epoch_acc)
            train_losses.append(epoch_loss)

# Training finished
time_elapsed = time.time() - since
print(f'Training complete {int(time_elapsed // 60)}m {int(time_elapsed % 60)}s')
print(f'Best val Acc: {best_acc:.4f}')

# Leave `model` holding the best validation weights rather than the last epoch's
model.load_state_dict(best_model_wts)

In [None]:
import matplotlib.pyplot as plt
# Two side-by-side panels: accuracy on the left, loss on the right
fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(10, 5))

# Accuracy curves
acc_ax.plot(val_acc_history, 'b', label='Validation acc')
acc_ax.plot(train_acc_history, 'r', label='Training acc')
acc_ax.set_title('Training and validation accuracy')
acc_ax.set_xlabel('Epochs')
acc_ax.set_ylabel('Accuracy')
acc_ax.legend()

# Loss curves
loss_ax.plot(valid_losses, 'b', label='Validation loss')
loss_ax.plot(train_losses, 'r', label='Training loss')
loss_ax.set_title('Training and validation loss')
loss_ax.set_xlabel('Epochs')
loss_ax.set_ylabel('Loss')
loss_ax.legend()

fig.tight_layout()
plt.show()

In [None]:
# Bare expression as the last line of the cell: the notebook displays the model's repr.
model