In [None]:
###############################
# 训练
###############################
import pickle
import random
import numpy as np
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset

# Select the compute device: CUDA when available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    # Pin the process to GPU 0 only when CUDA actually exists. Calling
    # torch.cuda.set_device unconditionally raises on CPU-only machines,
    # which defeated the CPU fallback chosen on the line above.
    torch.cuda.set_device(0)
# cuDNN switches; they are plain flags and harmless when no GPU is present.
torch.backends.cudnn.benchmark = True
torch.backends.cudnn.enabled = True
print(device)

truenoduleweightspath="modelpths/truenodule-cnn.pth"                # weights file of the true/false-positive classification model
datafolder="processeddata"                                          # folder holding the data written by the first preprocessing step


In [None]:
# Load the images, their labels and the slice counts from the preprocessed-data folder

noduleimages = np.load(f"{datafolder}/noduleimagesCNN.npy")
nodulelabels = np.load(f"{datafolder}/nodulelabelsCNN.npy")

# Disabled: loading of the nodule-sensitivity pickle.
# with open(datafolder+"/nodulesensitivity.pkl", 'rb') as f:
#     nodulesensitivity = pickle.load(f)
# NOTE(review): pickle.load can execute arbitrary code; only open files produced by this project.
with open(f"{datafolder}/slicecountsCNN.pkl", mode='rb') as slicecounts_file:
    slicecounts = pickle.load(slicecounts_file)

# Overwrite every element that compares equal to zero with 0
# (the literal -0 used originally is simply the integer 0).
noduleimages[noduleimages == 0] = 0
# Disabled: binarisation of the labels.
# nodulelabels[nodulelabels<=0]=0
# nodulelabels[nodulelabels>0]=1



In [None]:
# Index list of the false-positive samples (label == False), in shuffled order

falseind = [idx for idx, label in enumerate(nodulelabels) if label == False]
random.shuffle(falseind)
# Cap the list at the number of images (a no-op when there is one label per image).
del falseind[noduleimages.shape[0]:]


In [None]:
# Count the samples labelled True (TP) and False (FP)

TP = int(np.count_nonzero(nodulelabels == True))
FP = int(np.count_nonzero(nodulelabels == False))
print("Number of True Positive nodules:",TP)
print("Number of False Positive nodules:",FP)
print("# of FPs per TP",FP/TP)


In [None]:
# Balance the dataset

# Keep the samples labelled True and pair them with an equally sized random
# draw of the samples labelled False.
BALANCE_SEED = 114  # fixed seed so the same negatives are drawn on every run
# The original random.seed() (no argument) reseeded from OS entropy, which made
# the selected subset, and every metric computed from it, irreproducible.
random.seed(BALANCE_SEED)

# Build each index list once; the counts are derived here instead of relying on
# a TP variable left behind by another cell.
true_indices = [i for i, label in enumerate(nodulelabels) if label == True]
false_indices = [i for i, label in enumerate(nodulelabels) if label == False]

# random.sample raises ValueError when asked for more items than the population
# holds, so cap the per-class size at the smaller class.
samples_per_class = min(len(true_indices), len(false_indices))
if samples_per_class < len(true_indices):
    # Fewer negatives than positives: down-sample the positives as well so the
    # result is still balanced (original order is kept via sorted()).
    true_indices = sorted(random.sample(true_indices, samples_per_class))
FPindices = random.sample(false_indices, samples_per_class)

selected_indices = true_indices + FPindices
noduleimages = noduleimages[selected_indices]
nodulelabels = nodulelabels[selected_indices]

n_true = len(true_indices)
n_false = len(FPindices)
print("Number of True Positive nodules:", n_true)
print("Number of False Positive nodules:", n_false)
print("# of FPs per TP", n_false / n_true if n_true else float('nan'))

# Convert to tensors: float images and two-column one-hot float labels
# (column 1 is set for samples labelled True).
# NOTE(review): this rebinds noduleimages/nodulelabels to tensors, so the cell
# cannot be re-run without re-running the data-loading cell first.
noduleimages = torch.from_numpy(noduleimages).float()
nodulelabels = F.one_hot(torch.from_numpy(nodulelabels).long(), 2).float()

print(nodulelabels.size())

In [None]:
# Split the data: 30% is held out, with a fixed random_state

X_train, X_test, y_train, y_test = train_test_split(
    noduleimages,
    nodulelabels,
    test_size=0.3,
    random_state=114,
)

# Drop the references to the full tensors; only the split results are used from here on.
del nodulelabels, noduleimages

# Dataset class definition

class NoduleDataset(Dataset):
    """Map-style dataset pairing each image with the label at the same index."""

    def __init__(self, images, labels):
        """Store the two parallel, indexable collections.

        Args:
            images: indexable collection of samples.
            labels: indexable collection of targets; its length defines the
                dataset size.
        """
        self.labels = labels
        self.images = images

    def __len__(self):
        """Return the number of labels."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` tuple stored at position ``idx``."""
        return self.images[idx], self.labels[idx]
    
# Wrap each split in a NoduleDataset
train_dataset = NoduleDataset(images=X_train, labels=y_train)
test_dataset = NoduleDataset(images=X_test, labels=y_test)

# Batch both datasets (32 samples per batch); only the training loader shuffles
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=32)
test_loader = DataLoader(dataset=test_dataset, shuffle=False, batch_size=32)


In [None]:
# Classify nodules vs. non-nodules

# The model definition lives in the project module TrueNoduleClassifier
from TrueNoduleClassifier import NoduleClassifier

# Instantiate the network on the selected device
model = NoduleClassifier().to(device)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-5)

# Per-epoch metric log: one (initially empty) list per series
history = {
    series: []
    for series in ('train_loss', 'val_loss', 'train_accuracy', 'val_accuracy')
}

# Disabled: alternative best-loss trackers.
# best_val_loss = float('inf')
# best_loss = float('inf')


In [None]:
# Train the model

from tqdm.notebook import tqdm

best_accuracy = 0
best_loss = float('inf')

num_epochs = 100
for epoch in tqdm(range(num_epochs)):
    print(f'Epoch {epoch + 1}/{num_epochs}:')

    # ---- training pass ----
    model.train()
    total_samples = 0
    total_correct = 0
    train_loss = 0
    for inputs, labels in tqdm(train_loader):
        # Move the batch to the same device the model was placed on, instead
        # of hard-coding .cuda(), so the CPU fallback keeps working.
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        _, predicted = torch.max(outputs, dim=1)
        batch_loss = criterion(outputs, labels)
        train_loss += batch_loss.item()
        # Labels are one-hot; column 1 carries the positive-class indicator.
        total_correct = total_correct + (predicted == labels[:,1]).sum().item()
        total_samples = total_samples + (predicted.size(0))
        # No loss.requires_grad_(True) here: on a healthy graph it is a no-op,
        # and on a detached graph it only hides the problem (backward() would
        # then run without producing any parameter gradients).
        batch_loss.backward()
        optimizer.step()
    # Mean of the per-batch losses and overall accuracy for this epoch.
    loss = train_loss / len(train_loader)
    train_accuracy = total_correct / total_samples

    # ---- evaluation pass on test_loader ----
    model.eval()
    with torch.no_grad():
        total_correct = 0
        total_samples = 0
        total_val_loss = 0
        for inputs, labels in tqdm(test_loader):
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs, dim=1)
            batch_val_loss = criterion(outputs, labels)
            total_val_loss += batch_val_loss.item()
            total_correct = total_correct + (predicted == labels[:,1]).sum().item()
            total_samples = total_samples + (predicted.size(0))

        val_loss = total_val_loss / len(test_loader)
        val_accuracy = total_correct / total_samples
        print(f'Training loss: {loss:.4f},   Training accuracy: {train_accuracy:.4f}\n'
              f'Validation loss: {val_loss:.4f}, Validation accuracy: {val_accuracy:.4f}')

    # Save the weights whenever the mean *training* loss improves.
    # NOTE(review): the checkpoint is keyed to training loss, not validation
    # loss - confirm this is the intended selection criterion.
    if loss < best_loss:
        print(f'Train loss decreased from {best_loss} to {loss}.')
        print('Update best parameters, and save weights.')
        best_loss = loss
        # state_dict() hands back references to the live parameter tensors, so
        # clone them; otherwise best_param silently follows later updates.
        best_param = {name: tensor.detach().clone()
                      for name, tensor in model.state_dict().items()}
        torch.save(model.state_dict(), truenoduleweightspath)

    history['train_loss'].append(loss)
    history['val_loss'].append(val_loss)
    history['train_accuracy'].append(train_accuracy)
    history['val_accuracy'].append(val_accuracy)
# 36min 42.3s  (presumably the recorded run time of this cell)

In [None]:

# Normalise the logged training accuracies to plain Python numbers.
# Entries exposing .item() (e.g. 0-d tensors or NumPy scalars) are unwrapped;
# anything else, such as a plain float, is kept unchanged. The explicit check
# replaces a bare `except: pass`, which also swallowed KeyboardInterrupt and
# hid genuine errors. The list is updated in place via slice assignment.
history['train_accuracy'][:] = [
    acc.item() if hasattr(acc, 'item') else acc
    for acc in history['train_accuracy']
]


In [None]:
# Show the training accuracies collected in `history`.
print(history['train_accuracy'])

In [None]:
import pickle

# Persist the metric history to 'historyTPCNN.pkl' in the working directory.
with open('historyTPCNN.pkl', mode='wb') as history_file:
    pickle.dump(history, history_file)


In [None]:
# Line charts of loss and accuracy over the epochs

# One chart per metric: training curve in blue, validation curve in green.
for train_key, val_key, y_label in (
    ('train_loss', 'val_loss', "Log Loss"),
    ('train_accuracy', 'val_accuracy', "Accuracy"),
):
    plt.plot(history[train_key], color='b')
    plt.plot(history[val_key], color='g')
    plt.xlabel("Epoch")
    plt.ylabel(y_label)
    plt.legend(["Train", "Validation"])
    plt.show()

In [None]:
# Partition the test set into a true-nodule-only part and a non-nodule-only part

# Column 1 of the one-hot labels is the positive-class indicator; build each mask once.
positive_column = y_test[:,1]
is_true_nodule = positive_column == 1
is_non_nodule = positive_column == 0

XtestTrue_tensor, YtestTrue_tensor = X_test[is_true_nodule], y_test[is_true_nodule]
XtestFalse_tensor, YtestFalse_tensor = X_test[is_non_nodule], y_test[is_non_nodule]

torch.cuda.empty_cache()

In [None]:
# Build the data loaders

XtestTrue_loader = DataLoader(NoduleDataset(XtestTrue_tensor, YtestTrue_tensor), batch_size=32, shuffle=False)
XtestFalse_loader = DataLoader(NoduleDataset(XtestFalse_tensor, YtestFalse_tensor), batch_size=32, shuffle=False)

# Load the saved weights. map_location remaps the stored tensors onto the
# active device, so a checkpoint written on a GPU still loads on a CPU-only host.
model.load_state_dict(torch.load(truenoduleweightspath, map_location=device))
model.eval()

from tqdm.notebook import tqdm

criterion = nn.CrossEntropyLoss()


def _evaluate_subset(loader):
    """Run the model over ``loader`` with gradients disabled.

    Args:
        loader: iterable of ``(inputs, labels)`` batches whose labels are
            one-hot with the positive-class indicator in column 1.

    Returns:
        Tuple ``(correct, total, loss_sum, predictions)``: number of correct
        predictions, number of samples seen, sum of the per-batch criterion
        values, and the predicted class indices in loader order.
    """
    correct, total, loss_sum, predictions = 0, 0, 0, []
    with torch.no_grad():
        for inputs, labels in tqdm(loader):
            # Follow the model's device instead of hard-coding .cuda().
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs, dim=1)
            predictions.extend(predicted.cpu().numpy())
            total += predicted.size(0)
            correct += (predicted == labels[:,1].long()).sum().item()
            loss_sum += criterion(outputs, labels).item()
    return correct, total, loss_sum, predictions


correct_true, total_true, loss_true, _pred_true = _evaluate_subset(XtestTrue_loader)
correct_false, total_false, loss_false, _pred_false = _evaluate_subset(XtestFalse_loader)

# Predictions of the true-nodule subset first, then those of the non-nodule subset.
predlabels = _pred_true + _pred_false

# An empty subset yields NaN instead of a ZeroDivisionError.
accuracy_true = correct_true / total_true if total_true else float('nan')
accuracy_false = correct_false / total_false if total_false else float('nan')

print(f'Accuracy for True nodules: {accuracy_true:.4f}')
print(f'Accuracy for False nodules: {accuracy_false:.4f}')

# Turn the summed per-batch losses into a mean over the batches.
loss_true = loss_true / len(XtestTrue_loader) if len(XtestTrue_loader) else float('nan')
loss_false = loss_false / len(XtestFalse_loader) if len(XtestFalse_loader) else float('nan')

print(f'Loss for True nodules: {loss_true:.4f}')
print(f'Loss for False nodules: {loss_false:.4f}')

score_true = {'loss': loss_true, 'accuracy': accuracy_true}
score_false = {'loss': loss_false, 'accuracy': accuracy_false}


In [None]:

# Detection metrics before and after the nodule classification step.
base_sensitivity = 0.7716  # Evaluated from 2TrainUnet.ipynb
false_candidate_count = len(falseind)
total_slices = sum(slicecounts)  # +slicecounts2

print("Sensitivity:", base_sensitivity)
print("FP Rate/slice:", false_candidate_count/total_slices)
print("FP Rate/slice after nodule classification:", false_candidate_count*(1-score_false['accuracy'])/total_slices)
print("Sensitivity after nodule classification:", base_sensitivity*score_true['accuracy'])

In [None]:

# Count how many test samples the classifier assigned to each class.
# NOTE(review): TP/FP here are counts of *predicted* labels (True vs. False),
# and they overwrite the TP/FP computed from the dataset labels earlier.
predlabels = np.array(predlabels)
TP = int(np.count_nonzero(predlabels == True))
FP = int(np.count_nonzero(predlabels == False))
print("Number of True Positive nodules:",TP)
print("Number of False Positive nodules:",FP)
print("# of FPs per TP",FP/TP)