In [189]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split
import torch.optim as optim
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score,confusion_matrix, ConfusionMatrixDisplay,roc_auc_score, matthews_corrcoef
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold 

In [299]:
batch_size = 128
loaded_datasets_info = torch.load('D:/Jupyter_directory/F5C-codes/Models/saved_datasets.pth')
loaded_train_dataset = loaded_datasets_info['train_dataset']
loaded_val_dataset = loaded_datasets_info['val_dataset']
loaded_test_dataset = loaded_datasets_info['test_dataset']
loaded_train_loader = DataLoader(loaded_train_dataset, batch_size = batch_size, shuffle=False) 
loaded_val_loader = DataLoader(loaded_val_dataset, batch_size = batch_size, shuffle=False) 
loaded_test_loader = DataLoader(loaded_test_dataset, batch_size = batch_size, shuffle=False) 
X_train = []
y_train = []
for x, y in loaded_train_dataset:
    X_train.append(x)
    y_train.append(y)
X_test = []
y_test = []
for x, y in loaded_test_dataset:
    X_test.append(x)
    y_test.append(y)
X_val = []
y_val = []
for x, y in loaded_val_dataset:
    X_val.append(x)
    y_val.append(y)
y_train = np.array(torch.stack(y_train)).reshape(-1,)
y_test = np.array(torch.stack(y_test)).reshape(-1,)
y_val = np.array(torch.stack(y_val)).reshape(-1,)
X_train = np.array(torch.stack(X_train)).reshape(-1, 240)
X_test = np.array(torch.stack(X_test)).reshape(-1, 240)
X_val = np.array(torch.stack(X_val)).reshape(-1, 240)

In [300]:
X_train = X_train.reshape(-1, 10,24)
X_test = X_test.reshape(-1, 10,24)
X_val = X_val.reshape(-1, 10,24)

In [301]:
class MyDataSet(Dataset):
    def __init__(self, data, label):
        self.data = torch.from_numpy(data).float()
        self.label = torch.from_numpy(label).float()
        self.length = label.shape[0]
        
    def __getitem__(self, index):
        return self.data[index], self.label[index]
    def __len__(self):
        return self.length

In [302]:
train_dataset = MyDataSet(X_train, y_train) 
test_dataset = MyDataSet(X_test, y_test) 
val_dataset = MyDataSet(X_val, y_val) 

In [214]:
train_dataloader = DataLoader(train_dataset, batch_size = 16, shuffle=True) 
test_dataloader = DataLoader(test_dataset, batch_size= len(test_dataset), shuffle=True) 
val_dataloader = DataLoader(test_dataset, batch_size= len(val_dataset), shuffle=True) 

In [283]:
def metrics_output(preds,labels):
    true_labels = np.array(labels)
    predicted_probs = np.array(preds)
    binary_predictions = (predicted_probs >= 0.5).astype(int)
    auc = roc_auc_score(true_labels, predicted_probs)
    conf_matrix = confusion_matrix(true_labels, binary_predictions)
    tn, fp, fn, tp = conf_matrix.ravel()
    sensitivity = tp / (tp + fn)
    specificity = tn / (tn + fp)
    accuracy = accuracy_score(true_labels, binary_predictions)
    f1 = f1_score(true_labels, binary_predictions)
    mcc = matthews_corrcoef(true_labels, binary_predictions)  
    return (auc, sensitivity, specificity, accuracy, f1, mcc)

In [291]:
class TransformerEncoderClassification(nn.Module):
    def __init__(self):
        super(TransformerEncoderClassification, self).__init__()
        self.transformer_encoder = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(d_model=24, nhead=8), 
            num_layers= 3,
        ) 
        self.fc = nn.Linear(10*24, 2)

    def forward(self, x):
        x = x.permute(1, 0, 2)  
        x = self.transformer_encoder(x) 
        x = x.permute(1, 0, 2) 
        x = x.flatten(1) 
        x = self.fc(x)
        return x
    
model = TransformerEncoderClassification() 
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.005)

In [None]:
kf = KFold(n_splits=5, shuffle=True, random_state=42)
train_dataset = ConcatDataset([loaded_train_dataset, loaded_val_dataset])
train_loader = DataLoader(train_dataset, batch_size=batch_size)

for fold_idx, (train_index, val_index) in enumerate(kf.split(train_loader.dataset)):
    predicted_probabilities = []
    true_labels = []
    predicted_labels = []  
    train_loader_fold = DataLoader(Subset(train_loader.dataset, train_index), batch_size=batch_size)
    val_loader_fold = DataLoader(Subset(train_loader.dataset, val_index), batch_size=batch_size)
    model = TransformerEncoderClassification()
    
    for epoch in range(15):    
        model = TransformerEncoderClassification() 
        for batch_idx, (features, targets) in enumerate(train_loader_fold):     
            optimizer.zero_grad()
            outputs = model(features)
            loss = criterion(outputs, targets) 
            loss.backward()
            optimizer.step()
    
    with torch.set_grad_enabled(False):
        for batch_idx, (features, targets) in enumerate(val_loader_fold):
            logits = model(features.float())
            _, y_pred = torch.max(logits.data, dim=1)
            predicted_labels.extend(y_pred.tolist()) 
            logits = torch.nn.functional.softmax(logits)
            predicted_probabilities.extend(logits[:,1].tolist()) 
            true_labels.extend(targets.tolist())
    #torch.save(model.state_dict(), f'D:/Jupyter_directory/Res-F5C-main/Results(Yue)/Geo/VIT/transformer_{fold_idx}.pth')
    #np.save(f'D:/Jupyter_directory/Res-F5C-main/Results(Yue)/Geo/VIT/y_val_pred_{fold_idx}.npy', predicted_probabilities)
    #np.save(f'D:/Jupyter_directory/Res-F5C-main/Results(Yue)/Geo/VIT/y_val_{fold_idx}.npy', true_labels)

In [None]:
#model = Net_conv(input_length = datas.shape[2])
#model.load_state_dict(torch.load('D:/Jupyter_directory/Res-F5C-main/Results(Yue)/Geo/VIT/transformer_{fold_idx}.pth'))
#predicted_probabilities = np.load(f'D:/Jupyter_directory/Res-F5C-main/Results(Yue)/Geo/VIT/y_val_pred_{fold_idx}.npy')
#true_labels = np.load(f'D:/Jupyter_directory/Res-F5C-main/Results(Yue)/Geo/VIT/y_val_{fold_idx}.npy')

In [308]:
roc_auc, metrics_sn, metrics_sp, metrics_ACC, metrics_F1, metrics_MCC = metrics_output(predicted_probabilities,true_labels)
print(roc_auc, metrics_sn, metrics_sp, metrics_ACC, metrics_F1, metrics_MCC)

0.7314667448483833 0.7207446808510638 0.6272965879265092 0.6737120211360634 0.6869455006337135 0.349488513382641


In [309]:
#np.save('D:/Jupyter_directory/Res-F5C-main/Results(Yue)/Geo/VIT/y_val_pred.npy', predicted_probabilities)
#np.save('D:/Jupyter_directory/Res-F5C-main/Results(Yue)/Geo/VIT/y_val.npy', true_labels)

In [313]:
predicted_labels = []  
true_labels = [] 
predicted_probabilities = []
with torch.set_grad_enabled(False): 
    for batch_idx, (features, targets) in enumerate(test_dataloader):
        logits = model(features.float())
        _, y_pred = torch.max(logits.data, dim=1)
        predicted_labels.extend(y_pred.tolist()) 
        logits = torch.nn.functional.softmax(logits)
        predicted_probabilities.extend(logits[:,1].tolist()) 
        true_labels.extend(targets.tolist())

  logits = torch.nn.functional.softmax(logits)


In [314]:
roc_auc, metrics_sn, metrics_sp, metrics_ACC, metrics_F1, metrics_MCC = metrics_output(predicted_probabilities,true_labels)
print(roc_auc, metrics_sn, metrics_sp, metrics_ACC, metrics_F1, metrics_MCC)

0.7317320042441504 0.7446808510638298 0.6272965879265092 0.6856010568031704 0.7017543859649124 0.3744504711298937


In [315]:
#np.save('D:/Jupyter_directory/Res-F5C-main/Results(Yue)/Geo/VIT/y_test_pred.npy', predicted_probabilities)
#np.save('D:/Jupyter_directory/Res-F5C-main/Results(Yue)/Geo/VIT/y_test.npy', true_labels)