In [109]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader,TensorDataset, random_split, ConcatDataset
import torch.optim as optim
from sklearn.model_selection import KFold 
import torch.optim.lr_scheduler as lr_scheduler
import matplotlib.pyplot as plt
from torch.utils.data import Subset
from sklearn.metrics import roc_curve, auc, roc_auc_score, confusion_matrix, accuracy_score, f1_score, matthews_corrcoef

In [110]:
# Hyperparameters for the notebook, collected in one place.
learning_rate = 1e-4  # optimiser learning rate
num_epochs = 50       # number of training epochs
batch_size = 128      # samples per mini-batch for every DataLoader
num_classes = 2       # not referenced by any other cell visible in this notebook

In [111]:
# Load the pre-split datasets and wrap each split in a DataLoader.
# `batch_size` comes from the hyperparameter cell above; it is deliberately not
# redefined here so there is a single place to change it.
#
# NOTE(security): torch.load unpickles arbitrary Python objects. Only open
# 'saved_datasets.pth' if it was produced by a trusted source.
# NOTE(review): newer PyTorch releases default torch.load to weights_only=True,
# which may reject pickled Dataset objects - confirm against the installed version.
loaded_datasets_info = torch.load('saved_datasets.pth')
loaded_train_dataset = loaded_datasets_info['train_dataset']
loaded_val_dataset = loaded_datasets_info['val_dataset']
loaded_test_dataset = loaded_datasets_info['test_dataset']

# Only the training split is shuffled. Validation / test loaders keep the
# dataset order so that collected predictions line up with dataset indices and
# are reproducible from run to run.
loaded_train_loader = DataLoader(loaded_train_dataset, batch_size=batch_size, shuffle=True)
loaded_val_loader = DataLoader(loaded_val_dataset, batch_size=batch_size, shuffle=False)
loaded_test_loader = DataLoader(loaded_test_dataset, batch_size=batch_size, shuffle=False)

In [112]:
def metrics_output(preds, labels, threshold=0.5):
    """Compute binary-classification metrics from predicted probabilities.

    Args:
        preds: Predicted positive-class scores. Any array-like that flattens to
            one score per sample is accepted (e.g. a list of ``[p]`` rows).
        labels: Ground-truth labels, expected to be encoded as 0 / 1, with one
            entry per sample.
        threshold: Scores greater than or equal to this value are counted as
            the positive class. Defaults to 0.5.

    Returns:
        Tuple ``(auc, sensitivity, specificity, accuracy, f1, mcc)``.
        Sensitivity (resp. specificity) is ``nan`` when there are no positive
        (resp. negative) samples.

    Raises:
        ValueError: If ``preds`` and ``labels`` do not contain the same number
            of samples.
    """
    # Flatten so that (N, 1)-shaped model outputs and (N,)-shaped labels agree.
    true_labels = np.asarray(labels).ravel()
    predicted_probs = np.asarray(preds).ravel()
    if true_labels.shape != predicted_probs.shape:
        raise ValueError(
            f"preds and labels must have the same number of samples, "
            f"got {predicted_probs.size} and {true_labels.size}"
        )

    binary_predictions = (predicted_probs >= threshold).astype(int)

    # Named roc_auc (not auc) so the imported sklearn.metrics.auc is not shadowed.
    roc_auc = roc_auc_score(true_labels, predicted_probs)

    # labels=[0, 1] forces a 2x2 matrix even if one class is missing, so the
    # four-way unpacking below cannot fail.
    conf_matrix = confusion_matrix(true_labels, binary_predictions, labels=[0, 1])
    tn, fp, fn, tp = conf_matrix.ravel()

    # Guard the ratios: an absent class would otherwise divide by zero.
    sensitivity = tp / (tp + fn) if (tp + fn) else float('nan')
    specificity = tn / (tn + fp) if (tn + fp) else float('nan')

    accuracy = accuracy_score(true_labels, binary_predictions)
    f1 = f1_score(true_labels, binary_predictions)
    mcc = matthews_corrcoef(true_labels, binary_predictions)
    return (roc_auc, sensitivity, specificity, accuracy, f1, mcc)

In [113]:
class BinaryLSTM(nn.Module):
    """LSTM-based binary classifier producing one probability per sequence.

    A stacked, batch-first LSTM encodes the input sequence; the LSTM output at
    the final time step is projected to a single unit and passed through a
    sigmoid.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_size=205, hidden_size=256, num_layers=2):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.linear = nn.Linear(in_features=hidden_size, out_features=1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Map a batch-first sequence tensor to per-sample probabilities.

        Args:
            x: Tensor laid out as ``(batch, seq_len, input_size)``.

        Returns:
            Tensor of shape ``(batch, 1)`` with values in ``(0, 1)``.
        """
        sequence_out, _state = self.lstm(x)
        # Only the last time step feeds the classifier head.
        last_step = sequence_out[:, -1, :]
        return self.sigmoid(self.linear(last_step))

# Default model, loss and optimiser.
model = BinaryLSTM()
# BCELoss expects probabilities, which BinaryLSTM's final sigmoid provides.
criterion = nn.BCELoss()
# Use the hyperparameters from the configuration cell instead of repeating the
# literals here, so the two cannot drift apart (learning_rate == 0.0001 and
# num_epochs == 50 are already defined there).
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
# 5-fold cross-validation over the combined train + validation splits.
kf = KFold(n_splits=5, shuffle=True, random_state=42)
train_dataset = ConcatDataset([loaded_train_dataset, loaded_val_dataset])
# Kept so that any cell relying on `train_loader` keeps working; the folds
# below index `train_dataset` directly.
train_loader = DataLoader(train_dataset, batch_size=batch_size)

# KFold only needs the number of samples, so split over a plain index array.
sample_indices = np.arange(len(train_dataset))

for fold_idx, (train_index, val_index) in enumerate(kf.split(sample_indices)):
    # Reset per fold: after the loop these hold the LAST fold's validation
    # predictions / labels, which the metrics cell below consumes.
    predicted_probabilities = []
    true_labels = []
    train_loader_fold = DataLoader(
        Subset(train_dataset, train_index), batch_size=batch_size, shuffle=True
    )
    val_loader_fold = DataLoader(Subset(train_dataset, val_index), batch_size=batch_size)

    # One fresh model per FOLD (not per epoch), with an optimiser bound to that
    # model's parameters. Re-creating the model inside the epoch loop, or
    # reusing an optimiser built for another model instance, means the
    # evaluated network is never actually trained.
    model = BinaryLSTM()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    model.train()
    for epoch in range(num_epochs):
        for features, targets in train_loader_fold:
            optimizer.zero_grad()
            # Same float cast as the evaluation path below.
            outputs = model(features.float())
            # BCELoss needs float targets with the same shape as the outputs.
            loss = criterion(outputs, targets.float().reshape(outputs.shape))
            loss.backward()
            optimizer.step()

    model.eval()
    with torch.no_grad():
        for features, targets in val_loader_fold:
            # The model already applies a sigmoid, so these are probabilities.
            probabilities = model(features.float())
            predicted_probabilities.extend(probabilities.tolist())
            true_labels.extend(targets.tolist())
    #torch.save(model.state_dict(), f'D:/Jupyter_directory/Res-F5C-main/Results(Yue)/Geo/LSTM/LSTM_{fold_idx}.pth')
    #np.save(f'D:/Jupyter_directory/Res-F5C-main/Results(Yue)/Geo/LSTM/y_val_pred_{fold_idx}.npy', predicted_probabilities)
    #np.save(f'D:/Jupyter_directory/Res-F5C-main/Results(Yue)/Geo/LSTM/y_val_{fold_idx}.npy', true_labels)

In [None]:
#model = BinaryLSTM()
#model.load_state_dict(torch.load(f'D:/Jupyter_directory/Res-F5C-main/Results(Yue)/Geo/LSTM/LSTM_{fold_idx}.pth'))
#predicted_probabilities = np.load(f'D:/Jupyter_directory/Res-F5C-main/Results(Yue)/Geo/LSTM/y_val_pred_{fold_idx}.npy')
#true_labels = np.load(f'D:/Jupyter_directory/Res-F5C-main/Results(Yue)/Geo/LSTM/y_val_{fold_idx}.npy')

In [116]:
# Score whatever `predicted_probabilities` / `true_labels` currently hold and
# print the six metrics on one line: AUC, sensitivity, specificity, accuracy,
# F1 and MCC.
val_metrics = metrics_output(predicted_probabilities, true_labels)
roc_auc, metrics_sn, metrics_sp, metrics_ACC, metrics_F1, metrics_MCC = val_metrics
print(*val_metrics)

0.7283154512281467 0.6754098360655738 0.6578073089700996 0.6666666666666666 0.6710097719869708 0.33327523412122567


In [117]:
#np.save('D:/Jupyter_directory/y_val_pred.npy', predicted_probabilities)
#np.save('D:/Jupyter_directory/y_val.npy', true_labels)

In [118]:
# Run the current `model` over the test loader with gradients disabled,
# collecting its outputs and the matching targets as plain Python lists.
predicted_probabilities, true_labels = [], []
with torch.no_grad():
    for features, targets in loaded_test_loader:
        predicted_probabilities.extend(model(features.float()).tolist())
        true_labels.extend(targets.tolist())

In [119]:
# Score the predictions gathered from the test loader and print the six
# metrics on one line: AUC, sensitivity, specificity, accuracy, F1 and MCC.
test_metrics = metrics_output(predicted_probabilities, true_labels)
roc_auc, metrics_sn, metrics_sp, metrics_ACC, metrics_F1, metrics_MCC = test_metrics
print(*test_metrics)

0.719783595113438 0.672 0.643979057591623 0.6578599735799208 0.6605504587155964 0.3160651184505049


In [122]:
#np.save('D:/Jupyter_directory/y_test_pred.npy', predicted_probabilities)
#np.save('D:/Jupyter_directory/y_test.npy', true_labels)