In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import RobustScaler, PowerTransformer

In [None]:
# Mount Google Drive inside the Colab runtime; the CSV paths read by this
# notebook are located under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Training split: read the feature table from the mounted Drive.
file_name1 = '/content/drive/MyDrive/EXP/SHAP/Train/V_train_bert_mean4.csv'
train_data = pd.read_csv(file_name1)

# Test split: same layout, separate file.
file_name2 = '/content/drive/MyDrive/EXP/SHAP/Test/V_test_bert_mean4.csv'
test_data = pd.read_csv(file_name2)

In [None]:
# Sanity check: (rows, columns) of the training table.
train_data.shape

(4599, 1025)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import pandas as pd
import numpy as np

In [None]:
val_ratio = 0.05  # 5% of the training data is used as validation
batch_size = 16
split_seed = 42  # fixed so the train/validation partition is the same on every run


def _frame_to_dataset(frame):
    """Wrap a feature table as a TensorDataset.

    Every column except 'target' becomes a float32 feature tensor; the
    'target' column becomes a float32 label tensor reshaped to (n_rows, 1).
    """
    features = torch.tensor(frame.drop('target', axis=1).values, dtype=torch.float32)
    labels = torch.tensor(frame['target'].values, dtype=torch.float32).view(-1, 1)
    return TensorDataset(features, labels)


train_dataset_full = _frame_to_dataset(train_data)
test_dataset = _frame_to_dataset(test_data)

val_size = int(len(train_data) * val_ratio)
train_size = len(train_data) - val_size

# A dedicated, seeded generator pins only the split itself; it does not touch
# the global RNG used for weight initialisation or training-batch shuffling.
train_dataset, val_dataset = torch.utils.data.random_split(
    train_dataset_full,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(split_seed),
)

# Only the training loader is shuffled; validation/test order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
class Attention(nn.Module):
    """Attention pooling over dimension 1 using a single learned scoring vector.

    Each slice along dim 1 is scored by a dot product with `att_weights`
    (shape (1, feature_dim)); the scores are softmax-normalised over dim 1 and
    used to form a weighted sum over dim 1.

    For input shaped (batch, seq, feature_dim) the result is
    (batch, feature_dim). For 2-D input (batch, feature_dim) the softmax runs
    over a singleton dimension, so every weight is 1 and the result is the
    plain per-row sum with shape (batch,).
    """

    def __init__(self, feature_dim):
        super().__init__()
        self.feature_dim = feature_dim
        # Allocated uninitialised, then filled by Xavier-uniform initialisation.
        self.att_weights = nn.Parameter(torch.empty(1, feature_dim))
        nn.init.xavier_uniform_(self.att_weights)

    def forward(self, x):
        scores = x @ self.att_weights.t()
        weights = scores.softmax(dim=1)
        return (x * weights).sum(dim=1)


class AttModel(nn.Module):
    """Binary classifier: attention pooling -> projection -> BatchNorm -> Dropout -> Linear -> Sigmoid.

    Args:
        input_size: width of each input feature vector.
        hidden_size: width of the hidden representation fed to BatchNorm and
            the output layer.
        output_size: number of sigmoid outputs per sample.

    Input may be (batch, input_size) or (batch, seq, input_size); the output
    is (batch, output_size) with values in [0, 1].

    The previous forward pass could not run: for 2-D input the attention
    result had shape (batch,), `unsqueeze(0)` turned it into (1, batch), and
    BatchNorm1d(hidden_size) rejected it ("Expected more than 1 value per
    channel when training"). Nothing mapped input_size to hidden_size either.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.attention = Attention(input_size)
        # Bridges the pooled input_size vector to the hidden_size that the
        # BatchNorm and output layers are built for.
        # NOTE(review): no non-linearity follows this projection; consider
        # adding one if a non-linear hidden layer is intended.
        self.proj = nn.Linear(input_size, hidden_size)
        self.fc = nn.Linear(hidden_size, output_size)
        self.bn = nn.BatchNorm1d(hidden_size)
        self.dropout = nn.Dropout(0.7)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        # Attention pools over dim 1. A 2-D batch is treated as length-1
        # sequences so the pooled result keeps shape (batch, input_size);
        # with a single position the attention weights are all 1.
        if x.dim() == 2:
            x = x.unsqueeze(1)
        pooled = self.attention(x)
        out = self.proj(pooled)
        out = self.bn(out)
        out = self.dropout(out)
        out = self.fc(out)
        return self.sigmoid(out)


# Notebook-level training objects: the classifier, its binary cross-entropy
# loss, an Adam optimiser, and a step schedule that multiplies the learning
# rate by 0.1 every 10 scheduler steps.
model = AttModel(input_size=1024, hidden_size=64, output_size=1)
criterion = nn.BCELoss()
optimizer = optim.Adam(
    model.parameters(),
    lr=0.01,
    betas=(0.9, 0.999),
    eps=1e-8,
)
scheduler = optim.lr_scheduler.StepLR(optimizer, gamma=0.1, step_size=10)

In [None]:
test_accuracies = []


def train_model(model, train_loader, val_loader, test_loader, optimizer, criterion, num_epochs, scheduler):
    """Train `model` for `num_epochs` epochs, printing one summary line per epoch.

    Each epoch runs one optimisation pass over `train_loader`, steps
    `scheduler` once, then scores `val_loader` with gradients disabled.
    Predictions are the model outputs rounded with `torch.round` and compared
    element-wise to the labels. Reported losses are the mean of the per-batch
    losses.

    `test_loader` is accepted but not used by this function. Nothing is
    returned; the model is left in eval mode after the last epoch.
    """

    def _count_hits(probabilities, targets):
        # Number of rounded predictions equal to their target.
        return (torch.round(probabilities) == targets).sum().item()

    for epoch_number in range(1, num_epochs + 1):
        # Optimisation pass.
        model.train()
        train_loss_sum, train_hits, train_seen = 0, 0, 0
        for batch_inputs, batch_targets in train_loader:
            optimizer.zero_grad()
            batch_probs = model(batch_inputs)
            batch_loss = criterion(batch_probs, batch_targets)
            batch_loss.backward()
            optimizer.step()

            train_loss_sum += batch_loss.item()
            train_hits += _count_hits(batch_probs, batch_targets)
            train_seen += batch_targets.size(0)

        train_accuracy = train_hits / train_seen * 100
        scheduler.step()

        # Validation pass (no gradient tracking).
        model.eval()
        val_loss_sum, val_hits, val_seen = 0, 0, 0
        with torch.no_grad():
            for batch_inputs, batch_targets in val_loader:
                batch_probs = model(batch_inputs)
                val_loss_sum += criterion(batch_probs, batch_targets).item()
                val_hits += _count_hits(batch_probs, batch_targets)
                val_seen += batch_targets.size(0)

        val_accuracy = val_hits / val_seen * 100

        mean_train_loss = train_loss_sum / len(train_loader)
        mean_val_loss = val_loss_sum / len(val_loader)
        print(f'Epoch {epoch_number}/{num_epochs}, '
              f'Train Loss: {mean_train_loss:.4f}, '
              f'Train Accuracy: {train_accuracy:.2f}%, '
              f'Validation Loss: {mean_val_loss:.4f}, '
              f'Validation Accuracy: {val_accuracy:.2f}%')

def test_model(model):
      model.eval()
      correct = 0
      total = 0

      with torch.no_grad():
          for inputs, labels in test_loader:
              outputs = model(inputs)
              predicted = torch.round(outputs)
              total += labels.size(0)
              correct += (predicted == labels).sum().item()

      accuracy = correct / total * 100
      test_accuracies.append(accuracy)

from sklearn.metrics import roc_auc_score, precision_recall_curve, auc, confusion_matrix, precision_score, recall_score, f1_score, matthews_corrcoef

def evaluate_model(model, test_loader):
    """Compute binary-classification metrics for `model` over `test_loader`.

    The model outputs are used as positive-class scores; hard labels are
    obtained with a 0.5 threshold (score > 0.5 -> 1). The confusion matrix is
    printed as a side effect.

    Returns:
        dict with keys 'AUC', 'Specificity', 'Sensitivity', 'Precision',
        'Recall', 'F1-Score', 'MCC', 'AUPR', 'TPR', 'FPR' and
        'Confusion_Matrix'.
    """
    model.eval()
    predictions = []
    true_labels = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            # reshape(-1) always yields a 1-D array. squeeze() would collapse
            # a batch holding a single sample to a 0-d array, which
            # list.extend() cannot iterate over.
            predictions.extend(outputs.reshape(-1).detach().cpu().numpy())
            true_labels.extend(labels.reshape(-1).detach().cpu().numpy())

    predicted_labels = [1 if score > 0.5 else 0 for score in predictions]

    auc_score = roc_auc_score(true_labels, predictions)

    precision = precision_score(true_labels, predicted_labels)
    recall = recall_score(true_labels, predicted_labels)
    f1 = f1_score(true_labels, predicted_labels)

    # Computed once and reused for both the derived rates and the printout.
    confusion_matrix_result = confusion_matrix(true_labels, predicted_labels)
    tn, fp, fn, tp = confusion_matrix_result.ravel()
    specificity = tn / (tn + fp)
    sensitivity = recall

    mcc = matthews_corrcoef(true_labels, predicted_labels)

    # Area under the precision-recall curve (recall on the x axis).
    precision_points, recall_points, _ = precision_recall_curve(true_labels, predictions)
    aupr = auc(recall_points, precision_points)

    # True Positive Rate (TPR) and False Positive Rate (FPR) at the 0.5 threshold
    tpr = tp / (tp + fn)
    fpr = fp / (fp + tn)

    print("Confusion Matrix:")
    print(confusion_matrix_result)

    metrics = {
        'AUC': auc_score,
        'Specificity': specificity,
        'Sensitivity': sensitivity,
        'Precision': precision,
        'Recall': recall,
        'F1-Score': f1,
        'MCC': mcc,
        'AUPR': aupr,
        'TPR': tpr,
        'FPR': fpr,
        'Confusion_Matrix': confusion_matrix_result
    }

    return metrics

In [None]:
# Epoch-count sweep: run i trains for i epochs and records its test accuracy.
# NOTE(review): ranking epoch counts by test accuracy uses the test set for
# model selection; consider ranking on the validation split instead.
for i in range(1, 101):
    # Build a new, untrained network for every run. `model1 = model` only
    # aliased the notebook-level model, so each run kept training the same
    # weights and run i did not correspond to i epochs of training.
    model1 = AttModel(input_size=1024, hidden_size=64, output_size=1)
    criterion = nn.BCELoss()
    # Sweep-specific names, bound to model1's parameters, so the
    # notebook-level `optimizer` / `scheduler` are not overwritten.
    sweep_optimizer = optim.Adam(model1.parameters(), lr=0.01, betas=(0.9, 0.999), eps=1e-8)
    sweep_scheduler = optim.lr_scheduler.StepLR(sweep_optimizer, step_size=10, gamma=0.1)
    train_model(model1, train_loader, val_loader, test_loader, sweep_optimizer, criterion, i, sweep_scheduler)
    print('-----------------------------------------------------------------------------------------------------------------------')
    test_model(model1)
    print(test_accuracies)
    print(evaluate_model(model1, test_loader))
    print('-----------------------------------------------------------------------------------------------------------------------')

ValueError: Expected more than 1 value per channel when training, got input size torch.Size([1, 16])

In [None]:
# Best and worst test accuracy recorded in test_accuracies.
max_value = max(test_accuracies)
print("Maximum Accuracy:", max_value)
min_value = min(test_accuracies)
print("Minimum Accuracy:", min_value)

Maximum Accuracy: 88.14814814814815
Minimum Accuracy: 84.07407407407408


In [None]:
# Rank the recorded accuracies from highest to lowest. Each entry is a
# (zero-based position in test_accuracies, accuracy) pair; the position is
# printed one-based.
sorted_test_accuracies = sorted(
    enumerate(test_accuracies),
    key=lambda entry: entry[1],
    reverse=True,
)

for index, accuracy in sorted_test_accuracies:
    print(f"Index: {index+1}, Test Accuracy: {accuracy:.2f}%")

Index: 55, Test Accuracy: 88.15%
Index: 79, Test Accuracy: 88.15%
Index: 29, Test Accuracy: 88.02%
Index: 65, Test Accuracy: 87.90%
Index: 89, Test Accuracy: 87.90%
Index: 69, Test Accuracy: 87.78%
Index: 58, Test Accuracy: 87.65%
Index: 59, Test Accuracy: 87.65%
Index: 93, Test Accuracy: 87.65%
Index: 28, Test Accuracy: 87.53%
Index: 41, Test Accuracy: 87.41%
Index: 45, Test Accuracy: 87.41%
Index: 21, Test Accuracy: 87.28%
Index: 22, Test Accuracy: 87.28%
Index: 70, Test Accuracy: 87.28%
Index: 87, Test Accuracy: 87.28%
Index: 18, Test Accuracy: 87.16%
Index: 36, Test Accuracy: 87.16%
Index: 38, Test Accuracy: 87.16%
Index: 61, Test Accuracy: 87.16%
Index: 94, Test Accuracy: 87.16%
Index: 98, Test Accuracy: 87.16%
Index: 14, Test Accuracy: 87.04%
Index: 49, Test Accuracy: 87.04%
Index: 60, Test Accuracy: 87.04%
Index: 88, Test Accuracy: 87.04%
Index: 92, Test Accuracy: 87.04%
Index: 10, Test Accuracy: 86.91%
Index: 20, Test Accuracy: 86.91%
Index: 39, Test Accuracy: 86.91%
Index: 90,

In [None]:
# Run the next two cells: the first trains the final model, the second evaluates it on the test set.

In [None]:
# Final training run.
# The model, loss, optimiser and scheduler are created here so this run starts
# from untrained weights and the initial learning rate, instead of inheriting
# whatever training state earlier cells left in these names.
num_epochs = 24
model = AttModel(input_size=1024, hidden_size=64, output_size=1)
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01, betas=(0.9, 0.999), eps=1e-8)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

for epoch in range(num_epochs):
    # One optimisation pass over the training batches.
    model.train()
    total_train_loss = 0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        total_train_loss += loss.item()
    scheduler.step()

    # Validation loss with gradient tracking disabled.
    model.eval()
    total_val_loss = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            total_val_loss += loss.item()

    print(f'Epoch {epoch+1}, Train Loss: {total_train_loss / len(train_loader):.4f}, '
          f'Validation Loss: {total_val_loss / len(val_loader):.4f}')

In [None]:
from sklearn.metrics import roc_auc_score, matthews_corrcoef, precision_score, recall_score, f1_score, \
    accuracy_score, roc_curve, precision_recall_curve, auc

true_labels = []
predicted_probabilities = []

model.eval()
with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = model(inputs)
        # Flatten each batch so the collected arrays are 1-D (n_samples,).
        predicted_probabilities.extend(outputs.reshape(-1).cpu().numpy())
        true_labels.extend(labels.reshape(-1).cpu().numpy())

true_labels = np.array(true_labels).astype(int)
predicted_probabilities = np.array(predicted_probabilities)
# Hard 0/1 predictions, obtained by rounding the probabilities.
predicted_labels = np.round(predicted_probabilities).astype(int)

# Stored as `auc_score` so the sklearn `auc` function keeps its name;
# evaluate_model calls `auc(...)` and would break if it were rebound to a float.
auc_score = roc_auc_score(true_labels, predicted_probabilities)

mcc = matthews_corrcoef(true_labels, predicted_labels)

precision = precision_score(true_labels, predicted_labels)
recall = recall_score(true_labels, predicted_labels)
f1 = f1_score(true_labels, predicted_labels)

accuracy = accuracy_score(true_labels, predicted_labels) * 100

# ROC curve points, kept available for plotting.
fpr, tpr, thresholds = roc_curve(true_labels, predicted_probabilities)

# Sensitivity and specificity are reported for the rounded predictions rather
# than averaged over every ROC threshold: sensitivity is the recall of the
# positive class, specificity is the recall of the negative class.
sensitivity = recall
specificity = recall_score(true_labels, predicted_labels, pos_label=0)

# PR-curve arrays get their own names so the scalar precision/recall above are
# not overwritten. sklearn's auc() accepts the decreasing recall axis, whereas
# np.trapz over decreasing recall produced a negative area.
precision_curve, recall_curve, _ = precision_recall_curve(true_labels, predicted_probabilities)
aupr = auc(recall_curve, precision_curve)


print(f'Test Accuracy: {accuracy:.2f}%')
print(f'AUC: {auc_score:.4f}')
print(f'MCC: {mcc:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1-score: {f1:.4f}')
print(f'Sensitivity: {sensitivity:.4f}')
print(f'Specificity: {specificity:.4f}')
print(f'AUPR: {aupr:.4f}')