In [None]:
import os

import gpytorch
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
import torch.optim as optim
from sklearn.metrics import accuracy_score, roc_auc_score, f1_score,recall_score,precision_score
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch import nn
from torch.utils.data import DataLoader, TensorDataset, Subset

from DKL_for_training_and_prediction import ResidualBlock, ResNet1D, ResNetGP

In [None]:
# Change the process working directory so that relative paths used afterwards
# resolve inside the results folder.
# NOTE(review): the string below looks like a placeholder, not a real path —
# replace it with an existing directory, otherwise os.chdir raises.
os.chdir("path to save the analysis results")

In [None]:
# Build module-level model objects: a ResNet1D feature extractor wrapped by
# ResNetGP (both imported from DKL_for_training_and_prediction).
# Note: run_experiments constructs its own fresh instances for every
# experiment, so it does not use these two objects.
feature_extractor = ResNet1D()
model = ResNetGP(feature_extractor)

In [None]:
def shuffle_features(X, rng=None):
    """Return a copy of ``X`` with its columns (axis 1) in a random order.

    Whole columns are moved, so the values belonging to one row stay on
    that row; only the position of each feature changes. ``X`` itself is
    not modified because the result is built with integer-array indexing.

    Args:
        X: 2-D NumPy array; columns are indexed via ``X[:, order]``.
        rng: Optional random source exposing an in-place ``shuffle`` method
            (e.g. ``np.random.default_rng(seed)`` or ``np.random.RandomState``).
            When ``None`` (the default) NumPy's global random state is used,
            which is the original behaviour.

    Returns:
        A new array with the same shape as ``X`` and permuted columns.
    """
    column_order = np.arange(X.shape[1])
    if rng is None:
        # Legacy path: draws from the global NumPy RNG (seed via np.random.seed).
        np.random.shuffle(column_order)
    else:
        # Caller-supplied generator makes the permutation reproducible in isolation.
        rng.shuffle(column_order)
    return X[:, column_order]

def train_and_evaluate(model, train_loader, val_loader, likelihood, optimizer, epochs=20, threshold=0.7):
    """Train ``model`` on ``train_loader`` and score it on ``val_loader``.

    Training minimises the negative ``PredictiveLogLikelihood`` built from
    ``likelihood`` and ``model.gp``. At evaluation time the mean of
    ``likelihood(model(x))`` is used as a continuous score; scores at or
    above ``threshold`` are counted as class 1 for the thresholded metrics,
    while ROC AUC is computed on the raw scores.

    Args:
        model: Module called as ``model(batch_x)``; must expose a ``gp``
            attribute accepted by ``PredictiveLogLikelihood``.
        train_loader: Iterable of ``(batch_x, batch_y)`` pairs with a
            ``dataset`` attribute supporting ``len``.
        val_loader: Iterable of ``(x, y)`` pairs used for evaluation.
        likelihood: GPyTorch likelihood, used for both the training
            objective and the predictive mean.
        optimizer: Optimizer already configured with the parameters to update.
        epochs: Number of full passes over ``train_loader``.
        threshold: Cut-off applied to the predictive mean to obtain hard
            0/1 predictions (default 0.7, the value previously hard-coded).

    Returns:
        Tuple ``(accuracy, auc, f1, recall, precision, predictions)`` where
        ``predictions`` is the list of per-sample predictive means in
        ``val_loader`` order.

    Note:
        The metrics are scikit-learn binary-classification metrics, so the
        labels are assumed to be 0/1 — TODO confirm against the caller.
    """
    # The class is resolved through the already-imported ``gpytorch`` package
    # because ``PredictiveLogLikelihood`` is not imported by name in this file.
    objective = gpytorch.mlls.PredictiveLogLikelihood(
        likelihood, model.gp, num_data=len(train_loader.dataset)
    )

    # ---- Training ----
    model.train()
    likelihood.train()
    for _ in range(epochs):
        for batch_x, batch_y in train_loader:
            optimizer.zero_grad()
            output = model(batch_x)
            loss = -objective(output, batch_y)  # maximise the objective
            loss.backward()
            optimizer.step()

    # ---- Evaluation ----
    model.eval()
    likelihood.eval()
    true_labels = []
    predictions = []
    with torch.no_grad():
        for x, y in val_loader:
            batch_scores = likelihood(model(x)).mean
            # .cpu() is a no-op on CPU tensors and keeps .numpy() valid otherwise.
            predictions.extend(batch_scores.cpu().numpy())
            true_labels.extend(y.cpu().numpy())

    # Binarise once and reuse for every thresholded metric.
    scores = np.asarray(predictions)
    hard_predictions = (scores >= threshold).astype(int)

    accuracy = accuracy_score(true_labels, hard_predictions)
    auc = roc_auc_score(true_labels, scores)
    f1 = f1_score(true_labels, hard_predictions)
    recall = recall_score(true_labels, hard_predictions)
    precision = precision_score(true_labels, hard_predictions)
    return accuracy, auc, f1, recall, precision, predictions

In [None]:
def run_experiments(X, y, n_experiments=10, test_size=0.2, random_state=42,
                    batch_size=128, lr=0.001, weight_decay=1e-4):
    """Repeat train/evaluate runs, each on a fresh column permutation of the training data.

    The data is split once into train and held-out parts. For each
    experiment the columns of the training features are randomly permuted
    with ``shuffle_features``, a new ``ResNet1D``/``ResNetGP`` model and a
    new Gaussian likelihood are created, and ``train_and_evaluate`` is run
    against the held-out loader.

    Note: only the training features are permuted; the held-out features
    are evaluated in their original column order.
    NOTE(review): confirm this asymmetry is intended.

    Args:
        X: Feature tensor; must be a ``torch.Tensor`` because ``.numpy()``
            is called on the training split.
        y: Target tensor aligned with ``X``.
        n_experiments: Number of independent shuffle/train/evaluate runs.
        test_size: Fraction of samples held out (passed to ``train_test_split``).
        random_state: Seed for the train/held-out split.
        batch_size: Batch size for both data loaders.
        lr: Adam learning rate.
        weight_decay: Adam weight decay.

    Returns:
        Tuple ``(results_predictions, results_metrics)``: one prediction
        list per experiment, and one
        ``(accuracy, auc, f1, recall, precision)`` tuple per experiment.
    """
    results_metrics = []
    results_predictions = []

    # Single split shared by every experiment.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    test_loader = DataLoader(
        TensorDataset(X_test, y_test), batch_size=batch_size, shuffle=False
    )

    # Loop-invariant: shuffle_features returns a new array, so this view is never mutated.
    X_train_np = X_train.numpy()

    for _ in range(n_experiments):
        # Permute feature columns and convert back to a float32 tensor.
        X_shuffled = torch.tensor(shuffle_features(X_train_np), dtype=torch.float32)
        train_loader = DataLoader(
            TensorDataset(X_shuffled, y_train), batch_size=batch_size, shuffle=True
        )

        # Fresh model and likelihood so experiments do not share learned state.
        feature_extractor = ResNet1D()
        model = ResNetGP(feature_extractor)
        # Resolved through the imported ``gpytorch`` package; the bare name is
        # not imported anywhere in this file.
        likelihood = gpytorch.likelihoods.GaussianLikelihood()
        optimizer = torch.optim.Adam(
            [
                {'params': model.parameters()},
                {'params': likelihood.parameters()},
            ],
            lr=lr,
            weight_decay=weight_decay,
        )

        # Train and predict
        accuracy, auc, f1, recall, precision, predictions = train_and_evaluate(
            model, train_loader, test_loader, likelihood, optimizer
        )
        results_metrics.append((accuracy, auc, f1, recall, precision))
        results_predictions.append(predictions)

    return results_predictions, results_metrics