In [42]:
# Import libraries for analysis
# Sklearn was used for developing the ML model
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Load the CSV file with primary and chronic CMV infection antibody profiling data
file_path = '....Primary_Chronic_CMV.csv'
df = pd.read_csv(file_path)

# Rename the specified columns
df.rename(columns={df.columns[0]: 'Subject ID', df.columns[1]: 'CMV Infection Status'}, inplace=True)

# Remove the 3rd column (index 2)
df.drop(columns=df.columns[2], inplace=True)

# Replace the first row with empty strings
df.iloc[0] = ""

# Drop the first row (now empty)
df = df.drop(0)

# Convert CMV Infection Status to binary (Primary: 1, Chronic: 0)
df['CMV Infection Status'] = df['CMV Infection Status'].apply(lambda x: 1 if x == 'Primary' else 0)

# Drop the Subject ID column
df = df.drop(columns=['Subject ID'])

# Remove columns with 'IgM' in their labels

# Model can also be run including IgM however the model reported in
# the manuscript does not use IgM due to it being the diagnsotic marker
# for determing infection status.
df = df.loc[:, ~df.columns.str.contains('IgM')]

# Separate features and target variable
X = df.drop(columns=['CMV Infection Status'])
y = df['CMV Infection Status']

accuracies = []

# Initialize an array to store cumulative feature importance
feature_importances = np.zeros(X.shape[1])

#Repeat a five-fold cross validated framework 100 times
# Use 80-20 train test split
sss = StratifiedShuffleSplit(n_splits=100, test_size=0.2)

# Iterate over the StratifiedShuffleSplit
for train_index, test_index in sss.split(X, y):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Initialize the logistic regression model with L1 penalty (Lasso)
    model = LogisticRegression(penalty='l1', solver='liblinear', max_iter=1000)

    # Train the model
    model.fit(X_train, y_train)

    # Make predictions of CMV infection status
    y_pred = model.predict(X_test)

    # Evaluate the model accuracy
    accuracy = accuracy_score(y_test, y_pred)
    accuracies.append(accuracy)
    
     # Print predictions and probabilities for each subject
    for actual, predicted, prob in zip(y_test, y_pred, y_prob):
        print(f'Actual: {actual}, Predicted: {predicted}, Probability: {prob:.4f}')


    # Accumulate feature importance
    feature_importances += model.coef_[0]
    
# Calculate average feature importance
average_feature_importances = feature_importances / 100

# Print the average feature importances
print("Average Feature Importances over 100 runs:")
for feature, importance in zip(X.columns, average_feature_importances):
    print(f'{feature}: {importance:.4f}')

# Print the average accuracy over the 100 runs
average_accuracy = np.mean(accuracies)
print(f'Average Accuracy over 100 runs: {average_accuracy:.4f}')


Actual: 1, Predicted: 1, Probability: 1.0000
Actual: 1, Predicted: 1, Probability: 0.0013
Actual: 0, Predicted: 0, Probability: 1.0000
Actual: 1, Predicted: 1, Probability: 0.0000
Actual: 1, Predicted: 1, Probability: 1.0000
Actual: 0, Predicted: 0, Probability: 0.0000
Actual: 1, Predicted: 1, Probability: 0.9996
Actual: 0, Predicted: 0, Probability: 1.0000
Actual: 1, Predicted: 1, Probability: 0.0000
Actual: 1, Predicted: 0, Probability: 0.0000
Actual: 1, Predicted: 1, Probability: 0.0000
Actual: 0, Predicted: 0, Probability: 1.0000
Actual: 0, Predicted: 0, Probability: 1.0000
Actual: 1, Predicted: 1, Probability: 0.9984
Actual: 1, Predicted: 1, Probability: 0.0000
Actual: 1, Predicted: 1, Probability: 0.0000
Actual: 0, Predicted: 0, Probability: 1.0000
Actual: 1, Predicted: 1, Probability: 0.9897
Actual: 1, Predicted: 1, Probability: 0.0000
Actual: 1, Predicted: 1, Probability: 1.0000
Actual: 1, Predicted: 1, Probability: 1.0000
Actual: 1, Predicted: 1, Probability: 1.0000
Actual: 0,

Actual: 0, Predicted: 0, Probability: 1.0000
Actual: 0, Predicted: 0, Probability: 0.0013
Actual: 1, Predicted: 1, Probability: 1.0000
Actual: 1, Predicted: 1, Probability: 0.0000
Actual: 1, Predicted: 1, Probability: 1.0000
Actual: 0, Predicted: 0, Probability: 0.0000
Actual: 1, Predicted: 1, Probability: 0.9996
Actual: 1, Predicted: 1, Probability: 1.0000
Actual: 1, Predicted: 1, Probability: 0.0000
Actual: 1, Predicted: 1, Probability: 0.0000
Actual: 1, Predicted: 1, Probability: 0.0000
Actual: 0, Predicted: 0, Probability: 1.0000
Actual: 1, Predicted: 1, Probability: 1.0000
Actual: 1, Predicted: 1, Probability: 0.9984
Actual: 0, Predicted: 0, Probability: 0.0000
Actual: 1, Predicted: 1, Probability: 0.0000
Actual: 0, Predicted: 0, Probability: 1.0000
Actual: 1, Predicted: 1, Probability: 0.9897
Actual: 1, Predicted: 1, Probability: 0.0000
Actual: 0, Predicted: 0, Probability: 1.0000
Actual: 1, Predicted: 1, Probability: 1.0000
Actual: 1, Predicted: 0, Probability: 1.0000
Actual: 1,

Actual: 0, Predicted: 0, Probability: 1.0000
Actual: 1, Predicted: 1, Probability: 0.0013
Actual: 0, Predicted: 1, Probability: 1.0000
Actual: 0, Predicted: 0, Probability: 0.0000
Actual: 0, Predicted: 0, Probability: 1.0000
Actual: 1, Predicted: 1, Probability: 0.0000
Actual: 1, Predicted: 1, Probability: 0.9996
Actual: 0, Predicted: 0, Probability: 1.0000
Actual: 0, Predicted: 0, Probability: 0.0000
Actual: 1, Predicted: 1, Probability: 0.0000
Actual: 1, Predicted: 1, Probability: 0.0000
Actual: 1, Predicted: 1, Probability: 1.0000
Actual: 1, Predicted: 1, Probability: 1.0000
Actual: 0, Predicted: 0, Probability: 0.9984
Actual: 1, Predicted: 1, Probability: 0.0000
Actual: 0, Predicted: 0, Probability: 0.0000
Actual: 1, Predicted: 1, Probability: 1.0000
Actual: 0, Predicted: 0, Probability: 0.9897
Actual: 0, Predicted: 0, Probability: 0.0000
Actual: 1, Predicted: 1, Probability: 1.0000
Actual: 1, Predicted: 1, Probability: 1.0000
Actual: 0, Predicted: 0, Probability: 1.0000
Actual: 1,

Actual: 1, Predicted: 1, Probability: 1.0000
Actual: 1, Predicted: 1, Probability: 0.0013
Actual: 1, Predicted: 1, Probability: 1.0000
Actual: 1, Predicted: 1, Probability: 0.0000
Actual: 1, Predicted: 1, Probability: 1.0000
Actual: 1, Predicted: 1, Probability: 0.0000
Actual: 1, Predicted: 1, Probability: 0.9996
Actual: 0, Predicted: 0, Probability: 1.0000
Actual: 0, Predicted: 0, Probability: 0.0000
Actual: 1, Predicted: 1, Probability: 0.0000
Actual: 0, Predicted: 0, Probability: 0.0000
Actual: 0, Predicted: 0, Probability: 1.0000
Actual: 1, Predicted: 1, Probability: 1.0000
Actual: 0, Predicted: 0, Probability: 0.9984
Actual: 1, Predicted: 1, Probability: 0.0000
Actual: 1, Predicted: 1, Probability: 0.0000
Actual: 0, Predicted: 1, Probability: 1.0000
Actual: 1, Predicted: 1, Probability: 0.9897
Actual: 0, Predicted: 0, Probability: 0.0000
Actual: 0, Predicted: 0, Probability: 1.0000
Actual: 1, Predicted: 1, Probability: 1.0000
Actual: 0, Predicted: 1, Probability: 1.0000
Actual: 1,

Actual: 1, Predicted: 1, Probability: 1.0000
Actual: 0, Predicted: 0, Probability: 0.0013
Actual: 1, Predicted: 1, Probability: 1.0000
Actual: 0, Predicted: 0, Probability: 0.0000
Actual: 0, Predicted: 0, Probability: 1.0000
Actual: 0, Predicted: 0, Probability: 0.0000
Actual: 0, Predicted: 0, Probability: 0.9996
Actual: 0, Predicted: 0, Probability: 1.0000
Actual: 1, Predicted: 1, Probability: 0.0000
Actual: 0, Predicted: 0, Probability: 0.0000
Actual: 1, Predicted: 1, Probability: 0.0000
Actual: 1, Predicted: 1, Probability: 1.0000
Actual: 1, Predicted: 1, Probability: 1.0000
Actual: 1, Predicted: 1, Probability: 0.9984
Actual: 1, Predicted: 1, Probability: 0.0000
Actual: 1, Predicted: 1, Probability: 0.0000
Actual: 0, Predicted: 1, Probability: 1.0000
Actual: 1, Predicted: 1, Probability: 0.9897
Actual: 1, Predicted: 1, Probability: 0.0000
Actual: 0, Predicted: 0, Probability: 1.0000
Actual: 1, Predicted: 1, Probability: 1.0000
Actual: 0, Predicted: 0, Probability: 1.0000
Actual: 1,



Actual: 0, Predicted: 1, Probability: 1.0000
Actual: 1, Predicted: 1, Probability: 0.0013
Actual: 0, Predicted: 0, Probability: 1.0000
Actual: 1, Predicted: 1, Probability: 0.0000
Actual: 1, Predicted: 1, Probability: 1.0000
Actual: 1, Predicted: 1, Probability: 0.0000
Actual: 0, Predicted: 0, Probability: 0.9996
Actual: 1, Predicted: 1, Probability: 1.0000
Actual: 0, Predicted: 0, Probability: 0.0000
Actual: 0, Predicted: 0, Probability: 0.0000
Actual: 1, Predicted: 1, Probability: 0.0000
Actual: 1, Predicted: 1, Probability: 1.0000
Actual: 1, Predicted: 1, Probability: 1.0000
Actual: 0, Predicted: 0, Probability: 0.9984
Actual: 1, Predicted: 1, Probability: 0.0000
Actual: 1, Predicted: 0, Probability: 0.0000
Actual: 0, Predicted: 0, Probability: 1.0000
Actual: 0, Predicted: 0, Probability: 0.9897
Actual: 0, Predicted: 0, Probability: 0.0000
Actual: 1, Predicted: 1, Probability: 1.0000
Actual: 0, Predicted: 0, Probability: 1.0000
Actual: 1, Predicted: 1, Probability: 1.0000
Actual: 0,



Actual: 1, Predicted: 1, Probability: 1.0000
Actual: 0, Predicted: 0, Probability: 0.0013
Actual: 1, Predicted: 1, Probability: 1.0000
Actual: 1, Predicted: 1, Probability: 0.0000
Actual: 0, Predicted: 0, Probability: 1.0000
Actual: 1, Predicted: 1, Probability: 0.0000
Actual: 1, Predicted: 1, Probability: 0.9996
Actual: 1, Predicted: 1, Probability: 1.0000
Actual: 1, Predicted: 1, Probability: 0.0000
Actual: 1, Predicted: 1, Probability: 0.0000
Actual: 1, Predicted: 1, Probability: 0.0000
Actual: 1, Predicted: 1, Probability: 1.0000
Actual: 0, Predicted: 0, Probability: 1.0000
Actual: 1, Predicted: 1, Probability: 0.9984
Actual: 0, Predicted: 0, Probability: 0.0000
Actual: 0, Predicted: 0, Probability: 0.0000
Actual: 0, Predicted: 0, Probability: 1.0000
Actual: 1, Predicted: 1, Probability: 0.9897
Actual: 1, Predicted: 1, Probability: 0.0000
Actual: 1, Predicted: 1, Probability: 1.0000
Actual: 1, Predicted: 1, Probability: 1.0000
Actual: 1, Predicted: 1, Probability: 1.0000
Actual: 0,



Actual: 1, Predicted: 1, Probability: 1.0000
Actual: 0, Predicted: 0, Probability: 0.0013
Actual: 1, Predicted: 1, Probability: 1.0000
Actual: 0, Predicted: 0, Probability: 0.0000
Actual: 0, Predicted: 0, Probability: 1.0000
Actual: 1, Predicted: 1, Probability: 0.0000
Actual: 0, Predicted: 0, Probability: 0.9996
Actual: 1, Predicted: 1, Probability: 1.0000
Actual: 0, Predicted: 1, Probability: 0.0000
Actual: 1, Predicted: 1, Probability: 0.0000
Actual: 1, Predicted: 1, Probability: 0.0000
Actual: 1, Predicted: 1, Probability: 1.0000
Actual: 1, Predicted: 1, Probability: 1.0000
Actual: 0, Predicted: 0, Probability: 0.9984
Actual: 0, Predicted: 0, Probability: 0.0000
Actual: 1, Predicted: 1, Probability: 0.0000
Actual: 1, Predicted: 1, Probability: 1.0000
Actual: 1, Predicted: 1, Probability: 0.9897
Actual: 0, Predicted: 0, Probability: 0.0000
Actual: 1, Predicted: 1, Probability: 1.0000
Actual: 1, Predicted: 1, Probability: 1.0000
Actual: 1, Predicted: 1, Probability: 1.0000
Actual: 1,

Actual: 0, Predicted: 0, Probability: 1.0000
Actual: 1, Predicted: 1, Probability: 0.0013
Actual: 1, Predicted: 1, Probability: 1.0000
Actual: 0, Predicted: 0, Probability: 0.0000
Actual: 0, Predicted: 0, Probability: 1.0000
Actual: 1, Predicted: 1, Probability: 0.0000
Actual: 1, Predicted: 1, Probability: 0.9996
Actual: 1, Predicted: 1, Probability: 1.0000
Actual: 1, Predicted: 1, Probability: 0.0000
Actual: 0, Predicted: 0, Probability: 0.0000
Actual: 1, Predicted: 1, Probability: 0.0000
Actual: 1, Predicted: 1, Probability: 1.0000
Actual: 1, Predicted: 1, Probability: 1.0000
Actual: 0, Predicted: 0, Probability: 0.9984
Actual: 0, Predicted: 0, Probability: 0.0000
Actual: 1, Predicted: 1, Probability: 0.0000
Actual: 0, Predicted: 0, Probability: 1.0000
Actual: 1, Predicted: 1, Probability: 0.9897
Actual: 1, Predicted: 1, Probability: 0.0000
Actual: 0, Predicted: 1, Probability: 1.0000
Actual: 1, Predicted: 1, Probability: 1.0000
Actual: 1, Predicted: 1, Probability: 1.0000
Actual: 0,