# Post-Training Kernel-Weights Analysis

### Setup & Import Training Models

In [None]:
# Import modules
import pandas as pd
import numpy as np
import pyreadr
import tqdm

import os

from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import ElasticNet
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

from sklearn import metrics

import matplotlib.pyplot as plt

from math import sqrt

In [None]:
# Import coefficients
def _read_array(path):
    """Return the NumPy array stored in the .npy file at `path`."""
    with open(path, 'rb') as handle:
        return np.load(handle)


# One coefficient array and one intercept array is read per kernel.
Coef_Phen = _read_array('Preprocessed/Coef_Phen.npy')
Inter_Phen = _read_array('Preprocessed/Inter_Phen.npy')

Coef_sMRI = _read_array('Preprocessed/Coef_sMRI.npy')
Inter_sMRI = _read_array('Preprocessed/Inter_sMRI.npy')

Coef_dMRI = _read_array('Preprocessed/Coef_dMRI.npy')
Inter_dMRI = _read_array('Preprocessed/Inter_dMRI.npy')

Coef_rsfMRI = _read_array('Preprocessed/Coef_rsfMRI.npy')
Inter_rsfMRI = _read_array('Preprocessed/Inter_rsfMRI.npy')

Coef_tsfMRI = _read_array('Preprocessed/Coef_tsfMRI.npy')
Inter_tsfMRI = _read_array('Preprocessed/Inter_tsfMRI.npy')

Coef_Gene = _read_array('Preprocessed/Coef_Gene.npy')
Inter_Gene = _read_array('Preprocessed/Inter_Gene.npy')

In [None]:
# Import data
def getData(data):
  """Read the R data file at path `data` and return the entry stored under key None.

  The file is parsed with pyreadr.read_r; only the object found under the
  `None` key of its result is handed back to the caller.
  """
  loaded = pyreadr.read_r(data)
  return loaded[None]

# Batch assignment tables and the OCD label table.
batch_df, batch_ex, ocd_df = (
    getData(rds_path)
    for rds_path in (
        "Preprocessed/Batch_Phen.rds",
        "Preprocessed/Batch_Experimental.rds",
        "OCD.rds",
    )
)

# One kernel table per data modality.
Phen_df, sMRI_df, dMRI_df, rsfMRI_df, tsfMRI_df, Gene_df = (
    getData(rds_path)
    for rds_path in (
        "Preprocessed/Kernel_Phen.rds",
        "Preprocessed/Kernel_sMRI.rds",
        "Preprocessed/Kernel_dMRI.rds",
        "Preprocessed/Kernel_rsfMRI.rds",
        "Preprocessed/Kernel_tsfMRI.rds",
        "Preprocessed/Kernel_Gene.rds",
    )
)

### Derive probabilities from test set

In [None]:
# Score the test-set samples with each kernel's averaged logistic model.
# The *_train_np / ocd_train_np names are kept for later cells even though
# the rows selected here are the ones flagged Test == 1.

def _kernel_rows(kernel_df, ids):
    """Return every column after the first for rows whose SampleID is in `ids`."""
    return kernel_df.loc[kernel_df['SampleID'].isin(ids)].to_numpy()[:,1:]


def _kernel_probabilities(features, coef_stack, intercept_stack):
    """Return the logistic probability of each row of `features`.

    The coefficient vector is the mean of `coef_stack` over its first axis
    (first row of the result); the intercept is the mean of `intercept_stack`.
    """
    coef = np.mean(coef_stack, axis=0)[0]
    intercept = np.mean(intercept_stack)
    # Cast explicitly: the rows were sliced out of a frame that also carried
    # the SampleID column, so to_numpy() may have produced an object array.
    logits = np.dot(features.astype(float), coef) + intercept
    return 1/(1+np.exp(-logits))


sample_ids = list(batch_df.loc[batch_df['Test']==1]['SampleID'])

# NOTE(review): labels follow ocd_df row order while features follow each
# kernel frame's row order; this assumes all frames list samples in the same
# order -- TODO confirm.
ocd_train_np = np.concatenate(ocd_df.loc[ocd_df['SampleID'].isin(sample_ids)].to_numpy()[:,1:])

# Phenotype kernel
Phen_train_np = _kernel_rows(Phen_df, sample_ids)
Phen_y_prob = _kernel_probabilities(Phen_train_np, Coef_Phen, Inter_Phen)

# sMRI kernel
sMRI_train_np = _kernel_rows(sMRI_df, sample_ids)
sMRI_y_prob = _kernel_probabilities(sMRI_train_np, Coef_sMRI, Inter_sMRI)

# dMRI kernel
dMRI_train_np = _kernel_rows(dMRI_df, sample_ids)
dMRI_y_prob = _kernel_probabilities(dMRI_train_np, Coef_dMRI, Inter_dMRI)

# rsfMRI kernel
rsfMRI_train_np = _kernel_rows(rsfMRI_df, sample_ids)
rsfMRI_y_prob = _kernel_probabilities(rsfMRI_train_np, Coef_rsfMRI, Inter_rsfMRI)

# tsfMRI kernel
tsfMRI_train_np = _kernel_rows(tsfMRI_df, sample_ids)
tsfMRI_y_prob = _kernel_probabilities(tsfMRI_train_np, Coef_tsfMRI, Inter_tsfMRI)

# Gene kernel
Gene_train_np = _kernel_rows(Gene_df, sample_ids)
Gene_y_prob = _kernel_probabilities(Gene_train_np, Coef_Gene, Inter_Gene)

### AUC Based Weights

In [None]:
# Calculated total probability of 6 models

def _print_metrics(description, predicted, observed):
    """Print accuracy, precision, specificity and sensitivity for 0/1 predictions.

    `description` is inserted after "Model based on" in the printed line.
    `predicted` holds 0/1 case-status labels; `observed` holds the true labels.
    """
    TN, FP, FN, TP = confusion_matrix(observed, predicted.astype('int64')).ravel()

    ACC = (TP + TN) / (TP + TN + FP + FN)
    PPV = TP / (TP + FP)  # precision
    TNR = TN / (TN + FP)  # specificity
    TPR = TP / (TP + FN)  # sensitivity

    print("Model based on %s: accuracy = %.2f; precision = %.2f; specificity = %.2f; sensitivity = %.2f." % (description, ACC, PPV, TNR, TPR))


MOD_AUC_6 = 0.2441 * Phen_y_prob + 0.1575 * rsfMRI_y_prob + 0.1549 * sMRI_y_prob + 0.1496 * dMRI_y_prob + 0.1470 * tsfMRI_y_prob + 0.1470 * Gene_y_prob
MOD_AUC_5 = 0.2862 * Phen_y_prob + 0.1846 * rsfMRI_y_prob + 0.1815 * sMRI_y_prob + 0.1754 * dMRI_y_prob + 0.1723 * tsfMRI_y_prob
MOD_AUC_4 = 0.3457 * Phen_y_prob + 0.2230 * rsfMRI_y_prob + 0.2193 * sMRI_y_prob + 0.2119 * dMRI_y_prob
MOD_AUC_3 = 0.4387 * Phen_y_prob + 0.2830 * rsfMRI_y_prob + 0.2783 * sMRI_y_prob
MOD_AUC_2 = 0.6078 * Phen_y_prob + 0.3922 * rsfMRI_y_prob
MOD_AUC_1 = 1.0000 * Phen_y_prob

# Derive case-status label: 1.0 where the weighted probability exceeds 0.5, else 0.0
MOD_AUC_6, MOD_AUC_5, MOD_AUC_4, MOD_AUC_3, MOD_AUC_2, MOD_AUC_1 = (
    (weighted > 0.5).astype(float)
    for weighted in (MOD_AUC_6, MOD_AUC_5, MOD_AUC_4, MOD_AUC_3, MOD_AUC_2, MOD_AUC_1)
)

# Pull necessary data then print relevant metrics
ocd_train_np = ocd_train_np.astype('int64')

# The count in each printed line is the number of kernels combined in that model.
for n_kernels, predicted in (
    (6, MOD_AUC_6),
    (5, MOD_AUC_5),
    (4, MOD_AUC_4),
    (3, MOD_AUC_3),
    (2, MOD_AUC_2),
    (1, MOD_AUC_1),
):
    _print_metrics("AUC weights and %d variables" % n_kernels, predicted, ocd_train_np)

### Accuracy Based Weights

In [None]:
# Calculated total probability of 6 models

def _print_metrics(description, predicted, observed):
    """Print accuracy, precision, specificity and sensitivity for 0/1 predictions.

    `description` is inserted after "Model based on" in the printed line.
    `predicted` holds 0/1 case-status labels; `observed` holds the true labels.
    """
    TN, FP, FN, TP = confusion_matrix(observed, predicted.astype('int64')).ravel()

    ACC = (TP + TN) / (TP + TN + FP + FN)
    PPV = TP / (TP + FP)  # precision
    TNR = TN / (TN + FP)  # specificity
    TPR = TP / (TP + FN)  # sensitivity

    print("Model based on %s: accuracy = %.2f; precision = %.2f; specificity = %.2f; sensitivity = %.2f." % (description, ACC, PPV, TNR, TPR))


MOD_ACC_6 = 0.2408 * Phen_y_prob + 0.1586 * tsfMRI_y_prob + 0.1586 * rsfMRI_y_prob + 0.1501 * sMRI_y_prob + 0.1501 * Gene_y_prob + 0.1416 * dMRI_y_prob
MOD_ACC_5 = 0.2805 * Phen_y_prob + 0.1848 * tsfMRI_y_prob + 0.1848 * rsfMRI_y_prob + 0.1749 * sMRI_y_prob + 0.1749 * Gene_y_prob
MOD_ACC_4 = 0.3400 * Phen_y_prob + 0.2240 * tsfMRI_y_prob + 0.2240 * rsfMRI_y_prob + 0.2120 * sMRI_y_prob
MOD_ACC_3 = 0.4315 * Phen_y_prob + 0.2843 * tsfMRI_y_prob + 0.2843 * rsfMRI_y_prob
MOD_ACC_2 = 0.6028 * Phen_y_prob + 0.3972 * tsfMRI_y_prob
MOD_ACC_1 = 1.0000 * Phen_y_prob

# Derive case-status label: 1.0 where the weighted probability exceeds 0.5, else 0.0
MOD_ACC_6, MOD_ACC_5, MOD_ACC_4, MOD_ACC_3, MOD_ACC_2, MOD_ACC_1 = (
    (weighted > 0.5).astype(float)
    for weighted in (MOD_ACC_6, MOD_ACC_5, MOD_ACC_4, MOD_ACC_3, MOD_ACC_2, MOD_ACC_1)
)

# Pull necessary data then print relevant metrics
ocd_train_np = ocd_train_np.astype('int64')

# The count in each printed line is the number of kernels combined in that model.
for n_kernels, predicted in (
    (6, MOD_ACC_6),
    (5, MOD_ACC_5),
    (4, MOD_ACC_4),
    (3, MOD_ACC_3),
    (2, MOD_ACC_2),
    (1, MOD_ACC_1),
):
    _print_metrics("accuracy weights and %d variables" % n_kernels, predicted, ocd_train_np)

### Specificity Based Weights

In [None]:
# Calculated total probability of 6 models

def _print_metrics(description, predicted, observed):
    """Print accuracy, precision, specificity and sensitivity for 0/1 predictions.

    `description` is inserted after "Model based on" in the printed line.
    `predicted` holds 0/1 case-status labels; `observed` holds the true labels.
    """
    TN, FP, FN, TP = confusion_matrix(observed, predicted.astype('int64')).ravel()

    ACC = (TP + TN) / (TP + TN + FP + FN)
    PPV = TP / (TP + FP)  # precision
    TNR = TN / (TN + FP)  # specificity
    TPR = TP / (TP + FN)  # sensitivity

    print("Model based on %s: accuracy = %.2f; precision = %.2f; specificity = %.2f; sensitivity = %.2f." % (description, ACC, PPV, TNR, TPR))


MOD_SPE_6 = 0.2087 * Phen_y_prob + 0.1990 * tsfMRI_y_prob + 0.1990 * dMRI_y_prob + 0.1796 * sMRI_y_prob + 0.1408 * rsfMRI_y_prob + 0.0728 * Gene_y_prob
MOD_SPE_5 = 0.2251 * Phen_y_prob + 0.2147 * tsfMRI_y_prob + 0.2147 * dMRI_y_prob + 0.1937 * sMRI_y_prob + 0.1518 * rsfMRI_y_prob
MOD_SPE_4 = 0.2654 * Phen_y_prob + 0.2531 * tsfMRI_y_prob + 0.2531 * dMRI_y_prob + 0.2284 * sMRI_y_prob
MOD_SPE_3 = 0.3440 * Phen_y_prob + 0.3280 * tsfMRI_y_prob + 0.3280 * dMRI_y_prob
MOD_SPE_2 = 0.5119 * Phen_y_prob + 0.4881 * tsfMRI_y_prob
MOD_SPE_1 = 1.0000 * Phen_y_prob

# Derive case-status label: 1.0 where the weighted probability exceeds 0.5, else 0.0
MOD_SPE_6, MOD_SPE_5, MOD_SPE_4, MOD_SPE_3, MOD_SPE_2, MOD_SPE_1 = (
    (weighted > 0.5).astype(float)
    for weighted in (MOD_SPE_6, MOD_SPE_5, MOD_SPE_4, MOD_SPE_3, MOD_SPE_2, MOD_SPE_1)
)

# Pull necessary data then print relevant metrics
ocd_train_np = ocd_train_np.astype('int64')

# The count in each printed line is the number of kernels combined in that model.
for n_kernels, predicted in (
    (6, MOD_SPE_6),
    (5, MOD_SPE_5),
    (4, MOD_SPE_4),
    (3, MOD_SPE_3),
    (2, MOD_SPE_2),
    (1, MOD_SPE_1),
):
    _print_metrics("specificity weights and %d variables" % n_kernels, predicted, ocd_train_np)

### Cohen's Kappa Based Weights

In [None]:
# Calculated total probability of 6 models

def _print_metrics(description, predicted, observed):
    """Print accuracy, precision, specificity and sensitivity for 0/1 predictions.

    `description` is inserted after "Model based on" in the printed line.
    `predicted` holds 0/1 case-status labels; `observed` holds the true labels.
    """
    TN, FP, FN, TP = confusion_matrix(observed, predicted.astype('int64')).ravel()

    ACC = (TP + TN) / (TP + TN + FP + FN)
    PPV = TP / (TP + FP)  # precision
    TNR = TN / (TN + FP)  # specificity
    TPR = TP / (TP + FN)  # sensitivity

    print("Model based on %s: accuracy = %.2f; precision = %.2f; specificity = %.2f; sensitivity = %.2f." % (description, ACC, PPV, TNR, TPR))


MOD_COK_6 = 0.7143 * Phen_y_prob + 0.1190 * tsfMRI_y_prob + 0.0952 * rsfMRI_y_prob + 0.0476 * sMRI_y_prob + 0.0238 * Gene_y_prob + 0.0000 * dMRI_y_prob
MOD_COK_5 = 0.7143 * Phen_y_prob + 0.1190 * tsfMRI_y_prob + 0.0952 * rsfMRI_y_prob + 0.0476 * sMRI_y_prob + 0.0238 * Gene_y_prob
MOD_COK_4 = 0.7317 * Phen_y_prob + 0.1220 * tsfMRI_y_prob + 0.0976 * rsfMRI_y_prob + 0.0488 * sMRI_y_prob
MOD_COK_3 = 0.7692 * Phen_y_prob + 0.1282 * tsfMRI_y_prob + 0.1026 * rsfMRI_y_prob
MOD_COK_2 = 0.8571 * Phen_y_prob + 0.1429 * tsfMRI_y_prob
MOD_COK_1 = 1.0000 * Phen_y_prob

# Derive case-status label: 1.0 where the weighted probability exceeds 0.5, else 0.0
MOD_COK_6, MOD_COK_5, MOD_COK_4, MOD_COK_3, MOD_COK_2, MOD_COK_1 = (
    (weighted > 0.5).astype(float)
    for weighted in (MOD_COK_6, MOD_COK_5, MOD_COK_4, MOD_COK_3, MOD_COK_2, MOD_COK_1)
)

# Pull necessary data then print relevant metrics
ocd_train_np = ocd_train_np.astype('int64')

# The count in each printed line is the number of kernels combined in that model.
for n_kernels, predicted in (
    (6, MOD_COK_6),
    (5, MOD_COK_5),
    (4, MOD_COK_4),
    (3, MOD_COK_3),
    (2, MOD_COK_2),
    (1, MOD_COK_1),
):
    _print_metrics("Cohen's kappa weights and %d variables" % n_kernels, predicted, ocd_train_np)

### Positive Predictive Value Based Weights

In [None]:
# Calculated total probability of 6 models

def _print_metrics(description, predicted, observed):
    """Print accuracy, precision, specificity and sensitivity for 0/1 predictions.

    `description` is inserted after "Model based on" in the printed line.
    `predicted` holds 0/1 case-status labels; `observed` holds the true labels.
    """
    TN, FP, FN, TP = confusion_matrix(observed, predicted.astype('int64')).ravel()

    ACC = (TP + TN) / (TP + TN + FP + FN)
    PPV = TP / (TP + FP)  # precision
    TNR = TN / (TN + FP)  # specificity
    TPR = TP / (TP + FN)  # sensitivity

    print("Model based on %s: accuracy = %.2f; precision = %.2f; specificity = %.2f; sensitivity = %.2f." % (description, ACC, PPV, TNR, TPR))


MOD_PRE_6 = 0.2363 * Phen_y_prob + 0.1731 * tsfMRI_y_prob + 0.1593 * rsfMRI_y_prob + 0.1511 * sMRI_y_prob + 0.1429 * Gene_y_prob + 0.1374 * dMRI_y_prob
MOD_PRE_5 = 0.2739 * Phen_y_prob + 0.2006 * tsfMRI_y_prob + 0.1847 * rsfMRI_y_prob + 0.1752 * sMRI_y_prob + 0.1656 * Gene_y_prob
MOD_PRE_4 = 0.3282 * Phen_y_prob + 0.2405 * tsfMRI_y_prob + 0.2214 * rsfMRI_y_prob + 0.2099 * sMRI_y_prob
MOD_PRE_3 = 0.4155 * Phen_y_prob + 0.3043 * tsfMRI_y_prob + 0.2802 * rsfMRI_y_prob
MOD_PRE_2 = 0.5772 * Phen_y_prob + 0.4228 * tsfMRI_y_prob
MOD_PRE_1 = 1.0000 * Phen_y_prob

# Derive case-status label: 1.0 where the weighted probability exceeds 0.5, else 0.0
MOD_PRE_6, MOD_PRE_5, MOD_PRE_4, MOD_PRE_3, MOD_PRE_2, MOD_PRE_1 = (
    (weighted > 0.5).astype(float)
    for weighted in (MOD_PRE_6, MOD_PRE_5, MOD_PRE_4, MOD_PRE_3, MOD_PRE_2, MOD_PRE_1)
)

# Pull necessary data then print relevant metrics
ocd_train_np = ocd_train_np.astype('int64')

# These models are the MOD_PRE_* (precision / positive predictive value
# weighted) ones, so the printed line names that weighting; the count is the
# number of kernels combined in that model.
for n_kernels, predicted in (
    (6, MOD_PRE_6),
    (5, MOD_PRE_5),
    (4, MOD_PRE_4),
    (3, MOD_PRE_3),
    (2, MOD_PRE_2),
    (1, MOD_PRE_1),
):
    _print_metrics("precision weights and %d variables" % n_kernels, predicted, ocd_train_np)

In [None]:
# Get Kernel-Wise Metrics

def _print_kernel_metrics(kernel_name, kernel_prob, observed):
    """Print accuracy, precision, specificity and sensitivity for one kernel.

    `kernel_prob` is thresholded at 0.5 into a new array, so the caller's
    probability array is left unchanged and can still be combined with the
    other kernels afterwards.
    """
    predicted = (kernel_prob > 0.5).astype('int64')

    TN, FP, FN, TP = confusion_matrix(observed, predicted).ravel()

    ACC = (TP + TN) / (TP + TN + FP + FN)
    PPV = TP / (TP + FP)  # precision
    TNR = TN / (TN + FP)  # specificity
    TPR = TP / (TP + FN)  # sensitivity

    print("Model based on %s_y_prob weights and 6 variables: accuracy = %.2f; precision = %.2f; specificity = %.2f; sensitivity = %.2f." % (kernel_name, ACC, PPV, TNR, TPR))


ocd_train_np = ocd_train_np.astype('int64')

for kernel_name, kernel_prob in (
    ("Phen", Phen_y_prob),
    ("rsfMRI", rsfMRI_y_prob),
    ("sMRI", sMRI_y_prob),
    ("dMRI", dMRI_y_prob),
    ("tsfMRI", tsfMRI_y_prob),
    ("Gene", Gene_y_prob),
):
    _print_kernel_metrics(kernel_name, kernel_prob, ocd_train_np)


In [None]:
# Get total model detailed metrics

# NOTE(review): this rebuilds the six-kernel Cohen's kappa model from the
# *_y_prob arrays, so they must still hold probabilities at this point;
# confirm that nothing run earlier has thresholded them in place.
MOD_COK_6 = 0.7143 * Phen_y_prob + 0.1190 * tsfMRI_y_prob + 0.0952 * rsfMRI_y_prob + 0.0476 * sMRI_y_prob + 0.0238 * Gene_y_prob + 0.0000 * dMRI_y_prob

# Derive case-status label

MOD_COK_6[MOD_COK_6>0.5]=1
MOD_COK_6[MOD_COK_6<=0.5]=0

# Pull necessary data then print relevant metrics
total_y_prob = MOD_COK_6.astype('int64')
ocd_train_np = ocd_train_np.astype('int64')

TN, FP, FN, TP = confusion_matrix(ocd_train_np,total_y_prob).ravel()

P = TP + FN   # observed positives
N = FP + TN   # observed negatives
PP = TP + FP  # predicted positives
PN = FN + TN  # predicted negatives

# Cohen's kappa for a 2x2 table: the numerator is TP*TN - FN*FP.
CK = (2*((TP * TN) - (FN * FP)))/(((TP + FP) * (FP + TN)) + ((TP + FN) * (FN+TN)))
ACC = (TP + TN) / (P + N)
PPV = TP / PP
FOR = FN / PN
FDR = FP / PP
NPV = TN / PN
TPR = TP / P
FPR = FP / N
FNR = FN / P
TNR = TN / N
LRp = TPR / FPR
LRn = FNR / TNR
MK = PPV + NPV - 1
BM = TPR + TNR - 1
# Prevalence threshold uses the square root of the product TPR*FPR.
PT = (sqrt(TPR*FPR) - FPR) / (TPR - FPR)
DOR = LRp/LRn
BA = (TPR + TNR) / 2
FS = (2*PPV * TPR) / (PPV + TPR)
FM = sqrt(PPV * TPR)
MCC = sqrt(TPR*TNR*PPV*NPV) - sqrt(FPR*FNR*FDR*FOR)
TS = TP / (TP + FN + FP)

# One value per line, in this fixed order.
for metric_value in (
    CK, ACC, PPV, FOR, FDR, NPV, TPR, FPR, TNR, FNR, LRp,
    LRn, MK, BM, PT, DOR, BA, FS, FM, MCC, TS,
):
    print(metric_value)

### Derive and save probabilities for ambiguous cases

In [None]:
# Score the experimental (ambiguous) samples with each kernel's averaged
# logistic model. The *_train_np / ocd_train_np names are kept for later
# cells even though the rows selected here are the ones flagged
# Experimental == 1.

def _kernel_rows(kernel_df, ids):
    """Return every column after the first for rows whose SampleID is in `ids`."""
    return kernel_df.loc[kernel_df['SampleID'].isin(ids)].to_numpy()[:,1:]


def _kernel_probabilities(features, coef_stack, intercept_stack):
    """Return the logistic probability of each row of `features`.

    The coefficient vector is the mean of `coef_stack` over its first axis
    (first row of the result); the intercept is the mean of `intercept_stack`.
    """
    coef = np.mean(coef_stack, axis=0)[0]
    intercept = np.mean(intercept_stack)
    # Cast explicitly: the rows were sliced out of a frame that also carried
    # the SampleID column, so to_numpy() may have produced an object array.
    logits = np.dot(features.astype(float), coef) + intercept
    return 1/(1+np.exp(-logits))


sample_ids = list(batch_ex.loc[batch_ex['Experimental']==1]['SampleID'])

# NOTE(review): rows follow each frame's own order, not the order of
# sample_ids; this assumes all frames list samples in the same order --
# TODO confirm.
ocd_train_np = np.concatenate(ocd_df.loc[ocd_df['SampleID'].isin(sample_ids)].to_numpy()[:,1:])

# Phenotype kernel
Phen_train_np = _kernel_rows(Phen_df, sample_ids)
Phen_y_prob = _kernel_probabilities(Phen_train_np, Coef_Phen, Inter_Phen)

# sMRI kernel
sMRI_train_np = _kernel_rows(sMRI_df, sample_ids)
sMRI_y_prob = _kernel_probabilities(sMRI_train_np, Coef_sMRI, Inter_sMRI)

# dMRI kernel
dMRI_train_np = _kernel_rows(dMRI_df, sample_ids)
dMRI_y_prob = _kernel_probabilities(dMRI_train_np, Coef_dMRI, Inter_dMRI)

# rsfMRI kernel
rsfMRI_train_np = _kernel_rows(rsfMRI_df, sample_ids)
rsfMRI_y_prob = _kernel_probabilities(rsfMRI_train_np, Coef_rsfMRI, Inter_rsfMRI)

# tsfMRI kernel
tsfMRI_train_np = _kernel_rows(tsfMRI_df, sample_ids)
tsfMRI_y_prob = _kernel_probabilities(tsfMRI_train_np, Coef_tsfMRI, Inter_tsfMRI)

# Gene kernel
Gene_train_np = _kernel_rows(Gene_df, sample_ids)
Gene_y_prob = _kernel_probabilities(Gene_train_np, Coef_Gene, Inter_Gene)

In [None]:
# Apply Cohen's Kappa Model 6
# Weighted sum of the six kernel probabilities (the dMRI weight is zero).
total_y_prob = (
    0.7143 * Phen_y_prob
    + 0.1190 * tsfMRI_y_prob
    + 0.0952 * rsfMRI_y_prob
    + 0.0476 * sMRI_y_prob
    + 0.0238 * Gene_y_prob
    + 0.0000 * dMRI_y_prob
)

# Show the raw combined probabilities before they are turned into labels.
print(total_y_prob)

# Case-status label: 1 above the 0.5 cut-off, 0 at or below it.
is_case = total_y_prob > 0.5
is_control = total_y_prob <= 0.5
total_y_prob[is_case] = 1
total_y_prob[is_control] = 0
total_y_prob = total_y_prob.astype('int64')

In [None]:
# Save Results
# The predictions were computed from rows selected with isin(), i.e. in the
# kernel frames' row order rather than the order of sample_ids, so take the
# IDs from the same selection to keep each ID next to its own prediction.
# NOTE(review): assumes every kernel frame lists samples in the same order as
# Phen_df -- TODO confirm.
predicted_ids = list(Phen_df.loc[Phen_df['SampleID'].isin(sample_ids), 'SampleID'])

# Building from a dict makes pandas raise on a length mismatch instead of
# silently dropping trailing rows.
df = pd.DataFrame({"SampleID": predicted_ids, "OCD": total_y_prob})

df.to_csv('Preprocessed/PredAmbig.csv', index=False)

In [None]:
# Report the (rows, columns) shape of every kernel table, one per line.
for kernel_table in (Phen_df, sMRI_df, dMRI_df, rsfMRI_df, tsfMRI_df, Gene_df):
    print(kernel_table.shape)