In [None]:
import pandas as pd
import numpy as np
import pyreadr

In [None]:
def getData(data):
  """Read an R data file with pyreadr and return the object stored under key None.

  Args:
    data: Path of the file handed to ``pyreadr.read_r``.

  Returns:
    The entry of the ``read_r`` result indexed by ``None``.
  """
  loaded = pyreadr.read_r(data)
  return loaded[None]

In [None]:
# Load every input table through getData, in the same order as before:
# batch assignment, genetics, OCD labels, questionnaire, dMRI and sMRI.
(batch_df, genetic_df, ocd_df, que_df, dmri_df, smri_df) = map(
    getData,
    ("Batch.rds", "Genetic.rds", "OCD.rds", "QUE.rds", "dMRI.rds", "sMRI.rds"),
)

In [None]:
# Replace missing OCD entries in the batch table with 1, then preview the
# first rows of the updated frame.
batch_df['OCD'] = batch_df['OCD'].fillna(value=1)
batch_df.head(5)

In [None]:
# Preview the first rows of the genetics table.
genetic_df.head(5)

In [None]:
# Replace missing OCD entries in the label table with 1, then preview the
# first rows of the updated frame.
ocd_df['OCD'] = ocd_df['OCD'].fillna(value=1)
ocd_df.head(5)

In [None]:
# Preview the first rows of the questionnaire table.
que_df.head(5)

In [None]:
# Preview the first rows of the dMRI table.
dmri_df.head(5)

In [None]:
# Preview the first rows of the sMRI table.
smri_df.head(5)

### QUESTIONNAIRE

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import ElasticNet
from sklearn.metrics import accuracy_score

# Fit one elastic-net logistic regression per training split (batch_df columns
# Train_1 .. Train_76) on the questionnaire features and collect the fitted
# coefficients / intercepts so they can be averaged afterwards.
# NOTE(review): features and labels are selected independently with isin(), so
# their rows only line up if que_df and ocd_df list SampleID in the same
# order -- confirm against the data.
train_coef = []
train_intercept = []
for i in range(1,77):
  sample_ids = list(batch_df.loc[batch_df['Train_'+str(i)]==1]['SampleID'])
  # Every column after the first (presumably SampleID) is used as a feature.
  que_np = que_df.loc[que_df['SampleID'].isin(sample_ids)].to_numpy()[:,1:]
  que_np=que_np.astype('float64')
  # Missing feature values are imputed with the constant 1.0.
  que_np[np.isnan(que_np)]=1.0
  ocd_np = np.concatenate(ocd_df.loc[ocd_df['SampleID'].isin(sample_ids)].to_numpy()[:,1:])
  # Label 2 is merged into label 1 before fitting.
  ocd_np[ocd_np==2]=1
  ocd_np = ocd_np.astype('int')
  # The saga solver shuffles the data; a fixed random_state makes the fitted
  # coefficients reproducible across Restart & Run All.
  model = LogisticRegression(penalty='elasticnet', solver='saga', l1_ratio=0.8, max_iter=5000, random_state=0).fit(que_np,ocd_np)
  train_coef.append(model.coef_)
  train_intercept.append(model.intercept_)
  print("Progress ===> ",i," model completed")
print(train_coef)
print(train_intercept)
# Stack the per-split results: one row of coefficients / one intercept per split.
train_coef = np.concatenate(train_coef)
train_intercept = np.concatenate(train_intercept)

In [None]:
from sklearn.metrics import confusion_matrix

# Collapse the per-split questionnaire models into a single model by averaging
# their coefficient rows and intercepts.
train_model_coef = np.mean(train_coef, axis=0)
train_model_intercept = np.mean(train_intercept)

# Evaluate that averaged model on the samples flagged Valid == 1 in batch_df.
sample_ids = list(batch_df.loc[batch_df['Valid']==1]['SampleID'])
que_train_np = que_df.loc[que_df['SampleID'].isin(sample_ids)].to_numpy()[:,1:].astype('float64')
que_train_np[np.isnan(que_train_np)] = 1.0  # same constant imputation as in training
ocd_train_np = np.concatenate(ocd_df.loc[ocd_df['SampleID'].isin(sample_ids)].to_numpy()[:,1:])
ocd_train_np[ocd_train_np==2] = 1

# Logistic (sigmoid) probability for each validation row.
train_y_prob = [
  1/(1+np.exp(-(np.dot(row,train_model_coef)+train_model_intercept)))
  for row in que_train_np
]

# Threshold the probabilities at 0.5 to obtain hard class predictions.
train_y_pred = np.array(train_y_prob)
train_y_pred[train_y_pred>0.5] = 1
train_y_pred[train_y_pred<=0.5] = 0
ocd_train_np = ocd_train_np.astype('int64')
train_y_pred = train_y_pred.astype('int64')
print(accuracy_score(ocd_train_np,train_y_pred))
print(confusion_matrix(ocd_train_np,train_y_pred))

In [None]:
from sklearn.metrics import confusion_matrix

# scikit-learn lays the 2x2 matrix out as [[TN, FP], [FN, TP]]; unpack it row
# by row into the four counts used by the metrics cell.
(TN, FP), (FN, TP) = confusion_matrix(ocd_train_np,train_y_pred)

print(confusion_matrix(ocd_train_np, train_y_pred))

In [None]:
from math import sqrt

# Derived statistics for the 2x2 confusion matrix whose counts TP, FN, FP, TN
# were unpacked in the previous cell.
P = TP + FN    # actual positives
N = FP + TN    # actual negatives
PP = TP + FP   # predicted positives
PN = FN + TN   # predicted negatives

print("Population =", P+N)
print("Prevalence =", P/(P+N))

# Cohen's kappa: the numerator is TP*TN - FN*FP (the FN factor was previously
# written as TN, which gave a wrong value).
CK = (2*((TP * TN) - (FN * FP)))/(((TP + FP) * (FP + TN)) + ((TP + FN) * (FN+TN)))
ACC = (TP + TN) / (P + N)   # accuracy
PPV = TP / PP               # positive predictive value (precision)
FOR = FN / PN               # false omission rate
FDR = FP / PP               # false discovery rate
NPV = TN / PN               # negative predictive value
TPR = TP / P                # true positive rate (sensitivity)
FPR = FP / N                # false positive rate
FNR = FN / P                # false negative rate
TNR = TN / N                # true negative rate (specificity)
LRp = TPR / FPR             # positive likelihood ratio
LRn = FNR / TNR             # negative likelihood ratio
MK = PPV + NPV - 1          # markedness
BM = TPR + TNR - 1          # bookmaker informedness
# Prevalence threshold uses the geometric mean sqrt(TPR*FPR), not sqrt(TPR+FPR);
# it is undefined when TPR == FPR, so report NaN in that case.
PT = (sqrt(TPR*FPR) - FPR) / (TPR - FPR) if TPR != FPR else float('nan')
DOR = LRp/LRn               # diagnostic odds ratio
BA = (TPR + TNR) / 2        # balanced accuracy
FS = (2*PPV * TPR) / (PPV + TPR)   # F1 score
FM = sqrt(PPV * TPR)        # Fowlkes-Mallows index
MCC = sqrt(TPR*TNR*PPV*NPV) - sqrt(FPR*FNR*FDR*FOR)   # Matthews correlation coefficient
TS = TP / (TP + FN + FP)    # threat score

print(CK)
print(ACC)
print(PPV)
print(FOR)
print(FDR)
print(NPV)
print(TPR)
print(FPR)
print(TNR)
print(FNR)
print(LRp)
print(LRn)
print(MK)
print(BM)
print(PT)
print(DOR)
print(BA)
print(FS)
print(FM)
print(MCC)
print(TS)

In [None]:
# Persist the stacked per-split questionnaire parameters for the COMBINE step.
np.save('que_coef.npy', train_coef)
np.save('que_intercept.npy', train_intercept)

### dMRI

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import ElasticNet
from sklearn.metrics import accuracy_score

# Fit one elastic-net logistic regression per training split (batch_df columns
# Train_1 .. Train_76) on the dMRI features and collect the fitted
# coefficients / intercepts so they can be averaged afterwards.
# NOTE(review): features and labels are selected independently with isin(), so
# their rows only line up if dmri_df and ocd_df list SampleID in the same
# order -- confirm against the data.
train_coef = []
train_intercept = []
for i in range(1,77):
  sample_ids = list(batch_df.loc[batch_df['Train_'+str(i)]==1]['SampleID'])
  # Every column after the first (presumably SampleID) is used as a feature.
  dmri_np = dmri_df.loc[dmri_df['SampleID'].isin(sample_ids)].to_numpy()[:,1:]
  dmri_np=dmri_np.astype('float64')
  # Missing feature values are imputed with the constant 1.0.
  dmri_np[np.isnan(dmri_np)]=1.0
  ocd_np = np.concatenate(ocd_df.loc[ocd_df['SampleID'].isin(sample_ids)].to_numpy()[:,1:])
  # Label 2 is merged into label 1 before fitting.
  ocd_np[ocd_np==2]=1
  ocd_np = ocd_np.astype('int')
  # The saga solver shuffles the data; a fixed random_state makes the fitted
  # coefficients reproducible across Restart & Run All.
  model = LogisticRegression(penalty='elasticnet', solver='saga', l1_ratio=0.8, max_iter=5000, random_state=0).fit(dmri_np,ocd_np)
  train_coef.append(model.coef_)
  train_intercept.append(model.intercept_)
  print("Progress ===> ",i," model completed")
print(train_coef)
print(train_intercept)
# Stack the per-split results: one row of coefficients / one intercept per split.
train_coef = np.concatenate(train_coef)
train_intercept = np.concatenate(train_intercept)

In [None]:
from sklearn.metrics import confusion_matrix

# Average the per-split dMRI models into a single coefficient vector/intercept.
train_model_coef = np.mean(train_coef, axis=0)
train_model_intercept = np.mean(train_intercept)

# Evaluate that averaged model on the samples flagged Valid == 1 in batch_df.
sample_ids = list(batch_df.loc[batch_df['Valid']==1]['SampleID'])
dmri_train_np = dmri_df.loc[dmri_df['SampleID'].isin(sample_ids)].to_numpy()[:,1:]
dmri_train_np = dmri_train_np.astype('float64')
# Missing feature values are imputed with the constant 1.0, as in training.
dmri_train_np[np.isnan(dmri_train_np)]=1.0
ocd_train_np = np.concatenate(ocd_df.loc[ocd_df['SampleID'].isin(sample_ids)].to_numpy()[:,1:])
ocd_train_np[ocd_train_np==2]=1
train_y_prob=[]
# Iterate over the dMRI rows themselves. The loop used to be bounded by
# len(que_train_np), a leftover from the questionnaire section, which breaks
# on a fresh kernel and mis-sizes the predictions if the row counts differ.
for i in range(len(dmri_train_np)):
  train_y_prob.append(1/(1+np.exp(-(np.dot(dmri_train_np[i],train_model_coef)+train_model_intercept))))
# Threshold the probabilities at 0.5 to obtain hard class predictions.
train_y_pred=np.array(train_y_prob)
train_y_pred[train_y_pred>0.5]=1
train_y_pred[train_y_pred<=0.5]=0
train_y_pred = train_y_pred.astype('int64')
ocd_train_np = ocd_train_np.astype('int64')
print(accuracy_score(ocd_train_np,train_y_pred))
print(confusion_matrix(ocd_train_np,train_y_pred))

In [None]:
from sklearn.metrics import confusion_matrix

# scikit-learn lays the 2x2 matrix out as [[TN, FP], [FN, TP]]; unpack it row
# by row into the four counts used by the metrics cell.
(TN, FP), (FN, TP) = confusion_matrix(ocd_train_np,train_y_pred)

print(confusion_matrix(ocd_train_np, train_y_pred))

In [None]:
from math import sqrt

# Derived statistics for the 2x2 confusion matrix whose counts TP, FN, FP, TN
# were unpacked in the previous cell.
P = TP + FN    # actual positives
N = FP + TN    # actual negatives
PP = TP + FP   # predicted positives
PN = FN + TN   # predicted negatives

print("Population =", P+N)
print("Prevalence =", P/(P+N))

# Cohen's kappa: the numerator is TP*TN - FN*FP (the FN factor was previously
# written as TN, which gave a wrong value).
CK = (2*((TP * TN) - (FN * FP)))/(((TP + FP) * (FP + TN)) + ((TP + FN) * (FN+TN)))
ACC = (TP + TN) / (P + N)   # accuracy
PPV = TP / PP               # positive predictive value (precision)
FOR = FN / PN               # false omission rate
FDR = FP / PP               # false discovery rate
NPV = TN / PN               # negative predictive value
TPR = TP / P                # true positive rate (sensitivity)
FPR = FP / N                # false positive rate
FNR = FN / P                # false negative rate
TNR = TN / N                # true negative rate (specificity)
LRp = TPR / FPR             # positive likelihood ratio
LRn = FNR / TNR             # negative likelihood ratio
MK = PPV + NPV - 1          # markedness
BM = TPR + TNR - 1          # bookmaker informedness
# Prevalence threshold uses the geometric mean sqrt(TPR*FPR), not sqrt(TPR+FPR);
# it is undefined when TPR == FPR, so report NaN in that case.
PT = (sqrt(TPR*FPR) - FPR) / (TPR - FPR) if TPR != FPR else float('nan')
DOR = LRp/LRn               # diagnostic odds ratio
BA = (TPR + TNR) / 2        # balanced accuracy
FS = (2*PPV * TPR) / (PPV + TPR)   # F1 score
FM = sqrt(PPV * TPR)        # Fowlkes-Mallows index
MCC = sqrt(TPR*TNR*PPV*NPV) - sqrt(FPR*FNR*FDR*FOR)   # Matthews correlation coefficient
TS = TP / (TP + FN + FP)    # threat score

print(CK)
print(ACC)
print(PPV)
print(FOR)
print(FDR)
print(NPV)
print(TPR)
print(FPR)
print(TNR)
print(FNR)
print(LRp)
print(LRn)
print(MK)
print(BM)
print(PT)
print(DOR)
print(BA)
print(FS)
print(FM)
print(MCC)
print(TS)

In [None]:
# Persist the stacked per-split dMRI parameters for the COMBINE step.
np.save('dMRI_coef.npy', train_coef)
np.save('dMRI_intercept.npy', train_intercept)

### sMRI

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import ElasticNet
from sklearn.metrics import accuracy_score

# Fit one elastic-net logistic regression per training split (batch_df columns
# Train_1 .. Train_76) on the sMRI features and collect the fitted
# coefficients / intercepts so they can be averaged afterwards.
# NOTE(review): features and labels are selected independently with isin(), so
# their rows only line up if smri_df and ocd_df list SampleID in the same
# order -- confirm against the data.
train_coef = []
train_intercept = []
for i in range(1,77):
  sample_ids = list(batch_df.loc[batch_df['Train_'+str(i)]==1]['SampleID'])
  # Every column after the first (presumably SampleID) is used as a feature.
  smri_np = smri_df.loc[smri_df['SampleID'].isin(sample_ids)].to_numpy()[:,1:]
  smri_np = smri_np.astype('float64')
  # Missing feature values are imputed with the constant 1.0.
  smri_np[np.isnan(smri_np)]=1.0
  ocd_np = np.concatenate(ocd_df.loc[ocd_df['SampleID'].isin(sample_ids)].to_numpy()[:,1:])
  # Label 2 is merged into label 1 before fitting.
  ocd_np[ocd_np==2]=1
  ocd_np = ocd_np.astype('int')
  # The saga solver shuffles the data; a fixed random_state makes the fitted
  # coefficients reproducible across Restart & Run All.
  model = LogisticRegression(penalty='elasticnet', solver='saga', l1_ratio=0.8, max_iter=5000, random_state=0).fit(smri_np,ocd_np)
  train_coef.append(model.coef_)
  train_intercept.append(model.intercept_)
  print("Progress ===> ",i," model completed")
print(train_coef)
print(train_intercept)
# Stack the per-split results: one row of coefficients / one intercept per split.
train_coef = np.concatenate(train_coef)
train_intercept = np.concatenate(train_intercept)

In [None]:
from sklearn.metrics import confusion_matrix

# Average the per-split sMRI models into a single coefficient vector/intercept.
train_model_coef = np.mean(train_coef, axis=0)
train_model_intercept = np.mean(train_intercept)

# Evaluate that averaged model on the samples flagged Valid == 1 in batch_df.
sample_ids = list(batch_df.loc[batch_df['Valid']==1]['SampleID'])
smri_train_np = smri_df.loc[smri_df['SampleID'].isin(sample_ids)].to_numpy()[:,1:]
smri_train_np = smri_train_np.astype('float64')
# Missing feature values are imputed with the constant 1.0, as in training.
smri_train_np[np.isnan(smri_train_np)]=1.0
ocd_train_np = np.concatenate(ocd_df.loc[ocd_df['SampleID'].isin(sample_ids)].to_numpy()[:,1:])
ocd_train_np[ocd_train_np==2]=1
train_y_prob=[]
# Iterate over the sMRI rows themselves. The loop used to be bounded by
# len(que_train_np), a leftover from the questionnaire section, which breaks
# on a fresh kernel and mis-sizes the predictions if the row counts differ.
for i in range(len(smri_train_np)):
  train_y_prob.append(1/(1+np.exp(-(np.dot(smri_train_np[i],train_model_coef)+train_model_intercept))))
# Threshold the probabilities at 0.5 to obtain hard class predictions.
train_y_pred=np.array(train_y_prob)
train_y_pred[train_y_pred>0.5]=1
train_y_pred[train_y_pred<=0.5]=0
train_y_pred = train_y_pred.astype('int64')
ocd_train_np = ocd_train_np.astype('int64')
print(accuracy_score(ocd_train_np,train_y_pred))
print(confusion_matrix(ocd_train_np,train_y_pred))

In [None]:
from sklearn.metrics import confusion_matrix

# scikit-learn lays the 2x2 matrix out as [[TN, FP], [FN, TP]]; unpack it row
# by row into the four counts used by the metrics cell.
(TN, FP), (FN, TP) = confusion_matrix(ocd_train_np,train_y_pred)

print(confusion_matrix(ocd_train_np, train_y_pred))

In [None]:
from math import sqrt

# Derived statistics for the 2x2 confusion matrix whose counts TP, FN, FP, TN
# were unpacked in the previous cell.
P = TP + FN    # actual positives
N = FP + TN    # actual negatives
PP = TP + FP   # predicted positives
PN = FN + TN   # predicted negatives

print("Population =", P+N)
print("Prevalence =", P/(P+N))

# Cohen's kappa: the numerator is TP*TN - FN*FP (the FN factor was previously
# written as TN, which gave a wrong value).
CK = (2*((TP * TN) - (FN * FP)))/(((TP + FP) * (FP + TN)) + ((TP + FN) * (FN+TN)))
ACC = (TP + TN) / (P + N)   # accuracy
PPV = TP / PP               # positive predictive value (precision)
FOR = FN / PN               # false omission rate
FDR = FP / PP               # false discovery rate
NPV = TN / PN               # negative predictive value
TPR = TP / P                # true positive rate (sensitivity)
FPR = FP / N                # false positive rate
FNR = FN / P                # false negative rate
TNR = TN / N                # true negative rate (specificity)
LRp = TPR / FPR             # positive likelihood ratio
LRn = FNR / TNR             # negative likelihood ratio
MK = PPV + NPV - 1          # markedness
BM = TPR + TNR - 1          # bookmaker informedness
# Prevalence threshold uses the geometric mean sqrt(TPR*FPR), not sqrt(TPR+FPR);
# it is undefined when TPR == FPR, so report NaN in that case.
PT = (sqrt(TPR*FPR) - FPR) / (TPR - FPR) if TPR != FPR else float('nan')
DOR = LRp/LRn               # diagnostic odds ratio
BA = (TPR + TNR) / 2        # balanced accuracy
FS = (2*PPV * TPR) / (PPV + TPR)   # F1 score
FM = sqrt(PPV * TPR)        # Fowlkes-Mallows index
MCC = sqrt(TPR*TNR*PPV*NPV) - sqrt(FPR*FNR*FDR*FOR)   # Matthews correlation coefficient
TS = TP / (TP + FN + FP)    # threat score

print(CK)
print(ACC)
print(PPV)
print(FOR)
print(FDR)
print(NPV)
print(TPR)
print(FPR)
print(TNR)
print(FNR)
print(LRp)
print(LRn)
print(MK)
print(BM)
print(PT)
print(DOR)
print(BA)
print(FS)
print(FM)
print(MCC)
print(TS)

In [None]:
# Persist the stacked per-split sMRI parameters for the COMBINE step.
np.save('sMRI_coef.npy', train_coef)
np.save('sMRI_intercept.npy', train_intercept)

### GENETICS

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import ElasticNet
from sklearn.metrics import accuracy_score

# Fit one elastic-net logistic regression per training split (batch_df columns
# Train_1 .. Train_76) on the genetic features and collect the fitted
# coefficients / intercepts so they can be averaged afterwards.
# Unlike the other modalities, no float cast or NaN imputation is applied here.
# NOTE(review): features and labels are selected independently with isin(), so
# their rows only line up if genetic_df and ocd_df list SampleID in the same
# order -- confirm against the data.
train_coef = []
train_intercept = []
for i in range(1,77):
  sample_ids = list(batch_df.loc[batch_df['Train_'+str(i)]==1]['SampleID'])
  # Every column after the first (presumably SampleID) is used as a feature.
  genetic_np = genetic_df.loc[genetic_df['SampleID'].isin(sample_ids)].to_numpy()[:,1:]
  ocd_np = np.concatenate(ocd_df.loc[ocd_df['SampleID'].isin(sample_ids)].to_numpy()[:,1:])
  # Label 2 is merged into label 1 before fitting.
  ocd_np[ocd_np==2]=1
  ocd_np = ocd_np.astype('int')
  # The saga solver shuffles the data; a fixed random_state makes the fitted
  # coefficients reproducible across Restart & Run All.
  model = LogisticRegression(penalty='elasticnet', solver='saga', l1_ratio=0.8, max_iter=5000, random_state=0).fit(genetic_np,ocd_np)
  train_coef.append(model.coef_)
  train_intercept.append(model.intercept_)
  print("Progress ===> ",i," model completed")
print(train_coef)
print(train_intercept)
# Stack the per-split results: one row of coefficients / one intercept per split.
train_coef = np.concatenate(train_coef)
train_intercept = np.concatenate(train_intercept)

In [None]:
from sklearn.metrics import confusion_matrix

# Collapse the per-split genetic models into a single model by averaging their
# coefficient rows and intercepts.
train_model_coef = np.mean(train_coef, axis=0)
train_model_intercept = np.mean(train_intercept)

# Evaluate that averaged model on the samples flagged Valid == 1 in batch_df.
sample_ids = list(batch_df.loc[batch_df['Valid']==1]['SampleID'])
genetic_train_np = genetic_df.loc[genetic_df['SampleID'].isin(sample_ids)].to_numpy()[:,1:]
ocd_train_np = np.concatenate(ocd_df.loc[ocd_df['SampleID'].isin(sample_ids)].to_numpy()[:,1:])
ocd_train_np[ocd_train_np==2] = 1

# Logistic (sigmoid) probability for each validation row.
train_y_prob = [
  1/(1+np.exp(-(np.dot(row,train_model_coef)+train_model_intercept)))
  for row in genetic_train_np
]

# Threshold the probabilities at 0.5 to obtain hard class predictions.
train_y_pred = np.array(train_y_prob)
train_y_pred[train_y_pred>0.5] = 1
train_y_pred[train_y_pred<=0.5] = 0
ocd_train_np = ocd_train_np.astype('int64')
train_y_pred = train_y_pred.astype('int64')
print(accuracy_score(ocd_train_np,train_y_pred))
print(confusion_matrix(ocd_train_np,train_y_pred))

In [None]:
from sklearn.metrics import confusion_matrix

# scikit-learn lays the 2x2 matrix out as [[TN, FP], [FN, TP]]; unpack it row
# by row into the four counts used by the metrics cell.
(TN, FP), (FN, TP) = confusion_matrix(ocd_train_np,train_y_pred)

print(confusion_matrix(ocd_train_np, train_y_pred))

In [None]:
from math import sqrt

# Derived statistics for the 2x2 confusion matrix whose counts TP, FN, FP, TN
# were unpacked in the previous cell.
P = TP + FN    # actual positives
N = FP + TN    # actual negatives
PP = TP + FP   # predicted positives
PN = FN + TN   # predicted negatives

print("Population =", P+N)
print("Prevalence =", P/(P+N))

# Cohen's kappa: the numerator is TP*TN - FN*FP (the FN factor was previously
# written as TN, which gave a wrong value).
CK = (2*((TP * TN) - (FN * FP)))/(((TP + FP) * (FP + TN)) + ((TP + FN) * (FN+TN)))
ACC = (TP + TN) / (P + N)   # accuracy
PPV = TP / PP               # positive predictive value (precision)
FOR = FN / PN               # false omission rate
FDR = FP / PP               # false discovery rate
NPV = TN / PN               # negative predictive value
TPR = TP / P                # true positive rate (sensitivity)
FPR = FP / N                # false positive rate
FNR = FN / P                # false negative rate
TNR = TN / N                # true negative rate (specificity)
LRp = TPR / FPR             # positive likelihood ratio
LRn = FNR / TNR             # negative likelihood ratio
MK = PPV + NPV - 1          # markedness
BM = TPR + TNR - 1          # bookmaker informedness
# Prevalence threshold uses the geometric mean sqrt(TPR*FPR), not sqrt(TPR+FPR);
# it is undefined when TPR == FPR, so report NaN in that case.
PT = (sqrt(TPR*FPR) - FPR) / (TPR - FPR) if TPR != FPR else float('nan')
DOR = LRp/LRn               # diagnostic odds ratio
BA = (TPR + TNR) / 2        # balanced accuracy
FS = (2*PPV * TPR) / (PPV + TPR)   # F1 score
FM = sqrt(PPV * TPR)        # Fowlkes-Mallows index
MCC = sqrt(TPR*TNR*PPV*NPV) - sqrt(FPR*FNR*FDR*FOR)   # Matthews correlation coefficient
TS = TP / (TP + FN + FP)    # threat score

print(CK)
print(ACC)
print(PPV)
print(FOR)
print(FDR)
print(NPV)
print(TPR)
print(FPR)
print(TNR)
print(FNR)
print(LRp)
print(LRn)
print(MK)
print(BM)
print(PT)
print(DOR)
print(BA)
print(FS)
print(FM)
print(MCC)
print(TS)

In [None]:
# Persist the stacked per-split genetic parameters for the COMBINE step.
np.save('gen_coef.npy', train_coef)
np.save('gen_intercept.npy', train_intercept)

### COMBINE

In [None]:
# Weights
# Hard-coded per-modality accuracies; each modality's vote is weighted by its
# share of the summed accuracies, so the four weights add up to 1.
acc_gen, acc_smri, acc_dmri, acc_que = 0.61, 0.61, 0.54, 0.45
acc_tot = acc_gen + acc_smri + acc_dmri + acc_que

w_gen, w_smri, w_dmri, w_que = (
    acc / acc_tot for acc in (acc_gen, acc_smri, acc_dmri, acc_que)
)

print(w_gen, w_smri, w_dmri, w_que)

In [None]:
# Import coefficients
# Load the per-split coefficients and intercepts written by the four modality
# sections. The names must match the saved files exactly: the sMRI and dMRI
# sections write 'sMRI_*.npy' and 'dMRI_*.npy', so loading the lowercase
# spellings fails with FileNotFoundError on case-sensitive filesystems.
gen_coef = np.load('gen_coef.npy')
gen_intercept = np.load('gen_intercept.npy')

que_coef = np.load('que_coef.npy')
que_intercept = np.load('que_intercept.npy')

smri_coef = np.load('sMRI_coef.npy')
smri_intercept = np.load('sMRI_intercept.npy')

dmri_coef = np.load('dMRI_coef.npy')
dmri_intercept = np.load('dMRI_intercept.npy')

In [None]:
from sklearn.metrics import confusion_matrix

# Score each modality's averaged model on the samples flagged Test == 1 and
# keep one probability vector per modality (gen/smri/dmri/que_y_prob).
# Every loop is bounded by its own feature matrix. The sMRI and dMRI loops used
# to be bounded by len(que_train_np), which at that point still holds an array
# from an earlier cell and therefore has the wrong length for the test set.
# NOTE(review): the four vectors are added element-wise in the next cell, which
# assumes every modality table lists the test SampleIDs in the same order --
# confirm against the data.

# gen
train_model_coef = np.mean(gen_coef, axis=0)
train_model_intercept = np.mean(gen_intercept)

sample_ids = list(batch_df.loc[batch_df['Test']==1]['SampleID'])
genetic_train_np = genetic_df.loc[genetic_df['SampleID'].isin(sample_ids)].to_numpy()[:,1:]
ocd_train_np = np.concatenate(ocd_df.loc[ocd_df['SampleID'].isin(sample_ids)].to_numpy()[:,1:])
ocd_train_np[ocd_train_np==2]=1
train_y_prob=[]
for i in range(len(genetic_train_np)):
  train_y_prob.append(1/(1+np.exp(-(np.dot(genetic_train_np[i],train_model_coef)+train_model_intercept))))
gen_y_prob=np.array(train_y_prob)

# sMRI
train_model_coef = np.mean(smri_coef, axis=0)
train_model_intercept = np.mean(smri_intercept)

sample_ids = list(batch_df.loc[batch_df['Test']==1]['SampleID'])
smri_train_np = smri_df.loc[smri_df['SampleID'].isin(sample_ids)].to_numpy()[:,1:]
smri_train_np = smri_train_np.astype('float64')
# Missing feature values are imputed with the constant 1.0, as in training.
smri_train_np[np.isnan(smri_train_np)]=1.0
ocd_train_np = np.concatenate(ocd_df.loc[ocd_df['SampleID'].isin(sample_ids)].to_numpy()[:,1:])
ocd_train_np[ocd_train_np==2]=1
train_y_prob=[]
for i in range(len(smri_train_np)):
  train_y_prob.append(1/(1+np.exp(-(np.dot(smri_train_np[i],train_model_coef)+train_model_intercept))))
smri_y_prob=np.array(train_y_prob)

# dMRI
train_model_coef = np.mean(dmri_coef, axis=0)
train_model_intercept = np.mean(dmri_intercept)

sample_ids = list(batch_df.loc[batch_df['Test']==1]['SampleID'])
dmri_train_np = dmri_df.loc[dmri_df['SampleID'].isin(sample_ids)].to_numpy()[:,1:]
dmri_train_np = dmri_train_np.astype('float64')
dmri_train_np[np.isnan(dmri_train_np)]=1.0
ocd_train_np = np.concatenate(ocd_df.loc[ocd_df['SampleID'].isin(sample_ids)].to_numpy()[:,1:])
ocd_train_np[ocd_train_np==2]=1
train_y_prob=[]
for i in range(len(dmri_train_np)):
  train_y_prob.append(1/(1+np.exp(-(np.dot(dmri_train_np[i],train_model_coef)+train_model_intercept))))
dmri_y_prob=np.array(train_y_prob)

# que
train_model_coef = np.mean(que_coef, axis=0)
train_model_intercept = np.mean(que_intercept)

sample_ids = list(batch_df.loc[batch_df['Test']==1]['SampleID'])
que_train_np = que_df.loc[que_df['SampleID'].isin(sample_ids)].to_numpy()[:,1:]
que_train_np = que_train_np.astype('float64')
que_train_np[np.isnan(que_train_np)]=1.0
ocd_train_np = np.concatenate(ocd_df.loc[ocd_df['SampleID'].isin(sample_ids)].to_numpy()[:,1:])
ocd_train_np[ocd_train_np==2]=1
train_y_prob=[]
for i in range(len(que_train_np)):
  train_y_prob.append(1/(1+np.exp(-(np.dot(que_train_np[i],train_model_coef)+train_model_intercept))))
que_y_prob=np.array(train_y_prob)

In [None]:
# Scale Probabilities
# Weight each modality's probability vector by its accuracy-derived weight and
# add the four weighted vectors into one combined score per sample.
wgen_y_prob = w_gen * gen_y_prob
wsmri_y_prob = w_smri * smri_y_prob
wdmri_y_prob = w_dmri * dmri_y_prob
wque_y_prob = w_que * que_y_prob

total_y_prob = wgen_y_prob + wsmri_y_prob + wdmri_y_prob + wque_y_prob

print(total_y_prob)

# Turn the combined scores into hard labels in place (above 0.5 -> 1, otherwise
# 0); from here on total_y_prob holds class predictions, not probabilities.
total_y_prob[np.greater(total_y_prob, 0.5)] = 1
total_y_prob[np.less_equal(total_y_prob, 0.5)] = 0
ocd_train_np = ocd_train_np.astype('int64')
total_y_prob = total_y_prob.astype('int64')
print(accuracy_score(ocd_train_np,total_y_prob))
print(confusion_matrix(ocd_train_np,total_y_prob))

In [None]:
from sklearn.metrics import confusion_matrix

# scikit-learn lays the 2x2 matrix out as [[TN, FP], [FN, TP]]; unpack it row
# by row into the four counts used by the metrics cell.
(TN, FP), (FN, TP) = confusion_matrix(ocd_train_np,total_y_prob)

print(confusion_matrix(ocd_train_np, total_y_prob))

In [None]:
from math import sqrt

# Derived statistics for the 2x2 confusion matrix whose counts TP, FN, FP, TN
# were unpacked in the previous cell.
P = TP + FN    # actual positives
N = FP + TN    # actual negatives
PP = TP + FP   # predicted positives
PN = FN + TN   # predicted negatives

print("Population =", P+N)
print("Prevalence =", P/(P+N))

# Cohen's kappa: the numerator is TP*TN - FN*FP (the FN factor was previously
# written as TN, which gave a wrong value).
CK = (2*((TP * TN) - (FN * FP)))/(((TP + FP) * (FP + TN)) + ((TP + FN) * (FN+TN)))
ACC = (TP + TN) / (P + N)   # accuracy
PPV = TP / PP               # positive predictive value (precision)
FOR = FN / PN               # false omission rate
FDR = FP / PP               # false discovery rate
NPV = TN / PN               # negative predictive value
TPR = TP / P                # true positive rate (sensitivity)
FPR = FP / N                # false positive rate
FNR = FN / P                # false negative rate
TNR = TN / N                # true negative rate (specificity)
LRp = TPR / FPR             # positive likelihood ratio
LRn = FNR / TNR             # negative likelihood ratio
MK = PPV + NPV - 1          # markedness
BM = TPR + TNR - 1          # bookmaker informedness
# Prevalence threshold uses the geometric mean sqrt(TPR*FPR), not sqrt(TPR+FPR);
# it is undefined when TPR == FPR, so report NaN in that case.
PT = (sqrt(TPR*FPR) - FPR) / (TPR - FPR) if TPR != FPR else float('nan')
DOR = LRp/LRn               # diagnostic odds ratio
BA = (TPR + TNR) / 2        # balanced accuracy
FS = (2*PPV * TPR) / (PPV + TPR)   # F1 score
FM = sqrt(PPV * TPR)        # Fowlkes-Mallows index
MCC = sqrt(TPR*TNR*PPV*NPV) - sqrt(FPR*FNR*FDR*FOR)   # Matthews correlation coefficient
TS = TP / (TP + FN + FP)    # threat score

print(CK)
print(ACC)
print(PPV)
print(FOR)
print(FDR)
print(NPV)
print(TPR)
print(FPR)
print(TNR)
print(FNR)
print(LRp)
print(LRn)
print(MK)
print(BM)
print(PT)
print(DOR)
print(BA)
print(FS)
print(FM)
print(MCC)
print(TS)

### AMBIGUOUS CASES

In [None]:
from sklearn.metrics import confusion_matrix

# Score each modality's averaged model on the samples flagged Experimental == 1
# and keep one probability vector per modality (gen/smri/dmri/que_y_prob).
# Every loop is bounded by its own feature matrix. The sMRI and dMRI loops used
# to be bounded by len(que_train_np), which at that point still holds the array
# built for a different sample set and therefore has the wrong length.
# NOTE(review): the four vectors are added element-wise in the next cell, which
# assumes every modality table lists these SampleIDs in the same order --
# confirm against the data.

# gen
train_model_coef = np.mean(gen_coef, axis=0)
train_model_intercept = np.mean(gen_intercept)

sample_ids = list(batch_df.loc[batch_df['Experimental']==1]['SampleID'])
genetic_train_np = genetic_df.loc[genetic_df['SampleID'].isin(sample_ids)].to_numpy()[:,1:]
ocd_train_np = np.concatenate(ocd_df.loc[ocd_df['SampleID'].isin(sample_ids)].to_numpy()[:,1:])
train_y_prob=[]
for i in range(len(genetic_train_np)):
  train_y_prob.append(1/(1+np.exp(-(np.dot(genetic_train_np[i],train_model_coef)+train_model_intercept))))
gen_y_prob=np.array(train_y_prob)

# sMRI
train_model_coef = np.mean(smri_coef, axis=0)
train_model_intercept = np.mean(smri_intercept)

sample_ids = list(batch_df.loc[batch_df['Experimental']==1]['SampleID'])
smri_train_np = smri_df.loc[smri_df['SampleID'].isin(sample_ids)].to_numpy()[:,1:]
smri_train_np = smri_train_np.astype('float64')
# Missing feature values are imputed with the constant 1.0, as in training.
smri_train_np[np.isnan(smri_train_np)]=1.0
ocd_train_np = np.concatenate(ocd_df.loc[ocd_df['SampleID'].isin(sample_ids)].to_numpy()[:,1:])
train_y_prob=[]
for i in range(len(smri_train_np)):
  train_y_prob.append(1/(1+np.exp(-(np.dot(smri_train_np[i],train_model_coef)+train_model_intercept))))
smri_y_prob=np.array(train_y_prob)

# dMRI
train_model_coef = np.mean(dmri_coef, axis=0)
train_model_intercept = np.mean(dmri_intercept)

sample_ids = list(batch_df.loc[batch_df['Experimental']==1]['SampleID'])
dmri_train_np = dmri_df.loc[dmri_df['SampleID'].isin(sample_ids)].to_numpy()[:,1:]
dmri_train_np = dmri_train_np.astype('float64')
dmri_train_np[np.isnan(dmri_train_np)]=1.0
ocd_train_np = np.concatenate(ocd_df.loc[ocd_df['SampleID'].isin(sample_ids)].to_numpy()[:,1:])
train_y_prob=[]
for i in range(len(dmri_train_np)):
  train_y_prob.append(1/(1+np.exp(-(np.dot(dmri_train_np[i],train_model_coef)+train_model_intercept))))
dmri_y_prob=np.array(train_y_prob)

# que
train_model_coef = np.mean(que_coef, axis=0)
train_model_intercept = np.mean(que_intercept)

sample_ids = list(batch_df.loc[batch_df['Experimental']==1]['SampleID'])
que_train_np = que_df.loc[que_df['SampleID'].isin(sample_ids)].to_numpy()[:,1:]
que_train_np = que_train_np.astype('float64')
que_train_np[np.isnan(que_train_np)]=1.0
ocd_train_np = np.concatenate(ocd_df.loc[ocd_df['SampleID'].isin(sample_ids)].to_numpy()[:,1:])
train_y_prob=[]
for i in range(len(que_train_np)):
  train_y_prob.append(1/(1+np.exp(-(np.dot(que_train_np[i],train_model_coef)+train_model_intercept))))
que_y_prob=np.array(train_y_prob)

In [None]:
# Scale Probabilities
# Weight each modality's probability vector by its accuracy-derived weight and
# add the four weighted vectors into one combined score per sample.
wgen_y_prob = w_gen * gen_y_prob
wsmri_y_prob = w_smri * smri_y_prob
wdmri_y_prob = w_dmri * dmri_y_prob
wque_y_prob = w_que * que_y_prob

total_y_prob = wgen_y_prob + wsmri_y_prob + wdmri_y_prob + wque_y_prob

print(total_y_prob)

# Turn the combined scores into hard labels in place (above 0.5 -> 1, otherwise
# 0); from here on total_y_prob holds class predictions, not probabilities.
total_y_prob[np.greater(total_y_prob, 0.5)] = 1
total_y_prob[np.less_equal(total_y_prob, 0.5)] = 0
total_y_prob = total_y_prob.astype('int64')

In [None]:
# Combine SampleIDs
# Pair every selected SampleID with its predicted label and write the pairs to
# PredAmbig.csv (columns SampleID, OCD; no index column).
# NOTE(review): sample_ids follows batch_df's row order while the predictions
# follow the feature tables' row order -- verify both orders agree, otherwise
# IDs and labels are mismatched.
PredAmbig = [(sample_id, label) for sample_id, label in zip(sample_ids, total_y_prob)]

df = pd.DataFrame(PredAmbig, columns = ["SampleID", "OCD"])

df.to_csv('PredAmbig.csv', index=False)


In [None]:
from math import sqrt

# Placeholder confusion-matrix counts to be replaced by hand. The fourth count
# was previously assigned to the misspelled name TF, so TN silently kept
# whatever value an earlier cell had left behind.
TP=1
FN=1
FP=1
TN=1

P = TP + FN    # actual positives
N = FP + TN    # actual negatives
PP = TP + FP   # predicted positives
PN = FN + TN   # predicted negatives

print("Population =", P+N)
print("Prevalence =", P/(P+N))

# Cohen's kappa: the numerator is TP*TN - FN*FP (the FN factor was previously
# written as TN, which gave a wrong value).
CK = (2*((TP * TN) - (FN * FP)))/(((TP + FP) * (FP + TN)) + ((TP + FN) * (FN+TN)))
ACC = (TP + TN) / (P + N)   # accuracy
PPV = TP / PP               # positive predictive value (precision)
FOR = FN / PN               # false omission rate
FDR = FP / PP               # false discovery rate
NPV = TN / PN               # negative predictive value
TPR = TP / P                # true positive rate (sensitivity)
FPR = FP / N                # false positive rate
FNR = FN / P                # false negative rate
TNR = TN / N                # true negative rate (specificity)
LRp = TPR / FPR             # positive likelihood ratio
LRn = FNR / TNR             # negative likelihood ratio
MK = PPV + NPV - 1          # markedness
BM = TPR + TNR - 1          # bookmaker informedness
# Prevalence threshold uses the geometric mean sqrt(TPR*FPR), not sqrt(TPR+FPR).
# It is undefined when TPR == FPR (as with the all-ones placeholders), so
# report NaN instead of dividing by zero.
PT = (sqrt(TPR*FPR) - FPR) / (TPR - FPR) if TPR != FPR else float('nan')
DOR = LRp/LRn               # diagnostic odds ratio
BA = (TPR + TNR) / 2        # balanced accuracy
FS = (2*PPV * TPR) / (PPV + TPR)   # F1 score
FM = sqrt(PPV * TPR)        # Fowlkes-Mallows index
MCC = sqrt(TPR*TNR*PPV*NPV) - sqrt(FPR*FNR*FDR*FOR)   # Matthews correlation coefficient
TS = TP / (TP + FN + FP)    # threat score

print(CK)
print(ACC)
print(PPV)
print(FOR)
print(FDR)
print(NPV)
print(TPR)
print(FPR)
print(TNR)
print(FNR)
print(LRp)
print(LRn)
print(MK)
print(BM)
print(PT)
print(DOR)
print(BA)
print(FS)
print(FM)
print(MCC)
print(TS)