In [1]:
# Handle table-like data and matrices
import numpy as np
import pandas as pd
from sympy import *
from scipy.stats import fisher_exact
# Helpers
import os
import sys
sys.path.insert(0,'../')
from scipy.special import digamma,betaln
import time

# Prediction
from classifiers import MAP_estimator,cal_p_value,TCRs_selection,LOOCV_MAP

from sklearn.metrics import accuracy_score, roc_curve, roc_auc_score,confusion_matrix,log_loss
from sklearn.model_selection import LeaveOneOut,KFold,StratifiedKFold

# Visualisation
import matplotlib.pyplot as plt
import seaborn as sns
plt.style.use('ggplot')
from mpl_toolkits.mplot3d import Axes3D
%matplotlib inline

# import plotly
# plotly.tools.set_credentials_file(username='tracyqin326', api_key='EICCf5vuIzI5hVfA4gYC')
# import plotly.plotly as py
# import plotly.graph_objs as go

# Ignore warnings
import warnings
warnings.filterwarnings('ignore')

#  Data load

In [3]:
train_origin = pd.read_csv('../data/'+'train.csv')
count_df = pd.read_pickle('../data/'+'count_df.pkl')
priors_init = [[1.35,19541.5],[18.5,12364.7]]
threshold = 0.2

In [4]:
LOOCV_MAP(train_origin,count_df,threshold,priors_init_value=priors_init)

Length of associated TCRs in this round: 69
test sample:  RA47  unique_TCRs:  16495  associated_TCRs:  4
priors_c0: [7.86867342e-01 1.95414999e+04] priors_c1: [8.23216636e+00 1.23646757e+04]
y_true: 1  y_pred: 1  y_proba_c1: 0.870

Length of associated TCRs in this round: 31
test sample:  HC9  unique_TCRs:  14907  associated_TCRs:  2
priors_c0: [4.64666746e-01 1.95415001e+04] priors_c1: [   13.500004   12364.70632461]
y_true: 0  y_pred: 0  y_proba_c1: 0.044

Length of associated TCRs in this round: 65
test sample:  RA29  unique_TCRs:  22462  associated_TCRs:  4
priors_c0: [6.98554541e-01 1.95414999e+04] priors_c1: [7.81674405e+00 1.23646782e+04]
y_true: 1  y_pred: 1  y_proba_c1: 0.679

Length of associated TCRs in this round: 69
test sample:  RA8  unique_TCRs:  6143  associated_TCRs:  1
priors_c0: [7.86867342e-01 1.95414999e+04] priors_c1: [8.17435551e+00 1.23646761e+04]
y_true: 1  y_pred: 1  y_proba_c1: 0.672

Length of associated TCRs in this round: 69
test sample:  RA63  unique_TCRs

Length of associated TCRs in this round: 68
test sample:  RA22  unique_TCRs:  16752  associated_TCRs:  4
priors_c0: [7.86867342e-01 1.95414999e+04] priors_c1: [8.13999560e+00 1.23646764e+04]
y_true: 1  y_pred: 1  y_proba_c1: 0.866

Length of associated TCRs in this round: 67
test sample:  RA31  unique_TCRs:  5152  associated_TCRs:  4
priors_c0: [7.86867342e-01 1.95414999e+04] priors_c1: [7.90597365e+00 1.23646777e+04]
y_true: 1  y_pred: 1  y_proba_c1: 0.998

Length of associated TCRs in this round: 67
test sample:  RA81  unique_TCRs:  10091  associated_TCRs:  5
priors_c0: [6.98554541e-01 1.95414999e+04] priors_c1: [7.88758210e+00 1.23646779e+04]
y_true: 1  y_pred: 1  y_proba_c1: 0.996

Length of associated TCRs in this round: 68
test sample:  RA37  unique_TCRs:  1776  associated_TCRs:  3
priors_c0: [7.86867342e-01 1.95414999e+04] priors_c1: [7.96604545e+00 1.23646775e+04]
y_true: 1  y_pred: 1  y_proba_c1: 0.999

Length of associated TCRs in this round: 60
test sample:  RA52  unique_TCR

Length of associated TCRs in this round: 69
test sample:  RA11  unique_TCRs:  7655  associated_TCRs:  2
priors_c0: [7.86867342e-01 1.95414999e+04] priors_c1: [8.17407782e+00 1.23646761e+04]
y_true: 1  y_pred: 1  y_proba_c1: 0.885

Length of associated TCRs in this round: 34
test sample:  HC4  unique_TCRs:  16001  associated_TCRs:  4
priors_c0: [6.41209149e-01 1.95415000e+04] priors_c1: [   13.50000328 12364.70572609]
y_true: 0  y_pred: 0  y_proba_c1: 0.445

Length of associated TCRs in this round: 69
test sample:  RA19  unique_TCRs:  5104  associated_TCRs:  1
priors_c0: [7.86867342e-01 1.95414999e+04] priors_c1: [8.16136700e+00 1.23646762e+04]
y_true: 1  y_pred: 1  y_proba_c1: 0.764

Length of associated TCRs in this round: 68
test sample:  RA30  unique_TCRs:  6572  associated_TCRs:  2
priors_c0: [7.86867342e-01 1.95414999e+04] priors_c1: [8.07148315e+00 1.23646768e+04]
y_true: 1  y_pred: 1  y_proba_c1: 0.925

Length of associated TCRs in this round: 62
test sample:  RA24  unique_TCRs:

# Test different prior initialization methods

## Method 1: partial derivative of objective function with respect to a,b = 0

In [29]:
# Test with a sample of negative class
n = train_cv_c0['unique_TCRs'].tolist()
k = train_cv_c0['phenotype_associated_TCRs'].tolist()
N_l = len(n)

In [11]:
# gradient a
def fun_a(a,b):
    sum_a = 0
    for i in range(N_l):
        sum_a += digamma(k[i] + a) - digamma(n[i] + k[i] + a + b)
        
    gradient_a = -(-N_l * (digamma(a) - digamma(a + b)) + sum_a)
    
    return gradient_a

# gradient b
def fun_b(a,b):

    sum_b = 0
    for i in range(N_l):
        sum_b += digamma(n[i] - k[i] + b) - digamma(n[i] + k[i] + a + b)
        
    gradient_b = -(-N_l * (digamma(b) - digamma(a + b)) + sum_b)
    
    return gradient_b

In [32]:
a = Symbol("a",positive=True)
b = Symbol("b",positive=True)
solve([-N_l * (digamma(a) - digamma(a + b)) +sum([digamma(k[i] + a) - digamma(n[i] + k[i] + a + b) for i in range(len(n))]),-N_l * (digamma(b) - digamma(a + b)) +sum([digamma(n[i] - k[i] + b) - digamma(n[i] + k[i] + a + b) for i in range(len(n))])],[a,b])

TypeError: ufunc 'psi' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''

### Method 1 Failed (the first time a NoteImplementError occured)

## Method 2: a, b estimation based on some relationship with population parameters   (assume N as mean of n)

### Method 2.1 the relationship with factorial moments

In [40]:
def prior_init_estimation(train):
    
    train_c0 = train[train['phenotype_status']==0]
    train_c1 = train[train['phenotype_status']==1]
    
    n_c0 = train_c0['unique_TCRs'].tolist()
    k_c0 = train_c0['phenotype_associated_TCRs'].tolist()
    N_c0 = np.mean(n_c0)

    n_c1 = train_c1['unique_TCRs'].tolist()
    k_c1 = train_c1['phenotype_associated_TCRs'].tolist()
    N_c1 = np.mean(n_c1)
    
    E_x0_c0 = 1
    E_x1_c0 = np.mean(np.array(k_c0))
    E_x2_c0 = np.mean(np.array(k_c0)*(np.array(k_c0)-1))
    sigma0_c0 = E_x1_c0/E_x0_c0
    sigma1_c0 = E_x2_c0/E_x1_c0

    E_x0_c1 = 1
    E_x1_c1 = np.mean(np.array(k_c1))
    E_x2_c1 = np.mean(np.array(k_c1)*(np.array(k_c1)-1))
    sigma0_c1 = E_x1_c1/E_x0_c1
    sigma1_c1 = E_x2_c1/E_x1_c1
    
    a_c0 = sigma0_c0*(N_c0-1-sigma1_c0)/(sigma0_c0+N_c0*(sigma1_c0-sigma0_c0))
    b_c0 = (N_c0-sigma0_c0)*(N_c0-1-sigma1_c0)/(sigma0_c0+N_c0*(sigma1_c0-sigma0_c0))

    a_c1 = sigma0_c1*(N_c1-1-sigma1_c1)/(sigma0_c1+N_c1*(sigma1_c1-sigma0_c1))
    b_c1 = (N_c1-sigma0_c1)*(N_c1-1-sigma1_c1)/(sigma0_c1+N_c1*(sigma1_c1-sigma0_c1))
    
    return [[a_c0,b_c0],[a_c1,b_c1]]

In [48]:
Loocv_MAP(train_origin,TCRs,threshold,prior_init_estimation)

Length of associated TCRs in this round: 69
test sample:  RA47  unique_TCRs:  16495  associated_TCRs:  4
priors initialization:  [[1.0463234952426572, 27261.191364650924], [2.8284452198562606, 4489.408080434205]]
priors_c0: [1.04962113e+00 2.72611913e+04] priors_c1: [3.26932014e+00 4.48940504e+03]
y_true: 1  y_pred: 1  y_proba_c1: 0.915  y_proba_c1_v2: 0.915

Length of associated TCRs in this round: 31
test sample:  HC9  unique_TCRs:  14907  associated_TCRs:  2
priors initialization:  [[0.43777159737869187, 18284.8927206018], [4.617767152900428, 13824.298640442716]]
priors_c0: [4.40676708e-01 1.82848927e+04] priors_c1: [5.09985110e+00 1.38242967e+04]
y_true: 0  y_pred: 1  y_proba_c1: 0.871  y_proba_c1_v2: 0.871

Length of associated TCRs in this round: 65
test sample:  RA29  unique_TCRs:  22462  associated_TCRs:  4
priors initialization:  [[0.8031399721553325, 22419.93915842045], [2.6975230724189987, 4498.769728818299]]
priors_c0: [7.86373912e-01 2.24199391e+04] priors_c1: [3.11572957e

Length of associated TCRs in this round: 67
test sample:  RA9  unique_TCRs:  11509  associated_TCRs:  5
priors initialization:  [[1.0463234952426572, 27261.191364650924], [2.851749260798811, 4668.6131400445465]]
priors_c0: [1.04962113e+00 2.72611913e+04] priors_c1: [3.27834868e+00 4.66861015e+03]
y_true: 1  y_pred: 1  y_proba_c1: 0.993  y_proba_c1_v2: 0.993

Length of associated TCRs in this round: 68
test sample:  RA1  unique_TCRs:  3629  associated_TCRs:  2
priors initialization:  [[0.8031399721553325, 22419.93915842045], [2.942099589371645, 4824.066413192331]]
priors_c0: [7.86373912e-01 2.24199391e+04] priors_c1: [3.37592139e+00 4.82406335e+03]
y_true: 1  y_pred: 1  y_proba_c1: 0.981  y_proba_c1_v2: 0.981

Length of associated TCRs in this round: 66
test sample:  RA5  unique_TCRs:  8581  associated_TCRs:  3
priors initialization:  [[1.0463234952426572, 27261.191364650924], [2.920680571289356, 4835.753641409838]]
priors_c0: [1.04962113e+00 2.72611913e+04] priors_c1: [3.36794429e+00 4

Length of associated TCRs in this round: 68
test sample:  RA4  unique_TCRs:  12071  associated_TCRs:  5
priors initialization:  [[1.0463234952426572, 27261.191364650924], [2.752925972604463, 4451.820119359583]]
priors_c0: [1.04962113e+00 2.72611913e+04] priors_c1: [3.18150866e+00 4.45181686e+03]
y_true: 1  y_pred: 1  y_proba_c1: 0.992  y_proba_c1_v2: 0.992

Length of associated TCRs in this round: 69
test sample:  RA66  unique_TCRs:  14050  associated_TCRs:  4
priors initialization:  [[1.0463234952426572, 27261.191364650924], [2.7920165509006587, 4444.540684721971]]
priors_c0: [1.04962113e+00 2.72611913e+04] priors_c1: [3.23006675e+00 4.44453768e+03]
y_true: 1  y_pred: 1  y_proba_c1: 0.952  y_proba_c1_v2: 0.952

Length of associated TCRs in this round: 66
test sample:  RA62  unique_TCRs:  9287  associated_TCRs:  4
priors initialization:  [[1.0463234952426572, 27261.191364650924], [2.6781704266964845, 4439.233223641252]]
priors_c0: [1.04962113e+00 2.72611913e+04] priors_c1: [3.10307006e

Length of associated TCRs in this round: 70
test sample:  RA69  unique_TCRs:  816  associated_TCRs:  0
priors initialization:  [[1.0463234952426572, 27261.191364650924], [2.9761464127120605, 4722.717609723055]]
priors_c0: [1.04962113e+00 2.72611913e+04] priors_c1: [3.41570825e+00 4.72271440e+03]
y_true: 1  y_pred: 1  y_proba_c1: 0.649  y_proba_c1_v2: 0.649

Length of associated TCRs in this round: 37
test sample:  HC10  unique_TCRs:  32310  associated_TCRs:  7
priors initialization:  [[0.5254538969667846, 15698.241686655918], [3.563341309510995, 8889.1091468808]]
priors_c0: [5.68779592e-01 1.56982416e+04] priors_c1: [4.01692052e+00 8.88910737e+03]
y_true: 0  y_pred: 1  y_proba_c1: 0.945  y_proba_c1_v2: 0.945

Length of associated TCRs in this round: 70
test sample:  RA12  unique_TCRs:  5154  associated_TCRs:  6
priors initialization:  [[1.0463234952426572, 27261.191364650924], [2.7810596877841376, 4440.3205534845]]
priors_c0: [1.04962113e+00 2.72611913e+04] priors_c1: [3.20002726e+00 4

### Method 2.1 only get 0.62 cv auroc, dropped

### Method 2.2: relationship with moments

In [46]:
def prior_init_estimation2(train):
    
    train_c0 = train[train['phenotype_status']==0]
    train_c1 = train[train['phenotype_status']==1]
    
    n_c0 = train_c0['unique_TCRs'].tolist()
    k_c0 = train_c0['phenotype_associated_TCRs'].tolist()
    N_c0 = np.mean(n_c0)
    
    n_c1 = train_c1['unique_TCRs'].tolist()
    k_c1 = train_c1['phenotype_associated_TCRs'].tolist()
    N_c1 = np.mean(n_c1)
    
    mu1_c0 = np.mean(np.array(k_c0))
    mu2_c0 = np.mean(np.array(k_c0)**2)
    
    mu1_c1 = np.mean(np.array(k_c1))
    mu2_c1 = np.mean(np.array(k_c1)**2)
    
    a_c0 = (N_c0*mu1_c0-mu2_c0)/(N_c0*((mu2_c0/mu1_c0)-mu1_c0-1)+mu1_c0)
    b_c0 = (N_c0-mu1_c0)*(N_c0-(mu2_c0/mu1_c0))/(N_c0*((mu2_c0/mu1_c0)-mu1_c0-1)+mu1_c0)
    
    a_c1 = (N_c1*mu1_c1-mu2_c1)/(N_c1*((mu2_c1/mu1_c1)-mu1_c1-1)+mu1_c1)
    b_c1 = (N_c1-mu1_c1)*(N_c1-(mu2_c1/mu1_c1))/(N_c1*((mu2_c1/mu1_c1)-mu1_c1-1)+mu1_c1)
    
    return [[a_c0,b_c0],[a_c1,b_c1]]

In [49]:
Loocv_MAP(train_origin,TCRs,threshold,prior_init_estimation2)

Length of associated TCRs in this round: 69
test sample:  RA47  unique_TCRs:  16495  associated_TCRs:  4
priors initialization:  [[1.0463234952426572, 27261.191364650924], [2.8284452198562606, 4489.408080434205]]
priors_c0: [1.04962113e+00 2.72611913e+04] priors_c1: [3.26932014e+00 4.48940504e+03]
y_true: 1  y_pred: 1  y_proba_c1: 0.915  y_proba_c1_v2: 0.915

Length of associated TCRs in this round: 31
test sample:  HC9  unique_TCRs:  14907  associated_TCRs:  2
priors initialization:  [[0.43777159737869187, 18284.8927206018], [4.617767152900424, 13824.298640442701]]
priors_c0: [4.40676708e-01 1.82848927e+04] priors_c1: [5.09985107e+00 1.38242967e+04]
y_true: 0  y_pred: 1  y_proba_c1: 0.871  y_proba_c1_v2: 0.871

Length of associated TCRs in this round: 65
test sample:  RA29  unique_TCRs:  22462  associated_TCRs:  4
priors initialization:  [[0.8031399721553325, 22419.93915842045], [2.6975230724189987, 4498.769728818299]]
priors_c0: [7.86373912e-01 2.24199391e+04] priors_c1: [3.11572957e

Length of associated TCRs in this round: 67
test sample:  RA9  unique_TCRs:  11509  associated_TCRs:  5
priors initialization:  [[1.0463234952426572, 27261.191364650924], [2.851749260798811, 4668.6131400445465]]
priors_c0: [1.04962113e+00 2.72611913e+04] priors_c1: [3.27834868e+00 4.66861015e+03]
y_true: 1  y_pred: 1  y_proba_c1: 0.993  y_proba_c1_v2: 0.993

Length of associated TCRs in this round: 68
test sample:  RA1  unique_TCRs:  3629  associated_TCRs:  2
priors initialization:  [[0.8031399721553325, 22419.93915842045], [2.942099589371645, 4824.066413192331]]
priors_c0: [7.86373912e-01 2.24199391e+04] priors_c1: [3.37592139e+00 4.82406335e+03]
y_true: 1  y_pred: 1  y_proba_c1: 0.981  y_proba_c1_v2: 0.981

Length of associated TCRs in this round: 66
test sample:  RA5  unique_TCRs:  8581  associated_TCRs:  3
priors initialization:  [[1.0463234952426572, 27261.191364650924], [2.920680571289356, 4835.753641409838]]
priors_c0: [1.04962113e+00 2.72611913e+04] priors_c1: [3.36794429e+00 4

Length of associated TCRs in this round: 68
test sample:  RA4  unique_TCRs:  12071  associated_TCRs:  5
priors initialization:  [[1.0463234952426572, 27261.191364650924], [2.752925972604463, 4451.820119359583]]
priors_c0: [1.04962113e+00 2.72611913e+04] priors_c1: [3.18150866e+00 4.45181686e+03]
y_true: 1  y_pred: 1  y_proba_c1: 0.992  y_proba_c1_v2: 0.992

Length of associated TCRs in this round: 69
test sample:  RA66  unique_TCRs:  14050  associated_TCRs:  4
priors initialization:  [[1.0463234952426572, 27261.191364650924], [2.7920165509006587, 4444.540684721971]]
priors_c0: [1.04962113e+00 2.72611913e+04] priors_c1: [3.23006675e+00 4.44453768e+03]
y_true: 1  y_pred: 1  y_proba_c1: 0.952  y_proba_c1_v2: 0.952

Length of associated TCRs in this round: 66
test sample:  RA62  unique_TCRs:  9287  associated_TCRs:  4
priors initialization:  [[1.0463234952426572, 27261.191364650924], [2.6781704266964845, 4439.233223641252]]
priors_c0: [1.04962113e+00 2.72611913e+04] priors_c1: [3.10307006e

Length of associated TCRs in this round: 70
test sample:  RA69  unique_TCRs:  816  associated_TCRs:  0
priors initialization:  [[1.0463234952426572, 27261.191364650924], [2.9761464127120605, 4722.717609723055]]
priors_c0: [1.04962113e+00 2.72611913e+04] priors_c1: [3.41570825e+00 4.72271440e+03]
y_true: 1  y_pred: 1  y_proba_c1: 0.649  y_proba_c1_v2: 0.649

Length of associated TCRs in this round: 37
test sample:  HC10  unique_TCRs:  32310  associated_TCRs:  7
priors initialization:  [[0.5254538969667845, 15698.241686655916], [3.5633413095109963, 8889.109146880806]]
priors_c0: [5.68779592e-01 1.56982416e+04] priors_c1: [4.01692059e+00 8.88910737e+03]
y_true: 0  y_pred: 1  y_proba_c1: 0.945  y_proba_c1_v2: 0.945

Length of associated TCRs in this round: 70
test sample:  RA12  unique_TCRs:  5154  associated_TCRs:  6
priors initialization:  [[1.0463234952426572, 27261.191364650924], [2.7810596877841376, 4440.3205534845]]
priors_c0: [1.04962113e+00 2.72611913e+04] priors_c1: [3.20002726e+0

### Also 0.62

### Method 2.3: based on the expectation of k is ratio a/(a+b)

In [54]:
def prior_init_estimation3(train):
    
    train_c0 = train[train['phenotype_status']==0]
    train_c1 = train[train['phenotype_status']==1]
    
    n_c0 = train_c0['unique_TCRs'].tolist()
    k_c0 = train_c0['phenotype_associated_TCRs'].tolist()
    
    n_c1 = train_c1['unique_TCRs'].tolist()
    k_c1 = train_c1['phenotype_associated_TCRs'].tolist()
    
    
    a_c0 = np.mean(k_c0)
    b_c0 = np.mean(n_c0)-a_c0
    
    a_c1 = np.mean(k_c1)
    b_c1 = np.mean(n_c1)-a_c1
    
    return [[a_c0,b_c0],[a_c1,b_c1]]

In [55]:
Loocv_MAP(train_origin,TCRs,threshold,prior_init_estimation3)

Length of associated TCRs in this round: 69
test sample:  RA47  unique_TCRs:  16495  associated_TCRs:  4
priors initialization:  [[0.75, 19540.7], [8.234375, 13069.890625]]
priors_c0: [7.86840274e-01 1.95406999e+04] priors_c1: [8.68132164e+00 1.30698830e+04]
y_true: 1  y_pred: 1  y_proba_c1: 0.867  y_proba_c1_v2: 0.867

Length of associated TCRs in this round: 31
test sample:  HC9  unique_TCRs:  14907  associated_TCRs:  2
priors initialization:  [[0.47368421052631576, 19784.894736842103], [4.384615384615385, 13126.307692307693]]
priors_c0: [4.69321421e-01 1.97848947e+04] priors_c1: [4.86564550e+00 1.31263058e+04]
y_true: 0  y_pred: 1  y_proba_c1: 0.872  y_proba_c1_v2: 0.872

Length of associated TCRs in this round: 65
test sample:  RA29  unique_TCRs:  22462  associated_TCRs:  4
priors initialization:  [[0.7, 19540.75], [7.78125, 12977.109375]]
priors_c0: [6.98543967e-01 1.95407500e+04] priors_c1: [8.18729501e+00 1.29771035e+04]
y_true: 1  y_pred: 1  y_proba_c1: 0.669  y_proba_c1_v2: 0.

Length of associated TCRs in this round: 66
test sample:  RA5  unique_TCRs:  8581  associated_TCRs:  3
priors initialization:  [[0.75, 19540.7], [7.96875, 13193.8125]]
priors_c0: [7.86840274e-01 1.95406999e+04] priors_c1: [8.42111803e+00 1.31938053e+04]
y_true: 1  y_pred: 1  y_proba_c1: 0.965  y_proba_c1_v2: 0.965

Length of associated TCRs in this round: 62
test sample:  RA48  unique_TCRs:  23112  associated_TCRs:  5
priors initialization:  [[0.75, 19540.7], [7.5625, 12967.171875]]
priors_c0: [7.86840274e-01 1.95406999e+04] priors_c1: [8.01479805e+00 1.29671662e+04]
y_true: 1  y_pred: 1  y_proba_c1: 0.830  y_proba_c1_v2: 0.830

Length of associated TCRs in this round: 67
test sample:  RA6  unique_TCRs:  7346  associated_TCRs:  3
priors initialization:  [[0.75, 19540.7], [8.0625, 13213.015625]]
priors_c0: [7.86840274e-01 1.95406999e+04] priors_c1: [8.49110937e+00 1.32130084e+04]
y_true: 1  y_pred: 1  y_proba_c1: 0.978  y_proba_c1_v2: 0.978

Length of associated TCRs in this round: 32
t

Length of associated TCRs in this round: 30
test sample:  HC5  unique_TCRs:  10600  associated_TCRs:  0
priors initialization:  [[0.5263157894736842, 20011.526315789473], [4.2615384615384615, 13126.43076923077]]
priors_c0: [5.56606098e-01 2.00115262e+04] priors_c1: [4.74191207e+00 1.31264290e+04]
y_true: 0  y_pred: 0  y_proba_c1: 0.202  y_proba_c1_v2: 0.202

Length of associated TCRs in this round: 69
test sample:  RA16  unique_TCRs:  4701  associated_TCRs:  3
priors initialization:  [[0.75, 19540.7], [8.25, 13254.15625]]
priors_c0: [7.86840274e-01 1.95406999e+04] priors_c1: [8.65790465e+00 1.32541497e+04]
y_true: 1  y_pred: 1  y_proba_c1: 0.993  y_proba_c1_v2: 0.993

Length of associated TCRs in this round: 31
test sample:  HC15  unique_TCRs:  8726  associated_TCRs:  1
priors initialization:  [[0.5263157894736842, 20110.157894736843], [4.369230769230769, 13126.323076923078]]
priors_c0: [5.55704684e-01 2.01101578e+04] priors_c1: [4.85460692e+00 1.31263212e+04]
y_true: 0  y_pred: 1  y_p

Length of associated TCRs in this round: 62
test sample:  RA24  unique_TCRs:  19594  associated_TCRs:  6
priors initialization:  [[0.75, 19540.7], [7.546875, 13022.15625]]
priors_c0: [7.86840274e-01 1.95406999e+04] priors_c1: [7.97234687e+00 1.30221507e+04]
y_true: 1  y_pred: 1  y_proba_c1: 0.969  y_proba_c1_v2: 0.969

Length of associated TCRs in this round: 40
test sample:  HC11  unique_TCRs:  42732  associated_TCRs:  12
priors initialization:  [[0.7894736842105263, 18320.105263157897], [5.923076923076923, 13124.769230769232]]
priors_c0: [7.51084330e-01 1.83201052e+04] priors_c1: [6.47146096e+00 1.31247364e+04]
y_true: 0  y_pred: 1  y_proba_c1: 0.980  y_proba_c1_v2: 0.980

Length of associated TCRs in this round: 57
test sample:  RA55  unique_TCRs:  35579  associated_TCRs:  9
priors initialization:  [[0.75, 19540.7], [7.03125, 12772.90625]]
priors_c0: [7.86840274e-01 1.95406999e+04] priors_c1: [7.48069497e+00 1.27729013e+04]
y_true: 1  y_pred: 1  y_proba_c1: 0.939  y_proba_c1_v2: 0.9

### 0.61

## Method 2_modified: using median as N

In [10]:
def prior_init_estimation(train):
    
    train_c0 = train[train['phenotype_status']==0]
    train_c1 = train[train['phenotype_status']==1]
    
    n_c0 = train_c0['unique_TCRs'].tolist()
    k_c0 = train_c0['phenotype_associated_TCRs'].tolist()
    N_c0 = np.median(n_c0)

    n_c1 = train_c1['unique_TCRs'].tolist()
    k_c1 = train_c1['phenotype_associated_TCRs'].tolist()
    N_c1 = np.median(n_c1)
    
    E_x0_c0 = 1
    E_x1_c0 = np.mean(np.array(k_c0))
    E_x2_c0 = np.mean(np.array(k_c0)*(np.array(k_c0)-1))
    sigma0_c0 = E_x1_c0/E_x0_c0
    sigma1_c0 = E_x2_c0/E_x1_c0

    E_x0_c1 = 1
    E_x1_c1 = np.mean(np.array(k_c1))
    E_x2_c1 = np.mean(np.array(k_c1)*(np.array(k_c1)-1))
    sigma0_c1 = E_x1_c1/E_x0_c1
    sigma1_c1 = E_x2_c1/E_x1_c1
    
    a_c0 = sigma0_c0*(N_c0-1-sigma1_c0)/(sigma0_c0+N_c0*(sigma1_c0-sigma0_c0))
    b_c0 = (N_c0-sigma0_c0)*(N_c0-1-sigma1_c0)/(sigma0_c0+N_c0*(sigma1_c0-sigma0_c0))

    a_c1 = sigma0_c1*(N_c1-1-sigma1_c1)/(sigma0_c1+N_c1*(sigma1_c1-sigma0_c1))
    b_c1 = (N_c1-sigma0_c1)*(N_c1-1-sigma1_c1)/(sigma0_c1+N_c1*(sigma1_c1-sigma0_c1))
    
    return [[a_c0,b_c0],[a_c1,b_c1]]


def prior_init_estimation2(train):
    
    train_c0 = train[train['phenotype_status']==0]
    train_c1 = train[train['phenotype_status']==1]
    
    n_c0 = train_c0['unique_TCRs'].tolist()
    k_c0 = train_c0['phenotype_associated_TCRs'].tolist()
    N_c0 = np.median(n_c0)
    
    n_c1 = train_c1['unique_TCRs'].tolist()
    k_c1 = train_c1['phenotype_associated_TCRs'].tolist()
    N_c1 = np.median(n_c1)
    
    mu1_c0 = np.mean(np.array(k_c0))
    mu2_c0 = np.mean(np.array(k_c0)**2)
    
    mu1_c1 = np.mean(np.array(k_c1))
    mu2_c1 = np.mean(np.array(k_c1)**2)
    
    a_c0 = (N_c0*mu1_c0-mu2_c0)/(N_c0*((mu2_c0/mu1_c0)-mu1_c0-1)+mu1_c0)
    b_c0 = (N_c0-mu1_c0)*(N_c0-(mu2_c0/mu1_c0))/(N_c0*((mu2_c0/mu1_c0)-mu1_c0-1)+mu1_c0)
    
    a_c1 = (N_c1*mu1_c1-mu2_c1)/(N_c1*((mu2_c1/mu1_c1)-mu1_c1-1)+mu1_c1)
    b_c1 = (N_c1-mu1_c1)*(N_c1-(mu2_c1/mu1_c1))/(N_c1*((mu2_c1/mu1_c1)-mu1_c1-1)+mu1_c1)
    
    return [[a_c0,b_c0],[a_c1,b_c1]]


def prior_init_estimation3(train):
    
    train_c0 = train[train['phenotype_status']==0]
    train_c1 = train[train['phenotype_status']==1]
    
    n_c0 = train_c0['unique_TCRs'].tolist()
    k_c0 = train_c0['phenotype_associated_TCRs'].tolist()
    
    n_c1 = train_c1['unique_TCRs'].tolist()
    k_c1 = train_c1['phenotype_associated_TCRs'].tolist()
    
    a_c0 = np.mean(k_c0)
    b_c0 = np.median(n_c0)-a_c0
    
    a_c1 = np.mean(k_c1)
    b_c1 = np.median(n_c1)-a_c1
    
    return [[a_c0,b_c0],[a_c1,b_c1]]

In [11]:
Loocv_MAP(train_origin,TCRs,threshold,prior_init_estimation)

Length of associated TCRs in this round: 69
test sample:  RA47  unique_TCRs:  16495  associated_TCRs:  4
priors initialization:  [[1.0462863753975218, 22767.540290775207], [2.8279598451322396, 3903.5558590186743]]
priors_c0: [8.95908998e-01 2.27675402e+04] priors_c1: [2.89842912e+00 3.90355401e+03]
y_true: 1  y_pred: 1  y_proba_c1: 0.907  y_proba_c1_v2: 0.907

Length of associated TCRs in this round: 31
test sample:  HC9  unique_TCRs:  14907  associated_TCRs:  2
priors initialization:  [[0.43775908136835884, 15378.476528470448], [4.617224251415488, 12114.948404240356]]
priors_c0: [4.37759081e-01 1.53784765e+04] priors_c1: [4.52429381e+00 1.21149457e+04]
y_true: 0  y_pred: 1  y_proba_c1: 0.855  y_proba_c1_v2: 0.855

Length of associated TCRs in this round: 65
test sample:  RA29  unique_TCRs:  22462  associated_TCRs:  4
priors initialization:  [[0.8031126020710502, 18724.340856543087], [2.69710034254103, 3939.878687125147]]
priors_c0: [8.03112602e-01 1.87243409e+04] priors_c1: [2.7825464

Length of associated TCRs in this round: 67
test sample:  RA9  unique_TCRs:  11509  associated_TCRs:  5
priors initialization:  [[1.0462863753975218, 22767.540290775207], [2.851338790318113, 4135.206780500298]]
priors_c0: [8.95908998e-01 2.27675402e+04] priors_c1: [2.95198343e+00 4.13520487e+03]
y_true: 1  y_pred: 1  y_proba_c1: 0.991  y_proba_c1_v2: 0.991

Length of associated TCRs in this round: 68
test sample:  RA1  unique_TCRs:  3629  associated_TCRs:  2
priors initialization:  [[0.8031126020710502, 18724.340856543087], [2.941685518504801, 4282.151412633972]]
priors_c0: [8.03112602e-01 1.87243409e+04] priors_c1: [3.04508862e+00 4.28214945e+03]
y_true: 1  y_pred: 1  y_proba_c1: 0.974  y_proba_c1_v2: 0.974

Length of associated TCRs in this round: 66
test sample:  RA5  unique_TCRs:  8581  associated_TCRs:  3
priors initialization:  [[1.0462863753975218, 22767.540290775207], [2.9202930716431865, 4317.739197397147]]
priors_c0: [8.95908998e-01 2.27675402e+04] priors_c1: [3.05360954e+00 

Length of associated TCRs in this round: 68
test sample:  RA4  unique_TCRs:  12071  associated_TCRs:  5
priors initialization:  [[1.0462863753975218, 22767.540290775207], [2.7524417069371436, 3850.4965669769445]]
priors_c0: [8.95908998e-01 2.27675402e+04] priors_c1: [2.81057539e+00 3.85049499e+03]
y_true: 1  y_pred: 1  y_proba_c1: 0.989  y_proba_c1_v2: 0.989

Length of associated TCRs in this round: 69
test sample:  RA66  unique_TCRs:  14050  associated_TCRs:  4
priors initialization:  [[1.0462863753975218, 22767.540290775207], [2.791528060513221, 3853.26748362011]]
priors_c0: [8.95908998e-01 2.27675402e+04] priors_c1: [2.85709122e+00 3.85326568e+03]
y_true: 1  y_pred: 1  y_proba_c1: 0.945  y_proba_c1_v2: 0.945

Length of associated TCRs in this round: 66
test sample:  RA62  unique_TCRs:  9287  associated_TCRs:  4
priors initialization:  [[1.0462863753975218, 22767.540290775207], [2.677817991120899, 3967.0163733255504]]
priors_c0: [8.95908998e-01 2.27675402e+04] priors_c1: [2.81823745e

Length of associated TCRs in this round: 70
test sample:  RA69  unique_TCRs:  816  associated_TCRs:  0
priors initialization:  [[1.0462863753975218, 22767.540290775207], [2.975704998000635, 4178.305418449595]]
priors_c0: [8.95908998e-01 2.27675402e+04] priors_c1: [3.07228257e+00 4.17830339e+03]
y_true: 1  y_pred: 1  y_proba_c1: 0.649  y_proba_c1_v2: 0.649

Length of associated TCRs in this round: 37
test sample:  HC10  unique_TCRs:  32310  associated_TCRs:  7
priors initialization:  [[0.5254371275731463, 13311.380403510793], [3.5629089694189924, 7789.88374713528]]
priors_c0: [5.25437128e-01 1.33113804e+04] priors_c1: [3.57332579e+00 7.78988246e+03]
y_true: 0  y_pred: 1  y_proba_c1: 0.934  y_proba_c1_v2: 0.934

Length of associated TCRs in this round: 70
test sample:  RA12  unique_TCRs:  5154  associated_TCRs:  6
priors initialization:  [[1.0462863753975218, 22767.540290775207], [2.7806666844064183, 3948.5938218009173]]
priors_c0: [8.95908998e-01 2.27675402e+04] priors_c1: [2.89379431e+

In [30]:
Loocv_MAP(train_origin,TCRs,threshold,prior_init_estimation2)

Length of associated TCRs in this round: 69
test sample:  RA47  unique_TCRs:  16495  associated_TCRs:  4
priors initialization:  [[1.0462863753975218, 22767.540290775207], [2.8279598451322396, 3903.5558590186743]]
priors_c0: [8.95908998e-01 2.27675402e+04] priors_c1: [2.89842912e+00 3.90355401e+03]
y_true: 1  y_pred: 1  y_proba_c1: 0.907  y_proba_c1_v2: 0.907

Length of associated TCRs in this round: 31
test sample:  HC9  unique_TCRs:  14907  associated_TCRs:  2
priors initialization:  [[0.43775908136835884, 15378.476528470448], [4.617224251415483, 12114.948404240344]]
priors_c0: [4.37759081e-01 1.53784765e+04] priors_c1: [4.52429381e+00 1.21149457e+04]
y_true: 0  y_pred: 1  y_proba_c1: 0.855  y_proba_c1_v2: 0.855

Length of associated TCRs in this round: 65
test sample:  RA29  unique_TCRs:  22462  associated_TCRs:  4
priors initialization:  [[0.8031126020710502, 18724.340856543087], [2.69710034254103, 3939.878687125147]]
priors_c0: [8.03112602e-01 1.87243409e+04] priors_c1: [2.7825464

Length of associated TCRs in this round: 67
test sample:  RA9  unique_TCRs:  11509  associated_TCRs:  5
priors initialization:  [[1.0462863753975218, 22767.540290775207], [2.851338790318113, 4135.206780500298]]
priors_c0: [8.95908998e-01 2.27675402e+04] priors_c1: [2.95198343e+00 4.13520487e+03]
y_true: 1  y_pred: 1  y_proba_c1: 0.991  y_proba_c1_v2: 0.991

Length of associated TCRs in this round: 68
test sample:  RA1  unique_TCRs:  3629  associated_TCRs:  2
priors initialization:  [[0.8031126020710502, 18724.340856543087], [2.941685518504801, 4282.151412633972]]
priors_c0: [8.03112602e-01 1.87243409e+04] priors_c1: [3.04508862e+00 4.28214945e+03]
y_true: 1  y_pred: 1  y_proba_c1: 0.974  y_proba_c1_v2: 0.974

Length of associated TCRs in this round: 66
test sample:  RA5  unique_TCRs:  8581  associated_TCRs:  3
priors initialization:  [[1.0462863753975218, 22767.540290775207], [2.9202930716431865, 4317.739197397147]]
priors_c0: [8.95908998e-01 2.27675402e+04] priors_c1: [3.05360954e+00 

Length of associated TCRs in this round: 68
test sample:  RA4  unique_TCRs:  12071  associated_TCRs:  5
priors initialization:  [[1.0462863753975218, 22767.540290775207], [2.7524417069371436, 3850.4965669769445]]
priors_c0: [8.95908998e-01 2.27675402e+04] priors_c1: [2.81057539e+00 3.85049499e+03]
y_true: 1  y_pred: 1  y_proba_c1: 0.989  y_proba_c1_v2: 0.989

Length of associated TCRs in this round: 69
test sample:  RA66  unique_TCRs:  14050  associated_TCRs:  4
priors initialization:  [[1.0462863753975218, 22767.540290775207], [2.791528060513221, 3853.26748362011]]
priors_c0: [8.95908998e-01 2.27675402e+04] priors_c1: [2.85709122e+00 3.85326568e+03]
y_true: 1  y_pred: 1  y_proba_c1: 0.945  y_proba_c1_v2: 0.945

Length of associated TCRs in this round: 66
test sample:  RA62  unique_TCRs:  9287  associated_TCRs:  4
priors initialization:  [[1.0462863753975218, 22767.540290775207], [2.677817991120899, 3967.0163733255504]]
priors_c0: [8.95908998e-01 2.27675402e+04] priors_c1: [2.81823745e

Length of associated TCRs in this round: 70
test sample:  RA69  unique_TCRs:  816  associated_TCRs:  0
priors initialization:  [[1.0462863753975218, 22767.540290775207], [2.975704998000635, 4178.305418449595]]
priors_c0: [8.95908998e-01 2.27675402e+04] priors_c1: [3.07228257e+00 4.17830339e+03]
y_true: 1  y_pred: 1  y_proba_c1: 0.649  y_proba_c1_v2: 0.649

Length of associated TCRs in this round: 37
test sample:  HC10  unique_TCRs:  32310  associated_TCRs:  7
priors initialization:  [[0.5254371275731463, 13311.38040351079], [3.562908969418995, 7789.883747135285]]
priors_c0: [5.25437128e-01 1.33113804e+04] priors_c1: [3.57332578e+00 7.78988246e+03]
y_true: 0  y_pred: 1  y_proba_c1: 0.934  y_proba_c1_v2: 0.934

Length of associated TCRs in this round: 70
test sample:  RA12  unique_TCRs:  5154  associated_TCRs:  6
priors initialization:  [[1.0462863753975218, 22767.540290775207], [2.7806666844064183, 3948.5938218009173]]
priors_c0: [8.95908998e-01 2.27675402e+04] priors_c1: [2.89379431e+0

In [34]:
Loocv_MAP(train_origin,count_df,threshold,prior_init_fun = prior_init_estimation3)

Length of associated TCRs in this round: 69
test sample:  RA47  unique_TCRs:  16495  associated_TCRs:  4
priors initialization:  [[0.75, 16320.25], [8.234375, 11366.265625]]
priors_c0: [7.500000e-01 1.632025e+04] priors_c1: [7.60423109e+00 1.13662523e+04]
y_true: 1  y_pred: 1  y_proba_c1: 0.844  y_proba_c1_v2: 0.844

Length of associated TCRs in this round: 31
test sample:  HC9  unique_TCRs:  14907  associated_TCRs:  2
priors initialization:  [[0.47368421052631576, 16640.526315789473], [4.384615384615385, 11504.615384615385]]
priors_c0: [4.73684211e-01 1.66405263e+04] priors_c1: [4.32107498e+00 1.15046138e+04]
y_true: 0  y_pred: 1  y_proba_c1: 0.853  y_proba_c1_v2: 0.853

Length of associated TCRs in this round: 65
test sample:  RA29  unique_TCRs:  22462  associated_TCRs:  4
priors initialization:  [[0.7, 16320.3], [7.78125, 11366.71875]]
priors_c0: [7.00000e-01 1.63203e+04] priors_c1: [7.22101271e+00 1.13667068e+04]
y_true: 1  y_pred: 1  y_proba_c1: 0.637  y_proba_c1_v2: 0.637

Length

Length of associated TCRs in this round: 62
test sample:  RA48  unique_TCRs:  23112  associated_TCRs:  5
priors initialization:  [[0.75, 16320.25], [7.5625, 11366.9375]]
priors_c0: [7.500000e-01 1.632025e+04] priors_c1: [7.07572177e+00 1.13669295e+04]
y_true: 1  y_pred: 1  y_proba_c1: 0.805  y_proba_c1_v2: 0.805

Length of associated TCRs in this round: 67
test sample:  RA6  unique_TCRs:  7346  associated_TCRs:  3
priors initialization:  [[0.75, 16320.25], [8.0625, 11781.9375]]
priors_c0: [7.500000e-01 1.632025e+04] priors_c1: [7.61437510e+00 1.17819284e+04]
y_true: 1  y_pred: 1  y_proba_c1: 0.971  y_proba_c1_v2: 0.971

Length of associated TCRs in this round: 32
test sample:  HC1  unique_TCRs:  10237  associated_TCRs:  2
priors initialization:  [[0.5263157894736842, 16640.473684210527], [4.538461538461538, 11504.461538461539]]
priors_c0: [5.26315789e-01 1.66404737e+04] priors_c1: [4.47065952e+00 1.15044598e+04]
y_true: 0  y_pred: 1  y_proba_c1: 0.920  y_proba_c1_v2: 0.920

Length of a

Length of associated TCRs in this round: 69
test sample:  RA16  unique_TCRs:  4701  associated_TCRs:  3
priors initialization:  [[0.75, 16320.25], [8.25, 11781.75]]
priors_c0: [7.500000e-01 1.632025e+04] priors_c1: [7.74358668e+00 1.17817419e+04]
y_true: 1  y_pred: 1  y_proba_c1: 0.991  y_proba_c1_v2: 0.991

Length of associated TCRs in this round: 31
test sample:  HC15  unique_TCRs:  8726  associated_TCRs:  1
priors initialization:  [[0.5263157894736842, 16640.473684210527], [4.369230769230769, 11504.63076923077]]
priors_c0: [5.26315789e-01 1.66404737e+04] priors_c1: [4.31215093e+00 1.15046292e+04]
y_true: 0  y_pred: 1  y_proba_c1: 0.788  y_proba_c1_v2: 0.788

Length of associated TCRs in this round: 36
test sample:  HC12  unique_TCRs:  26265  associated_TCRs:  7
priors initialization:  [[0.7368421052631579, 16000.263157894737], [5.323076923076923, 11503.676923076922]]
priors_c0: [7.36842105e-01 1.60002632e+04] priors_c1: [5.18578339e+00 1.15036599e+04]
y_true: 0  y_pred: 1  y_proba_c

Length of associated TCRs in this round: 40
test sample:  HC11  unique_TCRs:  42732  associated_TCRs:  12
priors initialization:  [[0.7894736842105263, 16000.21052631579], [5.923076923076923, 11503.076923076924]]
priors_c0: [7.89473684e-01 1.60002105e+04] priors_c1: [5.72836839e+00 1.15030721e+04]
y_true: 0  y_pred: 1  y_proba_c1: 0.968  y_proba_c1_v2: 0.968

Length of associated TCRs in this round: 57
test sample:  RA55  unique_TCRs:  35579  associated_TCRs:  9
priors initialization:  [[0.75, 16320.25], [7.03125, 11367.46875]]
priors_c0: [7.500000e-01 1.632025e+04] priors_c1: [6.70220075e+00 1.13674616e+04]
y_true: 1  y_pred: 1  y_proba_c1: 0.918  y_proba_c1_v2: 0.918

Length of associated TCRs in this round: 70
test sample:  RA20  unique_TCRs:  3535  associated_TCRs:  2
priors initialization:  [[0.75, 16320.25], [8.359375, 11781.640625]]
priors_c0: [7.500000e-01 1.632025e+04] priors_c1: [7.85104377e+00 1.17815162e+04]
y_true: 1  y_pred: 1  y_proba_c1: 0.976  y_proba_c1_v2: 0.976

Len

### Not successful

### 2.4 based on the formula on BDA3 P583

In [23]:
def prior_init_estimation4(train):
    
    train_c0 = train[train['phenotype_status']==0]
    train_c1 = train[train['phenotype_status']==1]
    
    n_c0 = train_c0['unique_TCRs'].tolist()
    k_c0 = train_c0['phenotype_associated_TCRs'].tolist()
    ratio_c0 = np.array(k_c0)/np.array(n_c0)
    
    n_c1 = train_c1['unique_TCRs'].tolist()
    k_c1 = train_c1['phenotype_associated_TCRs'].tolist()
    ratio_c1 = np.array(k_c1)/np.array(n_c1)
    
    ab_c0 = (np.mean(ratio_c0)*(1-np.mean(ratio_c0))/np.var(ratio_c0))-1
    a_c0 = ab_c0*np.mean(ratio_c0)
    b_c0 = ab_c0*(1-np.mean(ratio_c0))
    
    ab_c1 = (np.mean(ratio_c1)*(1-np.mean(ratio_c1))/np.var(ratio_c1))-1
    a_c1 = ab_c1*np.mean(ratio_c1)
    b_c1 = ab_c1*(1-np.mean(ratio_c1))
    
    return [[a_c0,b_c0],[a_c1,b_c1]]

In [25]:
Loocv_MAP(train_origin,prior_init_estimation4)

Length of associated TCRs in this round: 69
test sample:  RA47  unique_TCRs:  16495  associated_TCRs:  4
priors initialization:  [[0.47058661287736847, 13333.053530396262], [2.595599708471847, 3686.3164052320117]]
priors_c0: [5.80061196e-01 1.33330535e+04] priors_c1: [2.75935630e+00 3.68631362e+03]
y_true: 1  y_pred: 1  y_proba_c1: 0.879  y_proba_c1_v2: 0.879

Length of associated TCRs in this round: 31
test sample:  HC9  unique_TCRs:  14907  associated_TCRs:  2
priors initialization:  [[0.211897935413126, 9320.177962835523], [2.3147040319151957, 5962.707863980021]]
priors_c0: [2.70864530e-01 9.32017795e+03] priors_c1: [2.45148195e+00 5.96270660e+03]
y_true: 0  y_pred: 1  y_proba_c1: 0.868  y_proba_c1_v2: 0.868

Length of associated TCRs in this round: 65
test sample:  RA29  unique_TCRs:  22462  associated_TCRs:  4
priors initialization:  [[0.3853169702679634, 12063.63055000218], [2.4055219246460595, 3595.2377067748494]]
priors_c0: [4.75259618e-01 1.20636305e+04] priors_c1: [2.57428377

Length of associated TCRs in this round: 67
test sample:  RA9  unique_TCRs:  11509  associated_TCRs:  5
priors initialization:  [[0.47058661287736847, 13333.053530396262], [2.483187285068996, 3650.1943037597625]]
priors_c0: [5.80061196e-01 1.33330535e+04] priors_c1: [2.65298220e+00 3.65019173e+03]
y_true: 1  y_pred: 1  y_proba_c1: 0.981  y_proba_c1_v2: 0.981

Length of associated TCRs in this round: 68
test sample:  RA1  unique_TCRs:  3629  associated_TCRs:  2
priors initialization:  [[0.3853169702679634, 12063.63055000218], [2.4721531778903594, 3625.9369573547197]]
priors_c0: [4.75259618e-01 1.20636305e+04] priors_c1: [2.64194130e+00 3.62593442e+03]
y_true: 1  y_pred: 1  y_proba_c1: 0.973  y_proba_c1_v2: 0.973

Length of associated TCRs in this round: 66
test sample:  RA5  unique_TCRs:  8581  associated_TCRs:  3
priors initialization:  [[0.47058661287736847, 13333.053530396262], [2.4147082090220082, 3536.258309575517]]
priors_c0: [5.80061196e-01 1.33330535e+04] priors_c1: [2.57724218e

Length of associated TCRs in this round: 68
test sample:  RA4  unique_TCRs:  12071  associated_TCRs:  5
priors initialization:  [[0.47058661287736847, 13333.053530396262], [2.4766100573240473, 3557.3676135532714]]
priors_c0: [5.80061196e-01 1.33330535e+04] priors_c1: [2.62739147e+00 3.55736502e+03]
y_true: 1  y_pred: 1  y_proba_c1: 0.978  y_proba_c1_v2: 0.978

Length of associated TCRs in this round: 69
test sample:  RA66  unique_TCRs:  14050  associated_TCRs:  4
priors initialization:  [[0.47058661287736847, 13333.053530396262], [2.571533567743443, 3657.861202719986]]
priors_c0: [5.80061196e-01 1.33330535e+04] priors_c1: [2.73238162e+00 3.65785845e+03]
y_true: 1  y_pred: 1  y_proba_c1: 0.922  y_proba_c1_v2: 0.922

Length of associated TCRs in this round: 66
test sample:  RA62  unique_TCRs:  9287  associated_TCRs:  4
priors initialization:  [[0.47058661287736847, 13333.053530396262], [2.8429447916399506, 4246.46154291225]]
priors_c0: [5.80061196e-01 1.33330535e+04] priors_c1: [2.986699

Length of associated TCRs in this round: 70
test sample:  RA69  unique_TCRs:  816  associated_TCRs:  0
priors initialization:  [[0.47058661287736847, 13333.053530396262], [2.743636134478865, 3849.144370077964]]
priors_c0: [5.80061196e-01 1.33330535e+04] priors_c1: [2.86411642e+00 3.84914180e+03]
y_true: 1  y_pred: 1  y_proba_c1: 0.649  y_proba_c1_v2: 0.649

Length of associated TCRs in this round: 37
test sample:  HC10  unique_TCRs:  32310  associated_TCRs:  7
priors initialization:  [[0.2960871281196427, 9267.973376291366], [2.8933436128151055, 6510.020065808465]]
priors_c0: [3.90471754e-01 9.26797335e+03] priors_c1: [3.05713177e+00 6.51001810e+03]
y_true: 0  y_pred: 1  y_proba_c1: 0.923  y_proba_c1_v2: 0.923

Length of associated TCRs in this round: 70
test sample:  RA12  unique_TCRs:  5154  associated_TCRs:  6
priors initialization:  [[0.47058661287736847, 13333.053530396262], [2.5446176049434692, 3663.5563303448043]]
priors_c0: [5.80061196e-01 1.33330535e+04] priors_c1: [2.71397492

### 0.63

## Method 3 Hierachical model: a, b follow a distribution

### Method 3.1: uniform distribution

In [6]:
from numpy.random import uniform

In [7]:
priors_init

[[1.35, 19541.5], [18.5, 12364.7]]

In [10]:
a_c0 = uniform(priors_init[0][0]-1,priors_init[0][0]+1,1)
b_c0 = uniform(priors_init[0][1]-1000,priors_init[0][1]+1000,1)

a_c1 = uniform(priors_init[1][0]-1,priors_init[1][0]+1,1)
b_c1 = uniform(priors_init[1][1]-1000,priors_init[1][1]+1000,1)

new_init = [[a_c0,b_c0],[a_c1,b_c1]]
print(new_init)

[[array([0.59266022]), array([19585.99674391])], [array([18.67610234]), array([12355.53064415])]]


In [12]:
LOOCV_MAP(train_origin,count_df,threshold,priors_init_value=new_init)

Length of associated TCRs in this round: 69
test sample:  RA47  unique_TCRs:  16495  associated_TCRs:  4
priors_c0: [7.88366072e-01 1.95859967e+04] priors_c1: [8.22648438e+00 1.23555063e+04]
y_true: 1  y_pred: 1  y_proba_c1: 0.870

Length of associated TCRs in this round: 31
test sample:  HC9  unique_TCRs:  14907  associated_TCRs:  2
priors_c0: [5.92660220e-01 1.95859967e+04] priors_c1: [   13.67610642 12355.53703334]
y_true: 0  y_pred: 0  y_proba_c1: 0.030

Length of associated TCRs in this round: 65
test sample:  RA29  unique_TCRs:  22462  associated_TCRs:  4
priors_c0: [6.99902470e-01 1.95859967e+04] priors_c1: [7.81150428e+00 1.23555087e+04]
y_true: 1  y_pred: 1  y_proba_c1: 0.679

Length of associated TCRs in this round: 69
test sample:  RA8  unique_TCRs:  6143  associated_TCRs:  1
priors_c0: [7.88366072e-01 1.95859967e+04] priors_c1: [8.16872881e+00 1.23555066e+04]
y_true: 1  y_pred: 1  y_proba_c1: 0.672

Length of associated TCRs in this round: 69
test sample:  RA63  unique_TCRs

Length of associated TCRs in this round: 68
test sample:  RA22  unique_TCRs:  16752  associated_TCRs:  4
priors_c0: [7.88366072e-01 1.95859967e+04] priors_c1: [8.13440759e+00 1.23555069e+04]
y_true: 1  y_pred: 1  y_proba_c1: 0.866

Length of associated TCRs in this round: 67
test sample:  RA31  unique_TCRs:  5152  associated_TCRs:  4
priors_c0: [7.88366072e-01 1.95859967e+04] priors_c1: [7.90062824e+00 1.23555082e+04]
y_true: 1  y_pred: 1  y_proba_c1: 0.998

Length of associated TCRs in this round: 67
test sample:  RA81  unique_TCRs:  10091  associated_TCRs:  5
priors_c0: [6.99902470e-01 1.95859967e+04] priors_c1: [7.88226837e+00 1.23555085e+04]
y_true: 1  y_pred: 1  y_proba_c1: 0.996

Length of associated TCRs in this round: 68
test sample:  RA37  unique_TCRs:  1776  associated_TCRs:  3
priors_c0: [7.88366072e-01 1.95859967e+04] priors_c1: [7.96062377e+00 1.23555081e+04]
y_true: 1  y_pred: 1  y_proba_c1: 0.999

Length of associated TCRs in this round: 60
test sample:  RA52  unique_TCR

Length of associated TCRs in this round: 69
test sample:  RA11  unique_TCRs:  7655  associated_TCRs:  2
priors_c0: [7.88366072e-01 1.95859967e+04] priors_c1: [8.16845512e+00 1.23555066e+04]
y_true: 1  y_pred: 1  y_proba_c1: 0.885

Length of associated TCRs in this round: 34
test sample:  HC4  unique_TCRs:  16001  associated_TCRs:  4
priors_c0: [6.42345213e-01 1.95859967e+04] priors_c1: [   13.6761057  12355.53644025]
y_true: 0  y_pred: 0  y_proba_c1: 0.420

Length of associated TCRs in this round: 69
test sample:  RA19  unique_TCRs:  5104  associated_TCRs:  1
priors_c0: [7.88366072e-01 1.95859967e+04] priors_c1: [8.15575821e+00 1.23555067e+04]
y_true: 1  y_pred: 1  y_proba_c1: 0.764

Length of associated TCRs in this round: 68
test sample:  RA30  unique_TCRs:  6572  associated_TCRs:  2
priors_c0: [7.88366072e-01 1.95859967e+04] priors_c1: [8.06596772e+00 1.23555073e+04]
y_true: 1  y_pred: 1  y_proba_c1: 0.925

Length of associated TCRs in this round: 62
test sample:  RA24  unique_TCRs: