### import of packages

In [1]:
import numpy as np # better arrays than inbuilt arrays
import matplotlib.pyplot as plt # to plot stuff

import pandas as pd #for DataFrame tables
from IPython.display import display #to display dfs more nicely

import scipy.stats
#from scipy.stats import norm
import statistics

## Functions

### Generation of random distributions from 3 Gaussians and plotting them

In [2]:
def create_data_from_3_gaussians(mean1, mean2, mean3, std1, std2, std3, n1, n2, n3): # Creation of random data points from multiple Gaussians ki
    k1 = np.random.normal(mean1, std1, n1) #creates array with values created through Gaussian
    k2 = np.random.normal(mean2, std2, n2)
    k3 = np.random.normal(mean3, std3, n3)

    return np.concatenate([k1, k2, k3])

In [3]:
def p_xi_given_zj_from_gaussian_datapoints(kall, mean1, mean2, mean3, std1, std2, std3):
    # Calculating p_xi_given_zj (this is what Matt is working on with the simulated data)

    pdf_probability_k1 = scipy.stats.norm.pdf(kall, loc=mean1, scale=std1)
    pdf_probability_k2 = scipy.stats.norm.pdf(kall, loc=mean2, scale=std2)
    pdf_probability_k3 = scipy.stats.norm.pdf(kall, loc=mean3, scale=std3)

    p_xi_given_zj = np.vstack((pdf_probability_k1,pdf_probability_k2,pdf_probability_k3))
    p_xi_given_zj = np.transpose(p_xi_given_zj)

    print("p_xi_given_zj")
    display(pd.DataFrame(p_xi_given_zj))
    print("\n")
    
    print("likeliest peptide z of each datapoint x according to scipy.stats.norm.pdf (should be more accurate than EM because its dedicated to Gaussians)")
    print(np.argmax(p_xi_given_zj, axis=1)) # for each column of P(Xi|Zj), the most likely Peptide is returned
    print("\n")
    
    return p_xi_given_zj

In [4]:
def create_3_gaussians_and_calculate_p_xi_given_zj(mean1, mean2, mean3, std1, std2, std3, n1, n2, n3):
    kall = create_data_from_3_gaussians(mean1, mean2, mean3, std1, std2, std3, n1, n2, n3)
    p_xi_given_zj = p_xi_given_zj_from_gaussian_datapoints(kall, mean1, mean2, mean3, std1, std2, std3)
    
    return p_xi_given_zj

In [5]:
def plot_histograms_and_pdfs_from_gaussians(mean1, mean2, mean3, std1, std2, std3, n1, n2, n3):
    k1 = np.random.normal(mean1, std1, n1) #creates array with values created through Gaussian
    k2 = np.random.normal(mean2, std2, n2)
    k3 = np.random.normal(mean3, std3, n3)
    
    # plotting histograms
    nbins = 50
    plt.hist(k1, label = "Peptide 0", bins=nbins, alpha=0.3, density=True, color="orange") # alpha=transparency, density=True normalises to 1 
    plt.hist(k2, label = "Peptide 1", bins=nbins, alpha=0.3, density=True, color="green")
    plt.hist(k3, label = "Peptide 2", bins=nbins, alpha=0.3, density=True, color="blue")

    # PDF plot
    xmin, xmax = plt.xlim() #finds lower and upper bounds of histogram data
    x = np.linspace(start=xmin, stop=xmax, num=100) #num is the number of returned data points - the more points, the finer the fit is plotted
    p1 = scipy.stats.norm.pdf(x, mean1, std1)
    p2 = scipy.stats.norm.pdf(x, mean2, std2)
    p3 = scipy.stats.norm.pdf(x, mean3, std3)

    plt.plot(x, p1, linewidth=2, color = "orange", label = "Gauss function k1: mean = {:.2f}, STD = {:.2f}".format(mean1, std1))
    plt.plot(x, p2, linewidth=2, color = "green", label = "Gauss function k2: mean = {:.2f}, STD = {:.2f}".format(mean2, std2))
    plt.plot(x, p3, linewidth=2, color = "blue", label = "Gauss function k3: mean = {:.2f}, STD = {:.2f}".format(mean3, std3))

    plt.legend(loc='upper right')
    plt.title("PDFs of dwarves and humans")

    plt.show()

### EM    

First iteration:
- assume P(Zi)=1/m for all i

Real data: a bunch of dye seq data points Xi.

Goal: Figure out probability of real peptide seqs Zj given all those measured Xi.
____
However, I am starting with the list of P(Xi|Zj) that Matt will provide me, instead of a list of P(Xi). In fact, I never even use P(Xi), which is a little weird.

In [6]:
def update_p_zj_given_xi(p_xi_given_zj, p_zj, p_zj_given_xi):
    denominator = 0

    for i, row in enumerate(p_xi_given_zj): # Calculating/Updating P(Zj|Xi)
        #print("ROW of P(Xi|Zj):", i)
        for j, cell in enumerate(row):
            #print("COLUMN of P(Xi|Zj):", j)
            numerator = cell * p_zj[j]
            #print("numerator:", numerator)
            
            for l, cell in enumerate(p_zj):
                # print("cell", i, l, p_xi_given_zj[i, l], end="")
                # print(" * zl", p_zj[l])
                denominator = denominator + p_xi_given_zj[i, l] * p_zj[l]
            p_zj_given_xi[j][i] = numerator/denominator
            denominator = 0                                
        #display(pd.DataFrame(p_zj_given_xi))
        
    #print((np.argmax(p_zj_given_xi, axis=1))) # reports index of max value from each row
    
    return p_zj_given_xi

In [7]:
def update_p_zj(p_zj, p_zj_given_xi):    
    for j, element in enumerate(p_zj): #updating the expectation value of Zi
        p_zj[j] = p_zj_given_xi[j].sum()/p_xi_given_zj.shape[1] # divide by number of peptides z
        
    # print("p_zj")
    # display(pd.DataFrame(p_zj))
    
    return p_zj

In [8]:
def EM(p_xi_given_zj):
    ### initialise p_zj_given_xi
    # p_xi_given_zj = np.arange(0,102,1).reshape(34,3) # for testing EM iterating through columns and rows
    p_zj_given_xi = np.full((p_xi_given_zj.shape[1], p_xi_given_zj.shape[0]), 0, dtype=float) #Initialisation based on array size of p_xi_given_zj -- same size, but transposed
    #display(pd.DataFrame(p_zj_given_xi))
    
    ### initialise p_zj
    n = p_xi_given_zj.shape[1] # number of peptides
    p_zj_initial = 1/n #initial approximation: all zj equally likely, to jumpstart first iteration
    p_zj = np.full(n, p_zj_initial)
    
    loopcounter = 0
    while loopcounter <= 100:
        p_zj_given_xi = update_p_zj_given_xi(p_xi_given_zj, p_zj, p_zj_given_xi)
        p_zj = update_p_zj(p_zj, p_zj_given_xi)
        
        loopcounter = loopcounter + 1
        
    print("P_zj_given_xi:")
    display(pd.DataFrame(p_zj_given_xi))
    print("\n")
    
    print("P_zj:")
    display(pd.DataFrame(p_zj))
    p_zj_fraction = p_zj/np.sum(p_zj)
    print("P_zj_fraction")
    display(pd.DataFrame(p_zj_fraction))
    print("\n")
    
    print("Likeliest peptide z for each datapoint x:")
    print((np.argmax(p_zj_given_xi, axis=0))) # reports index of max value from each row
    print("\n")
    
    print("Value of likeliest z:")
    print(np.amax(p_zj_given_xi, axis=0)) #reports max value from each row
    print("\n")
    
    return p_zj

## bootstrapping

Takes in full p_xi_given_zj dataset, and then returns subarray of new p_xi_given_zj

In [105]:
def create_subarray_of_p_xi_given_zj(p_xi_given_zj, bootstrap_sampled_fraction):
    df_p_xi_given_zj = pd.DataFrame(p_xi_given_zj)
    df_p_xi_given_zj_sample = df_p_xi_given_zj.sample(frac=bootstrap_sampled_fraction, axis='rows', replace=True) # filters for a random partial dataset
    display(pd.DataFrame(df_p_xi_given_zj_sample))
    p_xi_given_zj_subarray = df_p_xi_given_zj_sample.to_numpy()
    
    display(pd.DataFrame(p_xi_given_zj_subarray))
    
    return p_xi_given_zj_subarray

In [106]:
def bootstrap_EM(n_bootstrap_runs, bootstrap_sampled_fraction):
    i = 0
    
    p_zj_bootstrap = np.full((p_xi_given_zj.shape[1]), 0, dtype=float)
    print("p_zj_bootstrap", p_zj_bootstrap)
    
    while i < n_bootstrap_runs:
        p_xi_given_zj_subarray = create_subarray_of_p_xi_given_zj(p_xi_given_zj, bootstrap_sampled_fraction)
        
        if i == 0:
            p_zj_bootstrap = EM(p_xi_given_zj_subarray)
            
        else:
            p_zj_bootstrap = np.vstack((p_zj_bootstrap, EM(p_xi_given_zj_subarray)))
        
        i = i + 1
    return p_zj_bootstrap

## call functions

In [110]:
# esc, ctrl+a, ctrl+enter to run all cells

#p_xi_given_zj = create_3_gaussians_and_calculate_p_xi_given_zj(140, 150, 160, 15, 15, 15, 5, 5, 5) #mean1-3, std1-3, n1-3
# plot_histograms_and_pdfs_from_gaussians(140, 150, 160, 15, 15, 15, 5, 5, 5)

p_xi_given_zj = np.genfromtxt('uniform-2-peps-scores.csv', delimiter=',') # import full dataset
p_zj_bootstrap_results = bootstrap_EM(30, 0.7) # n bootstrap runs, fraction of sampled data per run

p_zj_bootstrap [0. 0.]


Unnamed: 0,0,1
6,609.576654,609.576628
0,442.788477,3504.082019
9,40924.985131,4458.562523
1,353113.978894,38469.91614
5,79290.092111,8638.239712
4,13198.296557,1437.8852
7,83233.660212,9067.8708


Unnamed: 0,0,1
0,609.576654,609.576628
1,442.788477,3504.082019
2,40924.985131,4458.562523
3,353113.978894,38469.91614
4,79290.092111,8638.239712
5,13198.296557,1437.8852
6,83233.660212,9067.8708


P_zj_given_xi:


Unnamed: 0,0,1,2,3,4,5,6
0,0.93349,0.639452,0.992298,0.992298,0.992298,0.992298,0.992298
1,0.06651,0.360548,0.007702,0.007702,0.007702,0.007702,0.007702




P_zj:


Unnamed: 0,0
0,3.267215
1,0.232785


P_zj_fraction


Unnamed: 0,0
0,0.93349
1,0.06651




Likeliest peptide z for each datapoint x:
[0 0 0 0 0 0 0]


Value of likeliest z:
[0.93349    0.63945195 0.99229761 0.99229761 0.99229761 0.99229761
 0.99229761]




Unnamed: 0,0,1
4,13198.296557,1437.8852
8,132783.199901,14466.033308
8,132783.199901,14466.033308
1,353113.978894,38469.91614
6,609.576654,609.576628
4,13198.296557,1437.8852
5,79290.092111,8638.239712


Unnamed: 0,0,1
0,13198.296557,1437.8852
1,132783.199901,14466.033308
2,132783.199901,14466.033308
3,353113.978894,38469.91614
4,609.576654,609.576628
5,13198.296557,1437.8852
6,79290.092111,8638.239712


P_zj_given_xi:


Unnamed: 0,0,1,2,3,4,5,6
0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,1.682077e-64,1.682077e-64,1.682077e-64,1.682077e-64,1.5439729999999999e-63,1.682077e-64,1.682077e-64




P_zj:


Unnamed: 0,0
0,3.5
1,1.2766089999999998e-63


P_zj_fraction


Unnamed: 0,0
0,1.0
1,3.647456e-64




Likeliest peptide z for each datapoint x:
[0 0 0 0 0 0 0]


Value of likeliest z:
[1. 1. 1. 1. 1. 1. 1.]




Unnamed: 0,0,1
6,609.576654,609.576628
9,40924.985131,4458.562523
9,40924.985131,4458.562523
7,83233.660212,9067.8708
1,353113.978894,38469.91614
0,442.788477,3504.082019
8,132783.199901,14466.033308


Unnamed: 0,0,1
0,609.576654,609.576628
1,40924.985131,4458.562523
2,40924.985131,4458.562523
3,83233.660212,9067.8708
4,353113.978894,38469.91614
5,442.788477,3504.082019
6,132783.199901,14466.033308


P_zj_given_xi:


Unnamed: 0,0,1,2,3,4,5,6
0,0.93349,0.992298,0.992298,0.992298,0.992298,0.639452,0.992298
1,0.06651,0.007702,0.007702,0.007702,0.007702,0.360548,0.007702




P_zj:


Unnamed: 0,0
0,3.267215
1,0.232785


P_zj_fraction


Unnamed: 0,0
0,0.93349
1,0.06651




Likeliest peptide z for each datapoint x:
[0 0 0 0 0 0 0]


Value of likeliest z:
[0.93349    0.99229761 0.99229761 0.99229761 0.99229761 0.63945195
 0.99229761]




Unnamed: 0,0,1
1,353114.0,38469.92
4,13198.3,1437.885
9,40924.99,4458.563
2,1087464.0,1087464.0
1,353114.0,38469.92
5,79290.09,8638.24
8,132783.2,14466.03


Unnamed: 0,0,1
0,353114.0,38469.92
1,13198.3,1437.885
2,40924.99,4458.563
3,1087464.0,1087464.0
4,353114.0,38469.92
5,79290.09,8638.24
6,132783.2,14466.03


P_zj_given_xi:


Unnamed: 0,0,1,2,3,4,5,6
0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,1.682081e-64,1.682081e-64,1.682081e-64,1.543976e-63,1.682081e-64,1.682081e-64,1.682081e-64




P_zj:


Unnamed: 0,0
0,3.5
1,1.276613e-63


P_zj_fraction


Unnamed: 0,0
0,1.0
1,3.647465e-64




Likeliest peptide z for each datapoint x:
[0 0 0 0 0 0 0]


Value of likeliest z:
[1. 1. 1. 1. 1. 1. 1.]




Unnamed: 0,0,1
2,1087464.0,1087464.0
2,1087464.0,1087464.0
2,1087464.0,1087464.0
4,13198.3,1437.885
8,132783.2,14466.03
4,13198.3,1437.885
9,40924.99,4458.563


Unnamed: 0,0,1
0,1087464.0,1087464.0
1,1087464.0,1087464.0
2,1087464.0,1087464.0
3,13198.3,1437.885
4,132783.2,14466.03
5,13198.3,1437.885
6,40924.99,4458.563


P_zj_given_xi:


Unnamed: 0,0,1,2,3,4,5,6
0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,7.338315e-32,7.338315e-32,7.338315e-32,7.994709e-33,7.994709e-33,7.994709e-33,7.994709e-33




P_zj:


Unnamed: 0,0
0,3.5
1,1.260641e-31


P_zj_fraction


Unnamed: 0,0
0,1.0
1,3.601833e-32




Likeliest peptide z for each datapoint x:
[0 0 0 0 0 0 0]


Value of likeliest z:
[1. 1. 1. 1. 1. 1. 1.]




Unnamed: 0,0,1
9,40924.985131,4458.562523
5,79290.092111,8638.239712
0,442.788477,3504.082019
8,132783.199901,14466.033308
5,79290.092111,8638.239712
1,353113.978894,38469.91614
8,132783.199901,14466.033308


Unnamed: 0,0,1
0,40924.985131,4458.562523
1,79290.092111,8638.239712
2,442.788477,3504.082019
3,132783.199901,14466.033308
4,79290.092111,8638.239712
5,353113.978894,38469.91614
6,132783.199901,14466.033308


P_zj_given_xi:


Unnamed: 0,0,1,2,3,4,5,6
0,0.995908,0.995908,0.770134,0.995908,0.995908,0.995908,0.995908
1,0.004092,0.004092,0.229866,0.004092,0.004092,0.004092,0.004092




P_zj:


Unnamed: 0,0
0,3.37279
1,0.12721


P_zj_fraction


Unnamed: 0,0
0,0.963654
1,0.036346




Likeliest peptide z for each datapoint x:
[0 0 0 0 0 0 0]


Value of likeliest z:
[0.99590781 0.99590781 0.7701341  0.99590781 0.99590781 0.99590781
 0.99590781]




Unnamed: 0,0,1
4,13198.3,1437.885
8,132783.2,14466.03
7,83233.66,9067.871
2,1087464.0,1087464.0
4,13198.3,1437.885
1,353114.0,38469.92
4,13198.3,1437.885


Unnamed: 0,0,1
0,13198.3,1437.885
1,132783.2,14466.03
2,83233.66,9067.871
3,1087464.0,1087464.0
4,13198.3,1437.885
5,353114.0,38469.92
6,13198.3,1437.885


P_zj_given_xi:


Unnamed: 0,0,1,2,3,4,5,6
0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,1.682082e-64,1.682082e-64,1.682082e-64,1.543977e-63,1.682082e-64,1.682082e-64,1.682082e-64




P_zj:


Unnamed: 0,0
0,3.5
1,1.276613e-63


P_zj_fraction


Unnamed: 0,0
0,1.0
1,3.647465e-64




Likeliest peptide z for each datapoint x:
[0 0 0 0 0 0 0]


Value of likeliest z:
[1. 1. 1. 1. 1. 1. 1.]




Unnamed: 0,0,1
9,40924.985131,4458.562523
8,132783.199901,14466.033308
8,132783.199901,14466.033308
4,13198.296557,1437.8852
1,353113.978894,38469.91614
6,609.576654,609.576628
5,79290.092111,8638.239712


Unnamed: 0,0,1
0,40924.985131,4458.562523
1,132783.199901,14466.033308
2,132783.199901,14466.033308
3,13198.296557,1437.8852
4,353113.978894,38469.91614
5,609.576654,609.576628
6,79290.092111,8638.239712


P_zj_given_xi:


Unnamed: 0,0,1,2,3,4,5,6
0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,1.682077e-64,1.682077e-64,1.682077e-64,1.682077e-64,1.682077e-64,1.5439729999999999e-63,1.682077e-64




P_zj:


Unnamed: 0,0
0,3.5
1,1.2766089999999998e-63


P_zj_fraction


Unnamed: 0,0
0,1.0
1,3.647456e-64




Likeliest peptide z for each datapoint x:
[0 0 0 0 0 0 0]


Value of likeliest z:
[1. 1. 1. 1. 1. 1. 1.]




Unnamed: 0,0,1
1,353114.0,38469.92
3,43.91621,347.5384
0,442.7885,3504.082
0,442.7885,3504.082
2,1087464.0,1087464.0
2,1087464.0,1087464.0
5,79290.09,8638.24


Unnamed: 0,0,1
0,353114.0,38469.92
1,43.91621,347.5384
2,442.7885,3504.082
3,442.7885,3504.082
4,1087464.0,1087464.0
5,1087464.0,1087464.0
6,79290.09,8638.24


P_zj_given_xi:


Unnamed: 0,0,1,2,3,4,5,6
0,0.8515,0.073163,0.073163,0.073163,0.384497,0.384497,0.8515
1,0.1485,0.926837,0.926837,0.926837,0.615503,0.615503,0.1485




P_zj:


Unnamed: 0,0
0,1.345741
1,2.154259


P_zj_fraction


Unnamed: 0,0
0,0.384497
1,0.615503




Likeliest peptide z for each datapoint x:
[0 1 1 1 1 1 0]


Value of likeliest z:
[0.85149974 0.92683738 0.92683738 0.92683738 0.61550253 0.61550253
 0.85149974]




Unnamed: 0,0,1
3,43.916213,347.538432
9,40924.985131,4458.562523
1,353113.978894,38469.91614
6,609.576654,609.576628
5,79290.092111,8638.239712
8,132783.199901,14466.033308
4,13198.296557,1437.8852


Unnamed: 0,0,1
0,43.916213,347.538432
1,40924.985131,4458.562523
2,353113.978894,38469.91614
3,609.576654,609.576628
4,79290.092111,8638.239712
5,132783.199901,14466.033308
6,13198.296557,1437.8852


P_zj_given_xi:


Unnamed: 0,0,1,2,3,4,5,6
0,0.639452,0.992298,0.992298,0.93349,0.992298,0.992298,0.992298
1,0.360548,0.007702,0.007702,0.06651,0.007702,0.007702,0.007702




P_zj:


Unnamed: 0,0
0,3.267215
1,0.232785


P_zj_fraction


Unnamed: 0,0
0,0.93349
1,0.06651




Likeliest peptide z for each datapoint x:
[0 0 0 0 0 0 0]


Value of likeliest z:
[0.63945195 0.99229761 0.99229761 0.93349    0.99229761 0.99229761
 0.99229761]




Unnamed: 0,0,1
0,442.7885,3504.082
2,1087464.0,1087464.0
9,40924.99,4458.563
6,609.5767,609.5766
6,609.5767,609.5766
2,1087464.0,1087464.0
2,1087464.0,1087464.0


Unnamed: 0,0,1
0,442.7885,3504.082
1,1087464.0,1087464.0
2,40924.99,4458.563
3,609.5767,609.5766
4,609.5767,609.5766
5,1087464.0,1087464.0
6,1087464.0,1087464.0


P_zj_given_xi:


Unnamed: 0,0,1,2,3,4,5,6
0,0.116723,0.511188,0.905653,0.511188,0.511188,0.511188,0.511188
1,0.883277,0.488812,0.094347,0.488812,0.488812,0.488812,0.488812




P_zj:


Unnamed: 0,0
0,1.789158
1,1.710842


P_zj_fraction


Unnamed: 0,0
0,0.511188
1,0.488812




Likeliest peptide z for each datapoint x:
[1 0 0 0 0 0 0]


Value of likeliest z:
[0.88327666 0.51118807 0.90565277 0.51118808 0.51118808 0.51118807
 0.51118807]




Unnamed: 0,0,1
1,353114.0,38469.92
3,43.91621,347.5384
0,442.7885,3504.082
2,1087464.0,1087464.0
8,132783.2,14466.03
1,353114.0,38469.92
8,132783.2,14466.03


Unnamed: 0,0,1
0,353114.0,38469.92
1,43.91621,347.5384
2,442.7885,3504.082
3,1087464.0,1087464.0
4,132783.2,14466.03
5,353114.0,38469.92
6,132783.2,14466.03


P_zj_given_xi:


Unnamed: 0,0,1,2,3,4,5,6
0,0.959806,0.247405,0.247405,0.722339,0.959806,0.959806,0.959806
1,0.040194,0.752595,0.752595,0.277661,0.040194,0.040194,0.040194




P_zj:


Unnamed: 0,0
0,2.528187
1,0.971813


P_zj_fraction


Unnamed: 0,0
0,0.722339
1,0.277661




Likeliest peptide z for each datapoint x:
[0 1 1 0 0 0 0]


Value of likeliest z:
[0.95980579 0.75259451 0.75259451 0.72233902 0.95980579 0.95980579
 0.95980579]




Unnamed: 0,0,1
2,1087464.0,1087464.0
2,1087464.0,1087464.0
8,132783.2,14466.03
6,609.5767,609.5766
9,40924.99,4458.563
4,13198.3,1437.885
8,132783.2,14466.03


Unnamed: 0,0,1
0,1087464.0,1087464.0
1,1087464.0,1087464.0
2,132783.2,14466.03
3,609.5767,609.5766
4,40924.99,4458.563
5,13198.3,1437.885
6,132783.2,14466.03


P_zj_given_xi:


Unnamed: 0,0,1,2,3,4,5,6
0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,7.338306e-32,7.338306e-32,7.994699e-33,7.338306e-32,7.994699e-33,7.994699e-33,7.994699e-33




P_zj:


Unnamed: 0,0
0,3.5
1,1.26064e-31


P_zj_fraction


Unnamed: 0,0
0,1.0
1,3.601828e-32




Likeliest peptide z for each datapoint x:
[0 0 0 0 0 0 0]


Value of likeliest z:
[1. 1. 1. 1. 1. 1. 1.]




Unnamed: 0,0,1
3,43.91621,347.5384
2,1087464.0,1087464.0
2,1087464.0,1087464.0
9,40924.99,4458.563
5,79290.09,8638.24
4,13198.3,1437.885
9,40924.99,4458.563


Unnamed: 0,0,1
0,43.91621,347.5384
1,1087464.0,1087464.0
2,1087464.0,1087464.0
3,40924.99,4458.563
4,79290.09,8638.24
5,13198.3,1437.885
6,40924.99,4458.563


P_zj_given_xi:


Unnamed: 0,0,1,2,3,4,5,6
0,0.50877,0.89126,0.89126,0.986882,0.986882,0.986882,0.986882
1,0.49123,0.10874,0.10874,0.013118,0.013118,0.013118,0.013118




P_zj:


Unnamed: 0,0
0,3.119409
1,0.380591


P_zj_fraction


Unnamed: 0,0
0,0.89126
1,0.10874




Likeliest peptide z for each datapoint x:
[0 0 0 0 0 0 0]


Value of likeliest z:
[0.50876979 0.8912598  0.8912598  0.98688231 0.98688231 0.98688231
 0.98688231]




Unnamed: 0,0,1
7,83233.66,9067.871
2,1087464.0,1087464.0
6,609.5767,609.5766
9,40924.99,4458.563
2,1087464.0,1087464.0
1,353114.0,38469.92
8,132783.2,14466.03


Unnamed: 0,0,1
0,83233.66,9067.871
1,1087464.0,1087464.0
2,609.5767,609.5766
3,40924.99,4458.563
4,1087464.0,1087464.0
5,353114.0,38469.92
6,132783.2,14466.03


P_zj_given_xi:


Unnamed: 0,0,1,2,3,4,5,6
0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,7.994699e-33,7.338306e-32,7.338305e-32,7.994699e-33,7.338306e-32,7.994699e-33,7.994699e-33




P_zj:


Unnamed: 0,0
0,3.5
1,1.26064e-31


P_zj_fraction


Unnamed: 0,0
0,1.0
1,3.601828e-32




Likeliest peptide z for each datapoint x:
[0 0 0 0 0 0 0]


Value of likeliest z:
[1. 1. 1. 1. 1. 1. 1.]




Unnamed: 0,0,1
4,13198.296557,1437.8852
7,83233.660212,9067.8708
7,83233.660212,9067.8708
4,13198.296557,1437.8852
8,132783.199901,14466.033308
7,83233.660212,9067.8708
4,13198.296557,1437.8852


Unnamed: 0,0,1
0,13198.296557,1437.8852
1,83233.660212,9067.8708
2,83233.660212,9067.8708
3,13198.296557,1437.8852
4,132783.199901,14466.033308
5,83233.660212,9067.8708
6,13198.296557,1437.8852


P_zj_given_xi:


Unnamed: 0,0,1,2,3,4,5,6
0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,5.725843e-98,5.725843e-98,5.725843e-98,5.725843e-98,5.725843e-98,5.725843e-98,5.725843e-98




P_zj:


Unnamed: 0,0
0,3.5
1,2.004045e-97


P_zj_fraction


Unnamed: 0,0
0,1.0
1,5.725843e-98




Likeliest peptide z for each datapoint x:
[0 0 0 0 0 0 0]


Value of likeliest z:
[1. 1. 1. 1. 1. 1. 1.]




Unnamed: 0,0,1
0,442.7885,3504.082
6,609.5767,609.5766
2,1087464.0,1087464.0
4,13198.3,1437.885
3,43.91621,347.5384
2,1087464.0,1087464.0
7,83233.66,9067.871


Unnamed: 0,0,1
0,442.7885,3504.082
1,609.5767,609.5766
2,1087464.0,1087464.0
3,13198.3,1437.885
4,43.91621,347.5384
5,1087464.0,1087464.0
6,83233.66,9067.871


P_zj_given_xi:


Unnamed: 0,0,1,2,3,4,5,6
0,0.116723,0.511188,0.511188,0.905653,0.116723,0.511188,0.905653
1,0.883277,0.488812,0.488812,0.094347,0.883277,0.488812,0.094347




P_zj:


Unnamed: 0,0
0,1.789158
1,1.710842


P_zj_fraction


Unnamed: 0,0
0,0.511188
1,0.488812




Likeliest peptide z for each datapoint x:
[1 0 0 0 1 0 0]


Value of likeliest z:
[0.88327666 0.51118807 0.51118806 0.90565277 0.88327666 0.51118806
 0.90565277]




Unnamed: 0,0,1
0,442.788477,3504.082019
7,83233.660212,9067.8708
3,43.916213,347.538432
9,40924.985131,4458.562523
9,40924.985131,4458.562523
0,442.788477,3504.082019
7,83233.660212,9067.8708


Unnamed: 0,0,1
0,442.788477,3504.082019
1,83233.660212,9067.8708
2,43.916213,347.538432
3,40924.985131,4458.562523
4,40924.985131,4458.562523
5,442.788477,3504.082019
6,83233.660212,9067.8708


P_zj_given_xi:


Unnamed: 0,0,1,2,3,4,5,6
0,0.160284,0.932729,0.160284,0.932729,0.932729,0.160284,0.932729
1,0.839716,0.067271,0.839716,0.067271,0.067271,0.839716,0.067271




P_zj:


Unnamed: 0,0
0,2.105885
1,1.394115


P_zj_fraction


Unnamed: 0,0
0,0.601681
1,0.398319




Likeliest peptide z for each datapoint x:
[1 0 1 0 0 1 0]


Value of likeliest z:
[0.83971595 0.93272928 0.83971595 0.93272928 0.93272928 0.83971595
 0.93272928]




Unnamed: 0,0,1
2,1087464.0,1087464.0
2,1087464.0,1087464.0
3,43.91621,347.5384
9,40924.99,4458.563
9,40924.99,4458.563
2,1087464.0,1087464.0
9,40924.99,4458.563


Unnamed: 0,0,1
0,1087464.0,1087464.0
1,1087464.0,1087464.0
2,43.91621,347.5384
3,40924.99,4458.563
4,40924.99,4458.563
5,1087464.0,1087464.0
6,40924.99,4458.563


P_zj_given_xi:


Unnamed: 0,0,1,2,3,4,5,6
0,0.827915,0.827915,0.378088,0.977857,0.977857,0.827915,0.977857
1,0.172085,0.172085,0.621912,0.022143,0.022143,0.172085,0.022143




P_zj:


Unnamed: 0,0
0,2.897701
1,0.602299


P_zj_fraction


Unnamed: 0,0
0,0.827915
1,0.172085




Likeliest peptide z for each datapoint x:
[0 0 1 0 0 0 0]


Value of likeliest z:
[0.82791451 0.82791451 0.62191236 0.9778568  0.9778568  0.82791451
 0.9778568 ]




Unnamed: 0,0,1
2,1087464.0,1087464.0
7,83233.66,9067.871
2,1087464.0,1087464.0
2,1087464.0,1087464.0
2,1087464.0,1087464.0
3,43.91621,347.5384
8,132783.2,14466.03


Unnamed: 0,0,1
0,1087464.0,1087464.0
1,83233.66,9067.871
2,1087464.0,1087464.0
3,1087464.0,1087464.0
4,1087464.0,1087464.0
5,43.91621,347.5384
6,132783.2,14466.03


P_zj_given_xi:


Unnamed: 0,0,1,2,3,4,5,6
0,0.722339,0.959806,0.722339,0.722339,0.722339,0.247405,0.959806
1,0.277661,0.040194,0.277661,0.277661,0.277661,0.752595,0.040194




P_zj:


Unnamed: 0,0
0,2.528187
1,0.971813


P_zj_fraction


Unnamed: 0,0
0,0.722339
1,0.277661




Likeliest peptide z for each datapoint x:
[0 0 0 0 0 1 0]


Value of likeliest z:
[0.72233903 0.95980579 0.72233903 0.72233903 0.72233903 0.75259451
 0.95980579]




Unnamed: 0,0,1
4,13198.3,1437.885
3,43.91621,347.5384
3,43.91621,347.5384
5,79290.09,8638.24
5,79290.09,8638.24
2,1087464.0,1087464.0
0,442.7885,3504.082


Unnamed: 0,0,1
0,13198.3,1437.885
1,43.91621,347.5384
2,43.91621,347.5384
3,79290.09,8638.24
4,79290.09,8638.24
5,1087464.0,1087464.0
6,442.7885,3504.082


P_zj_given_xi:


Unnamed: 0,0,1,2,3,4,5,6
0,0.905653,0.116723,0.116723,0.905653,0.905653,0.511188,0.116723
1,0.094347,0.883277,0.883277,0.094347,0.094347,0.488812,0.883277




P_zj:


Unnamed: 0,0
0,1.789158
1,1.710842


P_zj_fraction


Unnamed: 0,0
0,0.511188
1,0.488812




Likeliest peptide z for each datapoint x:
[0 1 1 0 0 0 1]


Value of likeliest z:
[0.90565277 0.88327667 0.88327667 0.90565277 0.90565277 0.51118805
 0.88327667]




Unnamed: 0,0,1
3,43.91621,347.5384
8,132783.2,14466.03
0,442.7885,3504.082
2,1087464.0,1087464.0
3,43.91621,347.5384
7,83233.66,9067.871
9,40924.99,4458.563


Unnamed: 0,0,1
0,43.91621,347.5384
1,132783.2,14466.03
2,442.7885,3504.082
3,1087464.0,1087464.0
4,43.91621,347.5384
5,83233.66,9067.871
6,40924.99,4458.563


P_zj_given_xi:


Unnamed: 0,0,1,2,3,4,5,6
0,0.116723,0.905653,0.116723,0.511188,0.116723,0.905653,0.905653
1,0.883277,0.094347,0.883277,0.488812,0.883277,0.094347,0.094347




P_zj:


Unnamed: 0,0
0,1.789158
1,1.710842


P_zj_fraction


Unnamed: 0,0
0,0.511188
1,0.488812




Likeliest peptide z for each datapoint x:
[1 0 1 0 1 0 0]


Value of likeliest z:
[0.88327667 0.90565277 0.88327667 0.51118805 0.88327667 0.90565277
 0.90565277]




Unnamed: 0,0,1
4,13198.3,1437.885
2,1087464.0,1087464.0
6,609.5767,609.5766
8,132783.2,14466.03
2,1087464.0,1087464.0
2,1087464.0,1087464.0
1,353114.0,38469.92


Unnamed: 0,0,1
0,13198.3,1437.885
1,1087464.0,1087464.0
2,609.5767,609.5766
3,132783.2,14466.03
4,1087464.0,1087464.0
5,1087464.0,1087464.0
6,353114.0,38469.92


P_zj_given_xi:


Unnamed: 0,0,1,2,3,4,5,6
0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,7.955992e-23,7.3027770000000005e-22,7.302776e-22,7.955992e-23,7.3027770000000005e-22,7.3027770000000005e-22,7.955992e-23




P_zj:


Unnamed: 0,0
0,3.5
1,1.579895e-21


P_zj_fraction


Unnamed: 0,0
0,1.0
1,4.513986000000001e-22




Likeliest peptide z for each datapoint x:
[0 0 0 0 0 0 0]


Value of likeliest z:
[1. 1. 1. 1. 1. 1. 1.]




Unnamed: 0,0,1
6,609.5767,609.5766
1,353114.0,38469.92
6,609.5767,609.5766
0,442.7885,3504.082
8,132783.2,14466.03
2,1087464.0,1087464.0
9,40924.99,4458.563


Unnamed: 0,0,1
0,609.5767,609.5766
1,353114.0,38469.92
2,609.5767,609.5766
3,442.7885,3504.082
4,132783.2,14466.03
5,1087464.0,1087464.0
6,40924.99,4458.563


P_zj_given_xi:


Unnamed: 0,0,1,2,3,4,5,6
0,0.827915,0.977857,0.827915,0.378088,0.977857,0.827915,0.977857
1,0.172085,0.022143,0.172085,0.621912,0.022143,0.172085,0.022143




P_zj:


Unnamed: 0,0
0,2.897701
1,0.602299


P_zj_fraction


Unnamed: 0,0
0,0.827915
1,0.172085




Likeliest peptide z for each datapoint x:
[0 0 0 1 0 0 0]


Value of likeliest z:
[0.82791452 0.9778568  0.82791452 0.62191235 0.9778568  0.82791452
 0.9778568 ]




Unnamed: 0,0,1
4,13198.296557,1437.8852
9,40924.985131,4458.562523
1,353113.978894,38469.91614
6,609.576654,609.576628
7,83233.660212,9067.8708
0,442.788477,3504.082019
3,43.916213,347.538432


Unnamed: 0,0,1
0,13198.296557,1437.8852
1,40924.985131,4458.562523
2,353113.978894,38469.91614
3,609.576654,609.576628
4,83233.660212,9067.8708
5,442.788477,3504.082019
6,43.916213,347.538432


P_zj_given_xi:


Unnamed: 0,0,1,2,3,4,5,6
0,0.959806,0.959806,0.959806,0.722339,0.959806,0.247405,0.247405
1,0.040194,0.040194,0.040194,0.277661,0.040194,0.752595,0.752595




P_zj:


Unnamed: 0,0
0,2.528187
1,0.971813


P_zj_fraction


Unnamed: 0,0
0,0.722339
1,0.277661




Likeliest peptide z for each datapoint x:
[0 0 0 0 0 1 1]


Value of likeliest z:
[0.95980579 0.95980579 0.95980579 0.72233904 0.95980579 0.75259451
 0.75259451]




Unnamed: 0,0,1
9,40924.99,4458.563
1,353114.0,38469.92
6,609.5767,609.5766
2,1087464.0,1087464.0
7,83233.66,9067.871
7,83233.66,9067.871
1,353114.0,38469.92


Unnamed: 0,0,1
0,40924.99,4458.563
1,353114.0,38469.92
2,609.5767,609.5766
3,1087464.0,1087464.0
4,83233.66,9067.871
5,83233.66,9067.871
6,353114.0,38469.92


P_zj_given_xi:


Unnamed: 0,0,1,2,3,4,5,6
0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,7.794933e-46,7.794933e-46,7.154941e-45,7.154941e-45,7.794933e-46,7.794933e-46,7.794933e-46




P_zj:


Unnamed: 0,0
0,3.5
1,9.103675e-45


P_zj_fraction


Unnamed: 0,0
0,1.0
1,2.60105e-45




Likeliest peptide z for each datapoint x:
[0 0 0 0 0 0 0]


Value of likeliest z:
[1. 1. 1. 1. 1. 1. 1.]




Unnamed: 0,0,1
1,353113.978894,38469.91614
3,43.916213,347.538432
7,83233.660212,9067.8708
4,13198.296557,1437.8852
0,442.788477,3504.082019
9,40924.985131,4458.562523
9,40924.985131,4458.562523


Unnamed: 0,0,1
0,353113.978894,38469.91614
1,43.916213,347.538432
2,83233.660212,9067.8708
3,13198.296557,1437.8852
4,442.788477,3504.082019
5,40924.985131,4458.562523
6,40924.985131,4458.562523


P_zj_given_xi:


Unnamed: 0,0,1,2,3,4,5,6
0,0.970636,0.312747,0.970636,0.970636,0.312747,0.970636,0.970636
1,0.029364,0.687253,0.029364,0.029364,0.687253,0.029364,0.029364




P_zj:


Unnamed: 0,0
0,2.739338
1,0.760662


P_zj_fraction


Unnamed: 0,0
0,0.782668
1,0.217332




Likeliest peptide z for each datapoint x:
[0 1 0 0 1 0 0]


Value of likeliest z:
[0.9706364  0.68725344 0.9706364  0.9706364  0.68725344 0.9706364
 0.9706364 ]




Unnamed: 0,0,1
6,609.5767,609.5766
0,442.7885,3504.082
7,83233.66,9067.871
3,43.91621,347.5384
2,1087464.0,1087464.0
8,132783.2,14466.03
8,132783.2,14466.03


Unnamed: 0,0,1
0,609.5767,609.5766
1,442.7885,3504.082
2,83233.66,9067.871
3,43.91621,347.5384
4,1087464.0,1087464.0
5,132783.2,14466.03
6,132783.2,14466.03


P_zj_given_xi:


Unnamed: 0,0,1,2,3,4,5,6
0,0.637879,0.182064,0.941755,0.182064,0.637879,0.941755,0.941755
1,0.362121,0.817936,0.058245,0.817936,0.362121,0.058245,0.058245




P_zj:


Unnamed: 0,0
0,2.232575
1,1.267425


P_zj_fraction


Unnamed: 0,0
0,0.637879
1,0.362121




Likeliest peptide z for each datapoint x:
[0 1 0 1 0 0 0]


Value of likeliest z:
[0.63787865 0.81793559 0.94175479 0.81793559 0.63787864 0.94175479
 0.94175479]




Unnamed: 0,0,1
5,79290.092111,8638.239712
6,609.576654,609.576628
4,13198.296557,1437.8852
4,13198.296557,1437.8852
0,442.788477,3504.082019
9,40924.985131,4458.562523
1,353113.978894,38469.91614


Unnamed: 0,0,1
0,79290.092111,8638.239712
1,609.576654,609.576628
2,13198.296557,1437.8852
3,13198.296557,1437.8852
4,442.788477,3504.082019
5,40924.985131,4458.562523
6,353113.978894,38469.91614


P_zj_given_xi:


Unnamed: 0,0,1,2,3,4,5,6
0,0.992298,0.93349,0.992298,0.992298,0.639452,0.992298,0.992298
1,0.007702,0.06651,0.007702,0.007702,0.360548,0.007702,0.007702




P_zj:


Unnamed: 0,0
0,3.267215
1,0.232785


P_zj_fraction


Unnamed: 0,0
0,0.93349
1,0.06651




Likeliest peptide z for each datapoint x:
[0 0 0 0 0 0 0]


Value of likeliest z:
[0.99229761 0.93349    0.99229761 0.99229761 0.63945195 0.99229761
 0.99229761]




Unnamed: 0,0,1
4,13198.296557,1437.8852
8,132783.199901,14466.033308
5,79290.092111,8638.239712
8,132783.199901,14466.033308
9,40924.985131,4458.562523
4,13198.296557,1437.8852
6,609.576654,609.576628


Unnamed: 0,0,1
0,13198.296557,1437.8852
1,132783.199901,14466.033308
2,79290.092111,8638.239712
3,132783.199901,14466.033308
4,40924.985131,4458.562523
5,13198.296557,1437.8852
6,609.576654,609.576628


P_zj_given_xi:


Unnamed: 0,0,1,2,3,4,5,6
0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,1.682077e-64,1.682077e-64,1.682077e-64,1.682077e-64,1.682077e-64,1.682077e-64,1.5439729999999999e-63




P_zj:


Unnamed: 0,0
0,3.5
1,1.2766089999999998e-63


P_zj_fraction


Unnamed: 0,0
0,1.0
1,3.647456e-64




Likeliest peptide z for each datapoint x:
[0 0 0 0 0 0 0]


Value of likeliest z:
[1. 1. 1. 1. 1. 1. 1.]




## output

In [111]:
p_zj_bootstrap_results_fraction = p_zj_bootstrap_results/np.sum(p_zj_bootstrap_results, axis = 1)[0]
print("All zj bootstrapping values: ")
display(pd.DataFrame(p_zj_bootstrap_results_fraction))

p_zj_bootstrap_results_fraction_avg = np.sum(p_zj_bootstrap_results_fraction, axis = 0)/p_zj_bootstrap_results.shape[0]
print("AVG zj bootstrapping values: \n", p_zj_bootstrap_results_fraction_avg, sep="")

p_zj_bootstrap_results__fraction_std = np.std(p_zj_bootstrap_results_fraction, axis = 0)
print("STD of average zj bootstrapping values: \n", p_zj_bootstrap_results__fraction_std, sep="")

for i, avg_value in enumerate(p_zj_bootstrap_results_fraction_avg):
    print("Peptide", i, ": ", avg_value, "±", p_zj_bootstrap_results__fraction_std[i])

All zj bootstrapping values: 


Unnamed: 0,0,1
0,0.93349,0.06651
1,1.0,3.647456e-64
2,0.93349,0.06651
3,1.0,3.647465e-64
4,1.0,3.601833e-32
5,0.963654,0.03634558
6,1.0,3.647465e-64
7,1.0,3.647456e-64
8,0.384497,0.6155025
9,0.93349,0.06651


AVG zj bootstrapping values: 
[0.82877326 0.17122674]
STD of average zj bootstrapping values: 
[0.1926491 0.1926491]
Peptide 0 :  0.8287732621419402 ± 0.19264910183107203
Peptide 1 :  0.17122673785805959 ± 0.192649101831072
