In [1]:
import TunaSims
import numpy as np
from funcOb import func_ob
import pandas as pd
import tools_fast

In [2]:
def harmonic_mean_distance(p, q):
    r"""
    Harmonic mean term (returned without the ``1 -`` complement):

    .. math::

        2\sum(\frac{P_{i}Q_{i}}{P_{i}+Q_{i}})

    The value returned is the summation term itself, not ``1 - 2\sum(...)``.
    For two identical inputs that each sum to 1 the result is 1.

    Positions where ``P_i + Q_i == 0`` contribute 0 to the sum instead of
    turning the whole result into ``nan`` through a 0/0 division.

    :param p: array-like of intensities.
    :param q: array-like of intensities, broadcastable against ``p``.
    :return: scalar value of the expression above.
    """
    p = np.asarray(p, dtype=float)
    q = np.asarray(q, dtype=float)
    denominator = p + q
    # Pre-filled with zeros so entries skipped by ``where`` stay at 0.
    terms = np.zeros_like(denominator)
    np.divide(p * q, denominator, out=terms, where=denominator != 0)
    return 2 * np.sum(terms)

def lorentzian_distance(p, q):
    r"""
    Lorentzian distance between two intensity vectors:

    .. math::

        \sum{\ln(1+|P_i-Q_i|)}

    :param p: numpy array of intensities.
    :param q: numpy array of intensities, broadcastable against ``p``.
    :return: scalar; 0 when ``p`` and ``q`` are equal element-wise.
    """
    abs_gap = np.abs(p - q)
    log_terms = np.log(1 + abs_gap)
    return np.sum(log_terms)

def matusita_distance(p, q):
    r"""
    Squared Matusita distance (no outer square root is applied):

    .. math::

        \sum(\sqrt{P_{i}}-\sqrt{Q_{i}})^2

    The classical Matusita distance is the square root of this quantity;
    apply ``np.sqrt`` to the result if that form is needed.

    :param p: numpy array of non-negative intensities.
    :param q: numpy array of non-negative intensities, broadcastable against ``p``.
    :return: scalar sum of squared differences of the square roots.
    """
    root_gap = np.sqrt(p) - np.sqrt(q)
    return np.sum(np.power(root_gap, 2))

def probabilistic_symmetric_chi_squared_distance(p, q):
    r"""
    Probabilistic symmetric chi-squared (:math:`\chi^2`) distance:

    .. math::

        \frac{1}{2} \times \sum\frac{(P_{i}-Q_{i}\ )^2}{P_{i}+Q_{i}\ }

    Positions where ``P_i + Q_i == 0`` contribute 0 to the sum instead of
    turning the whole result into ``nan`` through a 0/0 division.

    :param p: array-like of intensities.
    :param q: array-like of intensities, broadcastable against ``p``.
    :return: scalar value of the expression above.
    """
    p = np.asarray(p, dtype=float)
    q = np.asarray(q, dtype=float)
    denominator = p + q
    # Pre-filled with zeros so entries skipped by ``where`` stay at 0.
    terms = np.zeros_like(denominator)
    np.divide(np.power(p - q, 2), denominator, out=terms, where=denominator != 0)
    return 1 / 2 * np.sum(terms)

def sigmoid(z):
    r"""
    Logistic sigmoid:

    .. math::

        \frac{1}{1+e^{-z}}

    :param z: scalar or numpy array.
    :return: value(s) of the expression above, element-wise for arrays.
    """
    decay = np.exp(-z)
    return 1 / (1 + decay)


In [3]:
# Toy two-row spectra. Column 1 holds the intensities that the comparison
# cell normalizes; column 0 is presumably m/z -- TODO confirm.
query = np.array([
    [1, 10.0],
    [2, 50.0],
])
target = np.array([
    [1, 20.0],
    [2, 40.0],
])

In [4]:
# Keyword arguments common to both reference configurations below.
shared_settings = dict(
    query_normalized_intensity_a=1,
    target_normalized_intensity_a=1,
    sigmoid_score=False,
)

# ExpandedTuna instance that the comparison cell checks against
# harmonic_mean_distance.
harmonic_mean = TunaSims.ExpandedTuna(
    **shared_settings,
    mult_a=2,
    add_norm_b=1,
)

# ExpandedTuna instance that the comparison cell checks against
# probabilistic_symmetric_chi_squared_distance.
prob = TunaSims.ExpandedTuna(
    **shared_settings,
    dif_a=1 / 2,
    dif_b=2,
    add_norm_b=1,
)

In [5]:
def _unit_sum_intensity(spectrum):
    """Return column 1 of ``spectrum`` divided by that column's sum."""
    return spectrum[:,1]/sum(spectrum[:,1])


# Each printed value is (ExpandedTuna prediction) - (reference formula);
# a value near zero means the two agree on the toy spectra.
tuna_harmonic = harmonic_mean.predict(query, target)
reference_harmonic = harmonic_mean_distance(_unit_sum_intensity(query), _unit_sum_intensity(target))
print(f'harmonic_mean: {tuna_harmonic - reference_harmonic}')

tuna_chi = prob.predict(query, target)
reference_chi = probabilistic_symmetric_chi_squared_distance(_unit_sum_intensity(query), _unit_sum_intensity(target))
print(f'Chisquare: {tuna_chi - reference_chi}')

harmonic_mean: 2.2075794303688667e-08
Chisquare: -3.449342873829142e-09


In [16]:
# NOTE(review): this is an absolute, machine-specific path, so the cell only
# runs on the original author's machine. Also, unpickling can execute arbitrary
# code -- only load files from a trusted source.
demo_matches = pd.read_pickle('/Users/jonahpoczobutt/projects/TunaRes/metlinGnps_NIST20_matchedPol/intermediateOutputs/splitMatches/train/10_ppm/chunk_1.pkl')
#demo_matches['score'] = 1 - demo_matches['InchiCoreMatch']

# Build the 'score' column: sigmoid of the harmonic-mean term computed on the
# peaks matched between each row's query and target spectra.
matched_scores = list()
# Iterate the two spectrum columns directly; indexing demo_matches.iloc[i][...]
# would build a full row Series twice per iteration.
for query_spec, target_spec in zip(demo_matches['query'], demo_matches['target']):

    matched = tools_fast.match_spectrum(query_spec, target_spec, ms2_da = 0.05)
    # Columns 1 and 2 of `matched` are each scaled to sum to 1 before scoring.
    matched_scores.append(sigmoid(harmonic_mean_distance(matched[:,1]/sum(matched[:,1]), matched[:,2]/sum(matched[:,2]))))


demo_matches['score'] = matched_scores



In [27]:
# Values handed to func_ob as `init_vals`; presumably the starting point for
# the parameters being fitted -- confirm against funcOb.
init_vals = dict(
    mult_a=10,
    add_norm_b=0,
    add_norm_a=0.5,
    query_normalized_intensity_a=1.0,
    target_normalized_intensity_a=1.0,
)

# Values handed to func_ob as `fixed_vals`.
fixed_vals = dict(sigmoid_score=True)

# (low, high) pairs handed to func_ob as `bounds`.
bounds = dict(
    add_norm_b=(0, 2),
    add_norm_a=(0, 3),
)

# NOTE(review): the label string is spelled 'teesterooni' while the variable is
# `testerooni`; the string is kept exactly as written.
testerooni = func_ob(
    'teesterooni',
    sim_func=TunaSims.ExpandedTuna,
    init_vals=init_vals,
    fixed_vals=fixed_vals,
    bounds=bounds,
    max_iter=100000,
    lambdas=2,
)

In [28]:
# Fit on demo_matches. With verbose = 10000 the recorded output shows a
# "completed N iterations" line followed by three numbers every 10000 iterations.
# NOTE(review): what the three printed numbers represent is not visible in this
# notebook -- confirm in funcOb.
testerooni.fit(demo_matches, verbose = 10000)


completed 10000 iterations
9.927692857888239
1.4018383966179075
0.9999999670697464
completed 20000 iterations
9.92769286119017
1.4018384005209774
0.9999999140885301
completed 30000 iterations
9.927692862026849
1.4018384850700667
0.9999999707239273
completed 40000 iterations
9.927692860077459
1.4018384873257586
0.9999999512182258
completed 50000 iterations
9.927692853730544
1.4018385038467431
1.0000001652463468
completed 60000 iterations
9.927692859084528
1.4018384707243818
1.0000000617791964
completed 70000 iterations
9.927692860393202
1.4018385004324303
1.0000000463699148
completed 80000 iterations
9.927692861737912
1.4018385192035507
0.9999999513833372
completed 90000 iterations
9.927692863671304
1.4018385281716899
0.9999999940302529
completed 100000 iterations
9.927692860039048
1.401838570301616
1.00000002396219


In [None]:
# Display the query_intensity_int attribute of the similarity-function object
# held by testerooni.
testerooni.sim_func.query_intensity_int

In [None]:
# Display the grads1_score_agg attribute of the similarity-function object
# held by testerooni.
testerooni.sim_func.grads1_score_agg

In [None]:
# Spot check: recompute the harmonic-mean term for the row at position 522.
# NOTE(review): 522 is a hard-coded position; why this row was chosen is not
# recorded in the notebook.
spot_check_row = demo_matches.iloc[522]
matched = tools_fast.match_spectrum(spot_check_row['query'], spot_check_row['target'], ms2_da = 0.05)
# Last expression of the cell, so its value is what the cell displays.
harmonic_mean_distance(matched[:,1]/sum(matched[:,1]), matched[:,2]/sum(matched[:,2]))

In [None]:
# Length of the first axis of `matched`, the most recent match_spectrum result.
len(matched)

In [None]:
# Sum of expanded_mults divided by add_norm, both read from the
# similarity-function object held by testerooni.
sum(testerooni.sim_func.expanded_mults/testerooni.sim_func.add_norm)

In [None]:
# Display pred_val from two fitted objects side by side.
# NOTE(review): `testerooni2` is not defined in any cell visible in this
# notebook; on a fresh kernel this raises NameError unless it is created elsewhere.
testerooni.pred_val, testerooni2.pred_val

In [None]:
# Display the mult_a attribute of the similarity-function object held by testerooni.
testerooni.sim_func.mult_a

In [None]:
# Display the add_norm_b attribute of the similarity-function object held by testerooni.
testerooni.sim_func.add_norm_b