In [1]:
import TunaSims
import numpy as np
from funcOb import func_ob
import pandas as pd
import tools_fast

In [2]:
def harmonic_mean_distance(p, q):
    r"""
    Harmonic-mean term between two intensity vectors:

    .. math::

        2\sum(\frac{P_{i}Q_{i}}{P_{i}+Q_{i}})

    Note that this is the summed term only: the leading ``1 -`` of the
    textbook harmonic mean *distance* is intentionally not applied here, so
    two identical vectors that each sum to 1 give 1.0 rather than 0.0.

    Parameters
    ----------
    p, q : array-like
        Intensity vectors of the same (or broadcastable) shape. They are
        assumed to be non-negative.

    Returns
    -------
    float
        ``2 * sum(p * q / (p + q))``, where bins with ``p + q == 0``
        contribute 0 instead of producing NaN from 0/0.
    """
    p = np.asarray(p, dtype=float)
    q = np.asarray(q, dtype=float)

    product = p * q
    total = p + q

    # Divide only where the denominator is non-zero; bins that are empty in
    # both vectors keep the pre-filled 0 instead of becoming NaN.
    terms = np.zeros_like(total)
    np.divide(product, total, out=terms, where=total != 0)
    return 2 * np.sum(terms)

def lorentzian_distance(p, q):
    r"""
    Lorentzian distance between two intensity vectors:

    .. math::

        \sum{\ln(1+|P_i-Q_i|)}

    Each bin contributes the natural log of one plus the absolute intensity
    difference; the per-bin contributions are summed into a single value.
    """
    abs_gap = np.abs(p - q)
    per_bin = np.log(1 + abs_gap)
    return np.sum(per_bin)

def matusita_distance(p, q):
    r"""
    Squared Matusita distance between two intensity vectors:

    .. math::

        \sum(\sqrt{P_{i}}-\sqrt{Q_{i}})^2

    No outer square root is taken, so the value returned is the square of
    the classical Matusita distance
    :math:`\sqrt{\sum(\sqrt{P_{i}}-\sqrt{Q_{i}})^2}`.
    """
    root_gap = np.sqrt(p) - np.sqrt(q)
    squared_gap = np.power(root_gap, 2)
    return np.sum(squared_gap)

def probabilistic_symmetric_chi_squared_distance(p, q):
    r"""
    Probabilistic symmetric χ2 distance:

    .. math::

        \frac{1}{2} \times \sum\frac{(P_{i}-Q_{i})^2}{P_{i}+Q_{i}}

    Parameters
    ----------
    p, q : array-like
        Intensity vectors of the same (or broadcastable) shape. They are
        assumed to be non-negative.

    Returns
    -------
    float
        Half the sum of ``(p - q)**2 / (p + q)``, where bins with
        ``p + q == 0`` contribute 0 instead of producing NaN from 0/0.
    """
    p = np.asarray(p, dtype=float)
    q = np.asarray(q, dtype=float)

    squared_gap = np.power(p - q, 2)
    total = p + q

    # Divide only where the denominator is non-zero; bins that are empty in
    # both vectors keep the pre-filled 0 instead of becoming NaN.
    terms = np.zeros_like(total)
    np.divide(squared_gap, total, out=terms, where=total != 0)
    return 1 / 2 * np.sum(terms)

def sigmoid(z):
    """Logistic function ``1 / (1 + exp(-z))``, applied element-wise by numpy."""
    decay = np.exp(-z)
    return 1 / (1 + decay)


In [3]:
# Two tiny two-row spectra for sanity checks. Column 1 is what the later cells
# normalise and score as intensity; column 0 is presumably m/z — TODO confirm.
query = np.array([(1.0, 10.0), (2.0, 50.0)])
target = np.array([(1.0, 20.0), (2.0, 40.0)])

In [35]:
# Settings common to both hand-configured ExpandedTuna instances below.
shared_tuna_kwargs = dict(
    query_normalized_intensity_a=1,
    target_normalized_intensity_a=1,
    sigmoid_score=False,
)

# Configuration compared against harmonic_mean_distance in the next cell.
harmonic_mean = TunaSims.ExpandedTuna(
    **shared_tuna_kwargs,
    mult_a=2,
    add_norm_b=1,
)

# Configuration compared against probabilistic_symmetric_chi_squared_distance.
prob = TunaSims.ExpandedTuna(
    **shared_tuna_kwargs,
    dif_a=1/2,
    dif_b=2,
    add_norm_b=1,
)

In [36]:
def _unit_intensities(spectrum):
    """Return column 1 of ``spectrum`` divided by that column's sum."""
    return spectrum[:,1]/sum(spectrum[:,1])


# Difference between each ExpandedTuna prediction and its closed-form reference.
# predict() is called before the intensities are normalised, as in the original
# one-liners, in case predict touches its inputs.
tuna_harmonic = harmonic_mean.predict(query, target)
reference_harmonic = harmonic_mean_distance(_unit_intensities(query), _unit_intensities(target))
print(f'harmonic_mean: {tuna_harmonic - reference_harmonic}')

tuna_chi = prob.predict(query, target)
reference_chi = probabilistic_symmetric_chi_squared_distance(_unit_intensities(query), _unit_intensities(target))
print(f'Chisquare: {tuna_chi - reference_chi}')

harmonic_mean: 2.2075794303688667e-08
Chisquare: -3.449342873829142e-09


In [6]:
# NOTE(review): the commented-out lines below appear to be the recipe that built the
# cached 'harmonic_yerp.pkl' loaded at the end of this cell: match each query/target
# pair with tools_fast.match_spectrum, then score it with
# sigmoid(harmonic_mean_distance(...)) on sum-normalised intensities — TODO confirm,
# and consider moving the recipe into a script. It reads from an absolute,
# machine-specific path, so it will not run on another machine as written.
# demo_matches = pd.read_pickle('/Users/jonahpoczobutt/projects/TunaRes/metlinGnps_NIST20_matchedPol/intermediateOutputs/splitMatches/train/10_ppm/chunk_1.pkl')
# demo_matches['score'] = 1 - demo_matches['InchiCoreMatch']
# matched_scores = list()
# for i in range(len(demo_matches)):
    
#     matched = tools_fast.match_spectrum(demo_matches.iloc[i]['query'], demo_matches.iloc[i]['target'], ms2_da = 0.05)
#     matched_scores.append(sigmoid(harmonic_mean_distance(matched[:,1]/sum(matched[:,1]), matched[:,2]/sum(matched[:,2]))))


# demo_matches['score'] = matched_scores

# Load the cached matches that the fit below is run on. pd.read_pickle can execute
# arbitrary code while unpickling, so only load files from a trusted source.
demo_matches = pd.read_pickle('harmonic_yerp.pkl')

In [43]:
# Starting values handed to func_ob as init_vals (presumably the parameters the
# optimiser is free to move — confirm against funcOb).
init_vals = dict(
    target_normalized_intensity_a=1,
    mult_a=1,
    add_norm_b=1,
    add_norm_a=1,
    query_normalized_intensity_a=1,
)

# Values handed to func_ob as fixed_vals.
fixed_vals = dict(sigmoid_score=True)

# (low, high) pairs handed to func_ob as bounds; only these two parameters have one.
bounds = dict(
    add_norm_b=(0, 2),
    add_norm_a=(0, 3),
)

testerooni = func_ob(
    'teesterooni',
    sim_func=TunaSims.ExpandedTuna,
    init_vals=init_vals,
    fixed_vals=fixed_vals,
    bounds=bounds,
    max_iter=100000,
    lambdas=3,
    tol=1e-10,
)

In [44]:
# Inspect running_grad on the freshly constructed object; no output was recorded
# for this cell.
testerooni.running_grad

In [45]:
# Same running_grad inspection repeated in a second cell; again no output was recorded.
testerooni.running_grad

In [46]:
# Iteration counter before fit is called; the recorded output is 0.
testerooni.n_iter

0

In [47]:
# Run the optimisation on demo_matches. With verbose = 1000 the recorded log shows a
# progress block after every 1000 iterations.
testerooni.fit(demo_matches, verbose = 1000)
# Report the object's `converged` attribute once fit has returned.
print(testerooni.converged)


completed 1000 iterations
1.1796711973509633
0.6430408586757795
0.9996511848376897
1.078126974302368
1.1045418297686833
1.2635584538695993e-05
completed 2000 iterations
1.1788976932471913
0.6418710028363426
0.9989175884450892
1.0808742518063539
1.1010234173064661
9.710659629303153e-06
completed 3000 iterations
1.177628500557064
0.6393303336222561
0.9987801742131306
1.0837042599901587
1.0968737993774835
3.1581228764431446e-07
completed 4000 iterations
1.177254970289432
0.6412962361905129
0.9998473674768532
1.0848322271149347
1.0953556536508307
3.021854369817982e-06
completed 5000 iterations
1.1755723442822998
0.6400433341417067
1.0000335812756103
1.0859599586861397
1.0924256609384713
1.040689465713508e-05
completed 6000 iterations
1.175220744165487
0.6395445780748179
1.0000061226411128
1.0869277014766519
1.0910839656950695
1.5066566590793985e-06
completed 7000 iterations
1.174188882226232
0.6390932525105376
1.0001040021914318
1.0871788142333263
1.0897183465853202
1.187242110063914e-08
c

In [12]:
# Iteration counter after fitting.
# NOTE(review): this cell's execution count is lower than the fit cell's, so the
# recorded output may come from an earlier run — re-run after fitting to confirm.
testerooni.n_iter

8319

In [13]:
# Value of target_normalized_intensity_a held by the underlying similarity function
# (init_vals started it at 1); presumably the fitted value — confirm against funcOb.
testerooni.sim_func.target_normalized_intensity_a

1.6922533646790674

In [14]:
# running_grad after fitting; the recorded value is below the tol = 1e-10 passed to
# func_ob (presumably the convergence criterion — confirm against funcOb).
testerooni.running_grad

8.980352927746797e-11