In [1]:
import os

import numpy as np
import pandas as pd

import TunaSims
from funcOb import func_ob

In [2]:
def harmonic_mean_distance(p, q):
    r"""
    Harmonic-mean overlap term between two aligned vectors:

    .. math::

        2\sum(\frac{P_{i}Q_{i}}{P_{i}+Q_{i}})

    Note: the conventional harmonic mean *distance* is ``1 -`` this value.
    This function returns only the summation term and does not apply the
    subtraction.

    Positions where ``P_i + Q_i == 0`` are skipped (they contribute 0)
    instead of turning the whole result into ``nan`` through a 0/0 division.

    :param p: array-like of values, broadcastable against ``q``.
    :param q: array-like of values, broadcastable against ``p``.
    :return: ``2 * sum(p * q / (p + q))`` over positions with a non-zero
        denominator; ``0.0`` when no such position exists.
    """
    p = np.asarray(p)
    q = np.asarray(q)
    numerator = np.asarray(p * q)
    denominator = np.asarray(p + q)
    # Drop positions with an empty denominator so 0/0 cannot poison the sum.
    keep = denominator != 0
    return 2 * np.sum(numerator[keep] / denominator[keep])

def lorentzian_distance(p, q):
    r"""
    Lorentzian distance between two aligned vectors:

    .. math::

        \sum{\ln(1+|P_i-Q_i|)}

    :param p: NumPy array of values.
    :param q: NumPy array of values, broadcastable against ``p``.
    :return: sum over all positions of ``log(1 + |p - q|)``.
    """
    abs_gap = np.absolute(p - q)
    log_terms = np.log(1 + abs_gap)
    return np.sum(log_terms)

def matusita_distance(p, q):
    r"""
    Squared Matusita distance between two aligned vectors:

    .. math::

        \sum(\sqrt{P_{i}}-\sqrt{Q_{i}})^2

    Note: no outer square root is applied, so the value returned is the
    square of the conventional Matusita distance.

    :param p: NumPy array of non-negative values.
    :param q: NumPy array of non-negative values, broadcastable against ``p``.
    :return: sum over all positions of ``(sqrt(p) - sqrt(q)) ** 2``.
    """
    root_gap = np.sqrt(p) - np.sqrt(q)
    squared_terms = np.power(root_gap, 2)
    return np.sum(squared_terms)

def probabilistic_symmetric_chi_squared_distance(p, q):
    r"""
    Probabilistic symmetric χ2 distance:

    .. math::

        \frac{1}{2} \times \sum\frac{(P_{i}-Q_{i}\ )^2}{P_{i}+Q_{i}\ }

    Positions where ``P_i + Q_i == 0`` are skipped (they contribute 0)
    instead of turning the whole result into ``nan`` through a 0/0 division.

    :param p: array-like of values, broadcastable against ``q``.
    :param q: array-like of values, broadcastable against ``p``.
    :return: ``0.5 * sum((p - q) ** 2 / (p + q))`` over positions with a
        non-zero denominator; ``0.0`` when no such position exists.
    """
    p = np.asarray(p)
    q = np.asarray(q)
    squared_gap = np.asarray(np.power(p - q, 2))
    total = np.asarray(p + q)
    # Drop positions with an empty denominator so 0/0 cannot poison the sum.
    keep = total != 0
    return 1 / 2 * np.sum(squared_gap[keep] / total[keep])

In [3]:
# Two-row toy inputs. Later cells pass column 1 of each array to the NumPy
# reference functions; column 0 is presumably a peak position (m/z) — confirm.
query = np.array([
    [1, 10],
    [2, 50],
])
target = np.array([
    [1, 20],
    [2, 40],
])

In [4]:
# Two ExpandedTuna instances. The comparison cell below checks `harmonic_mean`
# against harmonic_mean_distance and `prob` against
# probabilistic_symmetric_chi_squared_distance.
# NOTE(review): the meaning of mult_a / dif_a / dif_b is defined in TunaSims,
# not in this notebook — confirm the values reproduce the intended formulas.
harmonic_mean = TunaSims.ExpandedTuna(
    query_intensity_a=1,
    target_intensity_a=1,
    sigmoid_score=False,
    mult_a=2,
)

prob = TunaSims.ExpandedTuna(
    query_intensity_a=1,
    target_intensity_a=1,
    sigmoid_score=False,
    dif_a=1 / 2,
    dif_b=2,
)

In [5]:
# Print, for each ExpandedTuna instance, its prediction on the toy arrays minus
# the matching NumPy reference evaluated on column 1 of the same arrays.
# NOTE(review): presumably a value near 0 would indicate agreement — confirm
# what the intended target is before relying on these numbers.
print(f'harmonic_mean: {harmonic_mean.predict(query, target) - harmonic_mean_distance(query[:,1], target[:,1])}')
print(f'Chisquare: {prob.predict(query, target) - probabilistic_symmetric_chi_squared_distance(query[:,1], target[:,1])}')

harmonic_mean: 4342.222222222223
Chisquare: 97.77777777777777


In [6]:
# Display the grads1_int_param attribute of `prob` (inspected after the
# predict call above; presumably populated by it — confirm in TunaSims).
prob.grads1_int_param

{'query_intensity_a': array([10., 50.], dtype=float32),
 'target_intensity_a': array([20., 40.], dtype=float32)}

In [7]:
# Values handed to func_ob as `init_vals` (presumably the starting point for
# the parameters that get fitted — confirm against funcOb).
init_vals = {
    'query_normalized_intensity_a': 1.0,
    'target_normalized_intensity_a': 1.0,
    'dif_a': 1,
    'dif_b': 1,
}

# Values handed to func_ob as `fixed_vals` (presumably held constant).
fixed_vals = {'sigmoid_score': False}

# NOTE(review): the name string is spelled 'teesterooni' while the variable is
# `testerooni` — confirm whether the difference is intentional.
testerooni = func_ob(
    'teesterooni',
    sim_func=TunaSims.ExpandedTuna,
    init_vals=init_vals,
    fixed_vals=fixed_vals,
)


In [8]:
# Location of the pickled match table. Defaults to the path this notebook was
# originally run with; set the TUNA_DEMO_MATCHES_PATH environment variable to
# point at a copy of the file on another machine.
DEMO_MATCHES_PATH = os.environ.get(
    'TUNA_DEMO_MATCHES_PATH',
    '/Users/jonahpoczobutt/projects/TunaRes/metlinGnps_NIST20_matchedPol/intermediateOutputs/splitMatches/train/10_ppm/chunk_1.pkl',
)

# SECURITY: unpickling can execute arbitrary code — only load trusted files.
demo_matches = pd.read_pickle(DEMO_MATCHES_PATH)
# Add a 'score' column equal to 1 - InchiCoreMatch.
demo_matches['score'] = 1 - demo_matches['InchiCoreMatch']

In [9]:
# Fit the func_ob wrapper on the loaded match table.
# NOTE(review): `verbose = 100` presumably controls how often progress is
# printed — confirm against funcOb. The recorded run below ends in a
# KeyboardInterrupt, so it did not run to completion.
testerooni.fit(demo_matches, verbose = 100)

1.7719636
query_normalized_intensity_a 0.8263531
loss grad: 1.5439271926879883
step 0.012758289803514344
target_normalized_intensity_a 0.9456106
loss grad: 1.5439271926879883
step 0.01459953892522492
dif_a 1.7719636
loss grad: 1.5439271926879883
step 0.027357827808486945
dif_b -5.520637512207031
loss grad: 1.5439271926879883
step -0.08523462376069801
completed 0 iterations
1.3186042
query_normalized_intensity_a 0.67833126
loss grad: 0.6372084617614746
step 0.004322384161449122
target_normalized_intensity_a 0.7725976
loss grad: 0.6372084617614746
step 0.004923057352343391
dif_a 1.3556931
loss grad: 0.6372084617614746
step 0.008638591160714668
dif_b -3.7429794296368617
loss grad: 0.6372084617614746
step -0.02385058164763746
completed 0 iterations
1.2160175
query_normalized_intensity_a 0.6437533
loss grad: 0.432034969329834
step 0.0027812393297733705
target_normalized_intensity_a 0.7301643
loss grad: 0.432034969329834
step 0.0031545650640885017
dif_a 1.2614243
loss grad: 0.432034969329834

KeyboardInterrupt: 

In [None]:
# Display the current query_normalized_intensity_a value held by the wrapped
# similarity function.
testerooni.sim_func.query_normalized_intensity_a

0.9966945934295655