In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib as mpl
from typing import *

In [40]:
# Name of the directory (relative to the working directory) holding this
# protein's HDF5 files.
protein = '1fme'

# Input file with the raw results and output file for the summary tables.
raw_path = f'{protein}/raw.h5'
summary_path = f'{protein}/summary.h5'

# Load the 'timescales' table, move its index levels into ordinary columns and
# rename the column labelled 0 to 'value'.
# NOTE(review): presumably the stored object is an unnamed Series, which is why
# its data ends up in a column labelled 0 -- confirm against the writer.
timescales = (
    pd.DataFrame(pd.read_hdf(raw_path, key='timescales'))
    .reset_index()
    .rename(columns={0: 'value'})
)

In [41]:
def ev(row):
    """Return ``exp(-lag / value)`` for the given record.

    Parameters
    ----------
    row
        Any object that can be indexed with the labels ``'lag'`` and
        ``'value'`` (in this notebook, rows of the ``timescales`` frame).

    Returns
    -------
    The result of ``np.exp(-row['lag'] / row['value'])``; the notebook stores
    it as the eigenvalue belonging to that timescale.
    """
    lag_time, timescale = row['lag'], row['value']
    return np.exp(-lag_time / timescale)

In [42]:
# Same layout as `timescales`, but with 'value' replaced by
# exp(-lag / value) as computed by `ev`.
#
# `ev` only indexes by column label and applies elementwise arithmetic, so it
# is called once on the whole frame instead of once per row through
# `DataFrame.apply(axis=1)`. This avoids a Python-level call per row and also
# behaves sensibly when `timescales` is empty.
# `assign` returns a new frame, so `timescales` itself is left untouched.
eigenvalues = timescales.assign(value=ev(timescales))

In [48]:
# Summarise 'value' within every (hp_ix, lag, process) group:
#   median -- 50% quantile, NaNs ignored
#   lb, ub -- 2.5% and 97.5% quantiles, NaNs ignored
#   count  -- number of non-NaN entries in the group
# NOTE(review): the rows collapsed here are presumably the bs_ix replicates --
# confirm against the layout of the raw table.
# The group keys are turned back into ordinary columns at the end.
ev_summary = (
    eigenvalues
    .groupby(['hp_ix', 'lag', 'process'])['value']
    .agg(
        median=lambda vals: np.nanquantile(vals, 0.5),
        lb=lambda vals: np.nanquantile(vals, 0.025),
        ub=lambda vals: np.nanquantile(vals, 0.975),
        count=lambda vals: vals.notna().sum(),
    )
    .reset_index()
)

In [49]:
# Store the eigenvalue summary (one row per hp_ix / lag / process group) in the
# summary HDF5 file under the key 'eigenvalues'.
ev_summary.to_hdf(summary_path, key='eigenvalues')

In [67]:
# Start from a copy of `eigenvalues` whose 'value' column holds the squared
# eigenvalues, ordered so that within each (hp_ix, bs_ix, lag) group the rows
# appear in increasing `process` order. The order matters because the running
# sum below accumulates in row order.
vamp_eqs = (
    eigenvalues
    .assign(value=eigenvalues['value'] ** 2)
    .sort_values(by=['hp_ix', 'bs_ix', 'lag', 'process'])
)

# value2: running sum of the squared eigenvalues over `process` within each
# (hp_ix, bs_ix, lag) group.
vamp_eqs['value2'] = vamp_eqs.groupby(['hp_ix', 'bs_ix', 'lag'])['value'].cumsum()

# score: 1 + running sum.
# NOTE(review): presumably the constant 1 stands for the stationary process
# that is not part of the timescales table (VAMP-2 style score) -- confirm.
vamp_eqs['score'] = 1 + vamp_eqs['value2']

In [69]:
# Summarise 'score' within every (hp_ix, lag, process) group:
#   median -- 50% quantile, NaNs ignored
#   lb, ub -- 2.5% and 97.5% quantiles, NaNs ignored
#   count  -- number of non-NaN entries in the group
# The bs_ix column is not part of the grouping, so rows that differ only in
# bs_ix are pooled. The group keys become ordinary columns again at the end.
vamp_eq_summary = (
    vamp_eqs
    .groupby(['hp_ix', 'lag', 'process'])['score']
    .agg(
        median=lambda scores: np.nanquantile(scores, 0.5),
        lb=lambda scores: np.nanquantile(scores, 0.025),
        ub=lambda scores: np.nanquantile(scores, 0.975),
        count=lambda scores: scores.notna().sum(),
    )
    .reset_index()
)

In [71]:
# Store the score summary (one row per hp_ix / lag / process group) in the
# summary HDF5 file under the key 'vamp_eqs'.
vamp_eq_summary.to_hdf(summary_path, key='vamp_eqs')