In [1]:
import os
import sys

import numpy as np
import pandas as pd
import scipy.stats
import eqtk
import itertools


In [2]:
# Resolve the directory two levels above the current working directory and add its
# `code/` sub-folder to the import path so that the local `utilities` module can be imported.
pwd = os.path.abspath('../..')
sys.path.append(os.path.join(pwd, 'code/')) 
import utilities
# NOTE(review): the star import hides where names come from. Later cells use
# `number_of_dimers`, `make_nXn_species_names` and `plt` without importing them,
# so they are presumably provided by this line -- confirm against code/utilities.py.
from utilities import *

Plan: Load pre-computed equilibrium concentrations generated by simulating 1-input dimerization networks of various sizes (3 to 6 monomer species) with randomly sampled binding affinities and accessory monomer concentrations (see 20220617_sim1inputTitration_randomParams_highRes.ipynb for details). 

To generate the network "output" we want to sum up the dimer concentrations at equilibrium with some non-negative weight for each dimer. Generate a set of output weights to apply across all simulations. Save the data and move to a new notebook to parameterize and cluster the output. 

Note that the number of possible output weight combinations scales exponentially with the number of dimers (which scales quadratically with the number of monomers). So if we have 3 possible output weight values (e.g. 0, 1, and 2), then for a 6 monomer network, with 21 dimers, there are $3^{21} \approx 10^{10}$ possible output weight combinations. This is too many to compute exhaustively at this point. Instead, use Latin hypercube sampling to generate a semi-random collection of output weights. 

Previously, with the smaller input titration, I also generated output weight vectors by restricting the total number of "active" dimers (i.e. dimers with non-zero output weights) and then, for a given sized network, permuting which dimers are active (see 20220612_sim1input_randomOutputWeights.ipynb). Unfortunately, for the larger "highRes" input titration, this approach for generating output weights and computing outputs is too memory intensive. 

In [3]:
def LHS_weights(n_dimers, k, lb=1, ub=10, 
                   centered = False, log = False, seed = 42):
    """
    Sample k output weight vectors for n_dimers dimers from a Latin hypercube.

    Parameters
    ----------
    n_dimers : int
        Number of dimers, i.e. the dimension of each weight vector.
    k : int
        Number of weight vectors to draw.
    lb : float. Default 1
        Lower bound applied to every dimension. If log = True this bounds the
        base-10 exponent, so the smallest possible weight is 10**lb.
    ub : float. Default 10
        Upper bound applied to every dimension. If log = True this bounds the
        base-10 exponent, so weights stay below 10**ub.
    centered : bool. Default False
        If True, place each sample at the centre of its hypercube cell instead
        of at a random position inside the cell.
    log : bool. Default False
        If True, return 10**sample instead of the sample itself.
    seed : int. Default 42
        Seed handed to scipy.stats.qmc.LatinHypercube.

    Returns
    -------
    out_weights : array_like, shape (k, n_dimers)
    """
    # SciPy 1.10 deprecated `centered` in favour of `scramble` (centered=True is
    # the same as scramble=False) and SciPy 1.12 removed it, so passing
    # `centered` raises TypeError on current releases. Prefer the new keyword
    # and fall back to the old one only for SciPy versions without `scramble`.
    try:
        lhs_sampler = scipy.stats.qmc.LatinHypercube(d=n_dimers, scramble=not centered, seed=seed)
    except TypeError:
        lhs_sampler = scipy.stats.qmc.LatinHypercube(d=n_dimers, centered=centered, seed=seed)

    # Unit-cube samples, rescaled so every dimension spans [lb, ub).
    out_weights = lhs_sampler.random(n=k)
    out_weights = scipy.stats.qmc.scale(out_weights, [lb]*n_dimers, [ub]*n_dimers)
    if log:
        return np.power(10, out_weights)
    return out_weights
     


In [4]:
def compute_output_random_weights(n_monomers, n_weights, lb=-3, ub=3, 
                                  log=True, centerLHS=False, 
                                  seed=42, save = False, outfileprefix = ''):
    """
    Weight the pre-computed equilibrium dimer concentrations of a network with
    n_monomers monomers by Latin-hypercube-sampled output weights.

    The equilibrium concentrations are read from the 'S_all_{n_monomers}M_1000k.npy'
    file of the 20220617 highRes data set; the output is the dot product of the
    dimer concentrations with every sampled weight vector.

    Parameters
    ----------
    n_monomers : int. 3 - 6 valid
        Network size.
    n_weights : int
        Number of output weight vectors to sample.
    lb : int. Default -3
        Lower bound handed to LHS_weights (a base-10 exponent when log = True).
    ub : int. Default 3
        Upper bound handed to LHS_weights (a base-10 exponent when log = True).
    log : bool. Default True
        Place output weights on log scale.
    centerLHS : bool. Default False
        Forwarded to LHS_weights as `centered`; see scipy.stats.qmc.LatinHypercube().
    seed : int. Default 42
        Seed for the Latin hypercube sampler.
    save : bool. Default False
        If True, write the weights and the outputs to .npy files.
    outfileprefix : string. Default ''
        Prepended verbatim to both output file names.

    Return
    ------
    out_weights : array_like, shape (n_dimers, n_weights)
    out : array_like, shape (S_all.shape[0], S_all.shape[2], n_weights) = (n_input_titration, n_param_universe, n_weights)
        where S_all is the loaded equilibrium concentration array.
    """
    n_dimers = number_of_dimers(n_monomers)

    equil_conc = np.load(f'../../data/20220617_1input_randomParams_highRes/S_all_{n_monomers}M_1000k.npy')

    # LHS_weights returns one weight vector per row; flip it so that each column
    # is a weight vector and the matrix can right-multiply the concentrations.
    out_weights = LHS_weights(
        n_dimers, n_weights, lb=lb, ub=ub,
        centered=centerLHS, log=log, seed=seed,
    ).T

    # Bring axis 2 of the loaded array to the front so the matrix product
    # broadcasts over it, and skip the first n_monomers species so that only
    # the dimer concentrations are weighted.
    dimer_conc = np.moveaxis(equil_conc, source=2, destination=0)[:, :, n_monomers:]
    # Swap the two leading axes back to get (S_all.shape[0], S_all.shape[2], n_weights).
    out = np.swapaxes(dimer_conc @ out_weights, 0, 1)

    if save:
        np.save(f'{outfileprefix}out_weights_{n_monomers}M_LHSsample_{out_weights.shape[1]}k.npy', out_weights)
        np.save(f'{outfileprefix}output_{n_monomers}M_LHSsample_{out_weights.shape[1]}k.npy', out)

    return out_weights, out

In [5]:
# Location (relative to the working directory) for the saved output weights and outputs.
# The value is prepended verbatim to the file names, so the trailing slash is required.
outfileprefix = '../../data/20220617_1input_randomParams_highRes/'

In [6]:
# For every network size (3 to 6 monomers) sample 1000 Latin-hypercube output weight
# vectors with log10 weights between -3 and 3, then save the weights and the
# resulting outputs under `outfileprefix`. The returned arrays are discarded.
lhs_settings = dict(lb=-3, ub=3, log=True, centerLHS=False,
                    seed=42, save=True, outfileprefix=outfileprefix)
for n_monomers in (3, 4, 5, 6):
    _, _ = compute_output_random_weights(n_monomers, 1000, **lhs_settings)

In [98]:
def plot_outcurves(outmat, facet = False, ylim=(-0.05,1.05), xticks = [0,4,9], xticklabels = [-3,0,3]):
    """
    Plot output curves against the input titration index.

    Parameters
    ----------
    outmat : array_like, shape (n_univ, n_titration, n_outweights)
        Output values. Axis 0 indexes the parameter universe, axis 1 the
        titration point and axis 2 the output weight set.
        NOTE: compute_output_random_weights returns its output ordered as
        (n_titration, n_univ, n_weights); swap the first two axes before
        passing such an array to this function.
    facet : bool. Default False
        If True, draw one panel per (universe, weight set) pair. Otherwise draw
        one panel per universe and overlay all weight sets in it with a legend.
    ylim : tuple. Default (-0.05, 1.05)
        y-axis limits applied to every panel.
    xticks : list. Default [0, 4, 9]
        Tick positions, in titration-index units.
    xticklabels : list. Default [-3, 0, 3]
        Exponents shown at `xticks`; each value i is rendered as 10^i.

    Returns
    -------
    fig : figure returned by plt.subplots
    axes : 2-D array of axes (squeeze=False), shape (n_univ, n_outweights)
        if facet else (1, n_univ)
    """
    n_univ = outmat.shape[0]
    n_titration = outmat.shape[1]
    n_outweights = outmat.shape[2]

    titration_idx = np.arange(n_titration)
    xticklabels = [f'$10^{{{i}}}$' for i in xticklabels]
    if facet:
        fig, axes = plt.subplots(n_univ, n_outweights, figsize=(n_outweights*5, n_univ*5), squeeze=False, constrained_layout=True)
        for univ in range(n_univ):
            for weight in range(n_outweights):
                ax = axes[(univ,weight)]
                ax.plot(titration_idx, outmat[univ,:,weight])
                ax.set(aspect = 7, ylim=ylim, ylabel='out',
                       xticks = xticks, xticklabels = xticklabels,
                       title=f'univ: {univ}X weight:{weight}')
    else:
        fig, axes = plt.subplots(1, n_univ, figsize=(n_univ*5, 5), squeeze=False, constrained_layout=True)
        for univ in range(n_univ):
            ax = axes[(0,univ)]
            for weight in range(n_outweights):
                ax.plot(titration_idx, outmat[univ,:,weight], label=f'{weight}')
            # The panel styling and the legend do not depend on the weight index,
            # so apply them once per panel instead of once per curve (rebuilding
            # the legend after every curve is quadratic in the number of curves).
            # Panels without any curve are left untouched, as before.
            if n_outweights:
                ax.set(aspect = 7, ylim=ylim, ylabel='out',
                       xticks = xticks, xticklabels = xticklabels,
                       title=f'univ: {univ}')
                ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0., title='weight set')

    return fig, axes
        
    

To try to account for differences in the number of dimers between network sizes, 
generate a common set of output weights that can be used across networks. 
Allow for d active dimers and set all other dimer output weights to 0. 
Try assigning them to "equivalent" dimers (e.g. D23 ~ D25). Groups of "equivalent" dimers:
* D11
* D1x (heterodimers with M1)
* Dxx (homodimers of accessory) 
* Dxy (heterodimers of accessory)

Ignoring D11, and not taking all 3 active dimers from a single group, there are 7 ways to distribute 3 active dimers across the three remaining groups ((1,1,1), (1,2,0), (1,0,2), (0,1,2), (0,2,1), (2,1,0), (2,0,1)). 

To start, choose 1 of each dimer option. Let's first try D12, D23, and D33 since these are present in all networks. 

In [6]:
# Shared random generator (seed 42) and a 1000 x 3 table of weights for the active
# dimers: exponents are drawn uniformly between -3 and 3 and raised to base 10.
rng = np.random.default_rng(seed=42)
log10_active_weights = rng.uniform(low=-3, high=3, size=(1000, 3))
active_weights = np.power(10, log10_active_weights)

In [32]:
# Use the same three active dimers (D12, D23, D33) for every network size: they get the
# pre-drawn `active_weights`, every other dimer weight is 0, and the weights and the
# resulting outputs are saved per network size.
for m in range(3,7):

    # Dimer labels are the entries of the species list after the first m (monomer) labels.
    species_names = make_nXn_species_names(m)
    dimer_names = species_names[m:]
    n_dimers = number_of_dimers(m)
    # Positions of the active dimers, in the order they occur in dimer_names.
    active_dimer_indx = np.flatnonzero(np.isin(dimer_names, ['D12', 'D23', 'D33']))

    S_all = np.load(f'../../data/20220617_1input_randomParams_highRes/S_all_{m}M_1000k.npy')

    # One weight vector per row while filling; transposed afterwards so that each
    # column is a weight vector for the matrix product below.
    out_weights = np.zeros((1000, n_dimers))
    out_weights[:, active_dimer_indx] = active_weights
    out_weights = out_weights.T

    # Move axis 2 of the concentrations to the front so the product broadcasts over it,
    # weight only the dimer columns, then swap the two leading axes of the result back.
    S_all = np.moveaxis(S_all, source=2, destination=0)
    out = np.swapaxes(S_all[:, :, m:] @ out_weights, 0, 1)

    np.save(f'../../data/20220617_1input_randomParams_highRes/out_weights_{m}M_3activeDimers_{out_weights.shape[1]}k.npy', out_weights)
    np.save(f'../../data/20220617_1input_randomParams_highRes/output_{m}M_3activeDimers_{out_weights.shape[1]}k.npy', out)



Now randomly select 3 active dimers (excluding D11)

In [15]:
# For every network size, give each of the 1000 weight vectors its own random trio of
# active dimers (dimer index 0 is never picked), fill in the pre-drawn `active_weights`,
# and save the weights and the resulting outputs.
# NOTE(review): the draws continue the stream of the shared `rng`, so the result depends
# on how much of that stream was consumed before this cell runs; re-running the cell
# alone selects different dimers.
for m in range(3,7):

    n_dimers = number_of_dimers(m)
    S_all = np.load(f'../../data/20220617_1input_randomParams_highRes/S_all_{m}M_1000k.npy')

    # Candidate dimer indices; index 0 is left out (D11, according to the notebook text).
    candidate_indx = np.arange(1, n_dimers)
    out_weights = np.zeros((1000, n_dimers))
    for i in range(1000):
        active_dimer_indx = rng.choice(candidate_indx, 3, replace=False)
        out_weights[i, active_dimer_indx] = active_weights[i, :]

    # Each column becomes one weight vector for the matrix product below.
    out_weights = out_weights.T

    # Move axis 2 of the concentrations to the front so the product broadcasts over it,
    # weight only the dimer columns, then swap the two leading axes of the result back.
    S_all = np.moveaxis(S_all, source=2, destination=0)
    out = np.swapaxes(S_all[:, :, m:] @ out_weights, 0, 1)

    np.save(f'../../data/20220617_1input_randomParams_highRes/out_weights_{m}M_3randomActiveDimers_{out_weights.shape[1]}k.npy', out_weights)
    np.save(f'../../data/20220617_1input_randomParams_highRes/output_{m}M_3randomActiveDimers_{out_weights.shape[1]}k.npy', out)

