In [1]:
# Simulation for the evolution of influenza-like antigenic protein sequences

# import modules
import time
import numpy as np
import os
import logging
import pickle
from pypet import Environment, cartesian_product, progressbar, Parameter
from datetime import date
# date stamp (e.g. 2021Apr06) used to name today's result file and temp folder
today = date.today()
strdate_today = today.strftime("%Y%b%d")
# root of the local repository checkout.
# Set the INFLUENZA_FITNESS_REPO environment variable to use a different
# checkout (other machine, cluster) without editing this cell; if it is not
# set, the original development path is used.
repository_path = os.path.normpath(
    os.environ.get('INFLUENZA_FITNESS_REPO',
                   'C:/Users/julia/Documents/Resources/InfluenzaFitnessLandscape/NewApproachFromMarch2021/'
                   'InfluenzaFitnessInference'))
# import custom module
import fitnessinference.simulation as simu
import fitnessinference.tests.test_simulation as simu_test

# create folders for data and plots
# (paths are built with os.path so that they work on linux and windows)

# result_directory = os.getcwd() # use the current directory for cluster simulations
result_directory = repository_path
# folder to store results:
folder = os.path.join(result_directory, 'results', 'simulations')
# subfolder to store temporary results
temp_folder = os.path.join(folder, strdate_today+'_temp')
# exist_ok=True keeps the cell safe to re-run and avoids the gap between
# checking for a directory and creating it
os.makedirs(folder, exist_ok=True)
os.makedirs(temp_folder, exist_ok=True)
# filename for final pypet results of the experiment
filename = os.path.join(folder, strdate_today+'.hdf5')

# filepath for logs and storage of intermediate files
filepath = temp_folder

In [3]:
def _parameter_tag(parameters, name_drop):
    """
    build the parameter part of the intermediate-results filename
    
    Parameters:
    
    parameters: dict
            full parameter name -> parameter value
            
    name_drop: int
            number of leading characters to strip from each full name
            
    Returns:
    
    tag: str
            concatenation of '<name>_<value>' pieces:
            ints below 100 and the seed are written as plain integers,
            all other ints and all floats in exponent notation,
            strings verbatim followed by '_';
            values of any other type are left out
    
    """
    tag = ''
    for key, value in parameters.items():
        short_name = key[name_drop:]
        if isinstance(value, str):
            tag += short_name + '_' + value + '_'
        elif isinstance(value, int):
            if value < 100 or short_name == 'seed':
                tag += short_name + '_%.i' % value
            else:
                # covers value == 100 as well, which was skipped before
                tag += short_name + '_%.e' % value
        elif isinstance(value, float):
            tag += short_name + '_%.e' % value
    return tag


def simulation(traj, filepath):
    """
    simulate the sequence evolution
    
    NOTE: work in progress. In the current state each time step mutates 
    the sequences, determines the strains with their frequencies and 
    evaluates the fitness lists; selection, the genealogy (tree) and 
    saving of results are not implemented yet, so nothing is stored 
    or written to disk.
    
    Parameters:
    
    traj: pypet.trajectory.Trajectory
            trajectory container, which manages the parameters
            (reads seed, N_pop, N_site, N_state, N_simu, mu, sigma_h, D0,
            hJ_coeffs, h_0, J_0)
    
    filepath: path where to save intermediate results
            
    Results (intended):
    
    strain_yearly: list 
            [[list of unique sequences (strains)] 
            for each time step] 
            with strains from most to least prevalent at each time
            
    strain_frequency_yearly: list
            [[list of frequencies of strains] 
            for each time step] 
            in same order as in strain_yearly
            
    tree_yearly: list
            [[[parent strain index, child strain index] for each ancestral pair] 
            for each time step] 
            each unique parent-child strain combination is added once
            
    branch_weight_yearly: list
            [[list of counts for each occurrence of a specific parent-child combi] 
            for each time step] 
            the index of the branch weight is the same 
            as the index of the [parent,child] in tree_yearly 
            for the same time step
            
    pickled .data files with intermediate simulation results
            
    Returns:
    
    None
    
    Raises:
    
    ValueError
            if traj.hJ_coeffs names a rule that is not implemented
    
    Dependencies:
    
    import numpy as np
    from pypet import Environment, Parameter
    import fitnessinference.simulation as simu
    import os
    import pickle
    
    """
    # initializations:
    
    # set RNG seed:
    np.random.seed(traj.seed)
    # current sequences, numpy array, initialized with all zeros
    seqs = np.zeros((traj.N_pop, traj.N_site)) 
    # strains at each time, list, initialized with initial strain
    strain_yearly = [np.array([list(seqs[0])])]
    # strain indices for each current sequence, list, initialized with zeros
    # (not used yet)
    seq_strain_idx = [0 for i in range(traj.N_pop)] 
    # strain frequencies at each time, list, initialized with 1
    strain_frequency_yearly = [np.array([1])] 
    # strain indices for each parent and child, list, initialized empty
    # (not filled yet)
    tree_yearly = []
    # number of times each tree branch is taken. list (not filled yet)
    branch_weight_yearly = []
    # current list of exp(fitness) for each sequence (not used yet)
    pfit_0 = [0]
    # set fitness coefficients according to the selected rule
    if traj.hJ_coeffs=='constant':
        h_model, J_model = simu.fitness_coeff_constant(traj.N_site,traj.N_state,traj.h_0,traj.J_0)
    else:
        # fail here with a clear message instead of a NameError on
        # h_model/J_model inside the time loop
        raise ValueError("unknown rule for fitness coefficients "
                         "(traj.hJ_coeffs): %r" % (traj.hJ_coeffs,))

    # filenames for intermediate results:
    name_drop = len('parameters.') # from parameter names length of first part
    # add each parameter with name and value into name:
    params = _parameter_tag(traj.f_get_parameters(1), name_drop)
            
    # path of the intermediate results file (nothing is written to it yet)
    filename = os.path.join(filepath, 'running_' + params + '.data')
    
    # simulation of sequence evolution:
    for t in range(traj.N_simu):
        
        # mutate sequences
        # NOTE: seqs is never updated below, so every step currently 
        # mutates the initial all-zero population
        seqs_m = simu.mutate_seqs(seqs, traj.N_state, traj.mu)
        
        # determine fitnesses
        
        # strains and strain counts/frequencies
        strain_current, strain_count_current =\
            np.unique(seqs_m, return_counts=True, axis=0) 
        strain_frequency_current = strain_count_current/np.sum(strain_count_current)

        # intrinsic fitness
        f_int_list =\
            simu.fitness_int_list(strain_current, traj.N_state, h_model, J_model)
        # host-dependent fitness
        f_host_list =\
            simu.fitness_host_list(strain_current, strain_yearly, 
                                   strain_frequency_yearly, traj.sigma_h, traj.D0)
        
        # select surviving seqs
        # TODO: not implemented yet; f_int_list and f_host_list are unused
        
        # update and save data
        strain_yearly.append(strain_current)
        strain_frequency_yearly.append(strain_frequency_current)
        

In [4]:
# time one direct (non-pypet) call of the simulation.
# Needs `traj` from the environment cell, so run that cell first.
# perf_counter is a monotonic clock intended for measuring intervals.
start = time.perf_counter()

simulation(traj, filepath)

end = time.perf_counter()
print(end - start)

0.5843679904937744


In [2]:
# set up the pypet environment and the parameter exploration

# root logger for progress messages
logger = logging.getLogger()

# environment named after today's date, configured for 4 cores,
# storing its results in `filename`
env = Environment(
    trajectory=strdate_today,
    filename=filename,
    multiproc=True,
    ncores=4,
    wrap_mode='QUEUE',
)

# Extract the trajectory
traj = env.traj

# let the simulation module register its parameters on the trajectory
simu.add_parameters(traj)

# parameter exploration for this experiment:
# every combination of the listed N_pop and h_0 values
exp_dict = cartesian_product({'N_pop': [10, 100],
                              'h_0': [-7, -5, -1, 0]})
traj.f_explore(exp_dict)

# Run the simulation (currently disabled).
# simulation() also needs `filepath`; extra arguments given to env.run
# are passed on to the run function.
# logger.info('Starting Simulation')
# env.run(simulation, filepath)

MainProcess pypet.storageservice.HDF5StorageService INFO     I will use the hdf5 file `C:\Users\julia\Documents\Resources\InfluenzaFitnessLandscape\NewApproachFromMarch2021\InfluenzaFitnessInference\results\simulations\2021Apr06.hdf5`.
MainProcess pypet.environment.Environment INFO     Environment initialized.


In [None]:
# simulation() requires the folder for intermediate results as second argument
simulation(traj, filepath)
    

In [None]:
# list the nodes below traj.parameters together with their position
print(list(enumerate(traj.parameters.f_iter_nodes())))