In [10]:
import sys
import argparse
import numpy as np
from scipy.stats import chi2_contingency
from itertools import product
import itertools as it

from _plotly_future_ import v4_subplots
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from plotly.graph_objs import *

from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
init_notebook_mode(connected=True)
    
from datetime import datetime
import tempfile
import os
import gzip 
import subprocess


## Sim pipeline


In [11]:
from tools.SLiM_pipe_tools import (
    read_chrom_sizes, region_sample, region_samplev2,
    fasta_RextractUnif, return_seqs, write_fastaEx, 
    process_recipe, SLiM_dispenserv1, 
)

>  Establish directories

In [3]:
## directories
## Root of the slim_pipe checkout. The default is the original hard-coded
## location; set the SLIM_PIPE_DIR environment variable to run the notebook
## against a different checkout without editing this cell.
## Joining with '' guarantees the trailing separator that the string
## concatenations below rely on.
main_dir= os.path.join(
    os.environ.get('SLIM_PIPE_DIR',
                   '/mnt/d/GitHub/fine-scale-mutation-spectrum-master/slim_pipe/'),
    ''
)
## Both of these are derived from the root, so the path is spelled out only once.
slim_dir= main_dir
fastas_dir= main_dir + 'Fastas/'
##

## mutation counter locations (note: dir_launch carries no trailing slash).
dir_data= main_dir + 'mutation_counter/data/sims/'
count_dir= main_dir + 'mutation_counter/count/'
dir_launch= main_dir + 'mutation_counter'
## wildcard pattern under slim_dir.
slim_soft= slim_dir + 'sim*'

## log file names handed to the dispenser / mutation counter calls below.
summary_file= 'sims.log'
mutlog= 'toMut.log'


### Provide SLiM recipe. 

The recipe is written to take the arguments passed to it below.

In [4]:
## SLiM recipe.
## sim_dir    : folder holding the recipe templates.
## sim_recipe : full path to the template passed to the dispenser below.
sim_dir= main_dir + 'Recipes/Human_sims/'
sim_recipe= sim_dir + 'Gravel_2011_frame_sample.slim'

### Sample Fasta

Choose the assembly. The whole-genome (chr separated) fasta file `<assembly>.fa.gz` is read from `fastas_dir`, defined above.

- Determine here the number and length of the segments simulated, using variables `N` and `L` respectively.

In [5]:

## Assembly name; also the stem of the fasta file name sampled from.
assembly= 'hg38'

## Sampling plan:
##   N - number of segments (one simulation per segment),
##   L - length of each fasta segment.
N= 20
L= 10 ** 6


In [6]:
## Chromosome sizes for the chosen assembly, used to decide where to sample from.
chrom_sizes= read_chrom_sizes(assembly)

## Path to the gzipped assembly fasta, then sample it
## (L and N are the segment length and segment count set above).
fasta= ''.join([fastas_dir, assembly, '.fa.gz'])
rseqs= region_samplev2(L, chrom_sizes, N, fasta)


{'10': 3, '15': 1, '3': 1, '2': 2, '19': 1, '17': 1, '13': 2, '8': 1, '9': 1, '16': 2, '11': 1, '6': 1, '5': 3}
opening fasta chr: 10
opening fasta chr: 11
opening fasta chr: 13
opening fasta chr: 15
opening fasta chr: 16
opening fasta chr: 17
opening fasta chr: 19
opening fasta chr: 2
opening fasta chr: 3
opening fasta chr: 5
opening fasta chr: 6
opening fasta chr: 8
opening fasta chr: 9


### Launch Simulations.

> select batch name (suffix tag in files generated).

In [7]:
## Tag for this batch of simulations; forwarded as `batch_name`
## to the cookbook and dispenser calls below.
batch_name = 'Gravel'

**Cookbook**

Simulations are launched using the template file `sim_recipe` (see above). These recipe files are prepared to accept certain variables, which are passed to a `SLiM_dispenser` function in dictionary format. The `cook_constants_*` functions prepare this dictionary.

- Below, the function `cook_constants_Gravel2sampleRange` samples two populations along a range of relative sizes, based on a recipe that takes 3 population size variables.

In [8]:
from tools.SLiM_pipe_tools import SLiM_dispenserv1
from tools.cookbook import cook_constants_Gravel2sampleRange

## Perform Simulations
print('launch SLiM jobs.')

## Extra keyword arguments forwarded to the cookbook function.
cookargs= dict(nrange= [.05,.5], step= N, Nmax= 100)

## Prepare the dictionary of recipe constants for the sampled regions.
sim_store, cookID= cook_constants_Gravel2sampleRange(
    rseqs,
    dir_data= dir_data,
    slim_dir= slim_dir,
    batch_name= batch_name,
    **cookargs
)

## Hand the prepared constants and the recipe template to the dispenser.
## NOTE(review): cookID is passed as both `cookID` and `ID` - confirm this is intended.
SLiM_dispenserv1(
    sim_store, sim_recipe,
    cookID= cookID,
    slim_dir= slim_dir,
    batch_name= batch_name,
    ID= cookID,
    logSims= summary_file,
    mutlog= mutlog
)

launch SLiM jobs.


### Launch Mutation counter. 

Launch the mutation counter pipeline. 

In [12]:
from tools.SLiM_pipe_tools import mutation_counter_launch

## Same log file name that was passed to the dispenser as `mutlog`.
mutlog= 'toMut.log'

print('launch mutation counter.')
mutation_counter_launch(
    mutlog,
    count_dir= count_dir,
    dir_launch= dir_launch,
    main_dir= main_dir
)


launch mutation counter.
