
# Baseline double drive simulator

23-02-21

Katie Willis

katie.willis16@imperial.ac.uk

___

This file contains functions required to simulate the release of a 2 locus gene drive into a single panmictic population and monitor the population through time. We model 2 loci, each with 3 alleles, and therefore 9 haploid genotypes and 45 diploid genotypes. We begin with a matrix containing numbers of individuals of each genotype after transgenic release, monitoring males and females separately. To model processes such as fitness costs, homing, sex ratio bias and recombination during each generation, we multiply (element-wise) the numbers of individuals per genotype by a series of transition matrices for each process, built using user-defined parameters. The output is a dictionary containing time-series of allele and genotype frequencies, numbers of individuals and the correlation between the two gene drive constructs.
___

## Parameter and matrix definition

### Allele and genotype lists

We model 2 loci, each with 3 alleles, and therefore 9 haploid genotypes and 45 diploid genotypes.

* First locus: 
    * A = WT
    * a = Transgene
    * α = Resistant
* Second locus: 
    * B = Target site 
    * b = Transgene
    * β = Resistant
    
*Note that labels differ compared to publication, where in the publication a and b refer to resistant loci and α and β refer to the construct.

In [1]:
"""
Defines lists of alleles and genotypes as strings.
"""
function define_alleles_and_genotypes()
    
    A_alleles = ['A','a','α']
    B_alleles = ['B','b','β'];
    
    #Define all A and B allele combinations: Creating haploid genotypes (e.g. AB, Ab etc.)
    alleles = []
    for A in A_alleles
        for B in B_alleles
            push!(alleles,(A*B))
        end
    end;
    
    #Define all diploid genotypes at the first locus (e.g. AA, Aa etc.)
    A_genotypes = []
    for i in 1:length(A_alleles)
        allele1 = A_alleles[i]
        for j in i:length(A_alleles)
            allele2 = A_alleles[j]
            push!(A_genotypes,(allele1*allele2))
        end
    end;

    #Define all diploid genotypes at the second locus (e.g. BB, Bb etc.)
    B_genotypes = []
    for i in 1:length(B_alleles)
        allele1 = B_alleles[i]
        for j in i:length(B_alleles)
            allele2 = B_alleles[j]
            push!(B_genotypes,(allele1*allele2))
        end
    end;
    
    #Define all genotypes for both A and B loci combined (e.g. ABAB, ABAb etc.)
    genotypes = []
    for i in 1:length(alleles)
        allele1 = alleles[i]
        for j in i:length(alleles)
            allele2 = alleles[j]
            push!(genotypes,([allele1 allele2]))
        end
    end;
    
    return(alleles,A_alleles,B_alleles,A_genotypes,B_genotypes,genotypes)
end
"""
Converts lists of alleles and genotypes into lists of symbols for labelling of data structures.
"""
function define_symbolic_alleles_and_genotypes(alleles,A_alleles,B_alleles,A_genotypes,B_genotypes,genotypes)
    ab, aB, aβ, Ab, AB, Aβ, αb, αB, αβ = symbols("ab, aB, aβ, Ab, AB, Aβ, αb, αB, αβ");
    alleles_sym = [Sym(a) for a in alleles]
    A_genotypes_sym = [Sym(a) for a in A_genotypes]
    B_genotypes_sym = [Sym(a) for a in B_genotypes]
    genotypes_sym = [sympify(i[1]*i[2]) for i in genotypes];
    return(alleles_sym,A_genotypes_sym,B_genotypes_sym,genotypes_sym)
end

# Allele and genotype labels are fixed for the whole model, so they are built once here and
# stored as global variables. Functions in this file read several of them directly
# (e.g. `genotypes`, `A_genotypes`, `B_genotypes`, `A_alleles`, `B_alleles`).
global alleles,A_alleles,B_alleles,A_genotypes,B_genotypes,genotypes = define_alleles_and_genotypes();
# Symbolic versions of the same labels, derived from the string labels defined on the line above.
global alleles_sym,A_genotypes_sym,B_genotypes_sym,genotypes_sym = define_symbolic_alleles_and_genotypes(alleles,A_alleles,B_alleles,A_genotypes,B_genotypes,genotypes);


define_alleles_and_genotypes (generic function with 1 method)

### Parameters

We define a dictionary of all parameters required to run the simulation.

In [8]:
"""
Initiates a dictionary containing all parameters. 
All are set to zero except:
m = 0.5
r = 0.5
Rm = 6 
Nf_eq = 1000 (Number of females or males at equlibrium)
OJ = 1/10
therefore:
f = (2*Rm)/OJ_
α = f_*Nf_eq/(Rm-1)
"""
function Initiate_baseline_parameters()
    
    params = Dict{String,Float64}()

    #expression costs for each possible construct component
    ## for females
    params["scasf"] = 0
    params["hcasf"] = 0
    params["sgRNAf"] = 0
    params["hgRNAf"] = 0
    params["sXSf"] = 0
    params["hXSf"] = 0
    ## for males
    params["scasm"] = 0
    params["hcasm"] = 0
    params["sgRNAm"] = 0
    params["hgRNAm"] = 0
    params["sXSm"] = 0
    params["hXSm"] = 0
    
    #disruption costs
    ## for females
    params["siaf"] = 0
    params["hiaf"] = 0
    params["sraf"] = 0
    params["hraf"] = 0
    params["sibf"] = 0
    params["hibf"] = 0
    params["srbf"] = 0
    params["hrbf"] = 0
    params["siraf"] = 0
    params["sirbf"] = 0
    ## for males
    params["siam"] = 0
    params["hiam"] = 0
    params["sram"] = 0
    params["hram"] = 0
    params["sibm"] = 0
    params["hibm"] = 0
    params["srbm"] = 0
    params["hrbm"] = 0
    params["siram"] = 0
    params["sirbm"] = 0
    
    # Activity costs
    ##X-shredder costs
    params["sXsf"] = 0
    params["sXsm"] = 0
    ##Somatic expression costs
    params["sSaf"] = 0
    params["sSam"] = 0
    params["sSbf"] = 0
    params["sSbm"] = 0
    ##Off target cleavage costs
    params["sHaf"] = 0
    params["sHam"] = 0
    params["sHbf"] = 0
    params["sHbm"] = 0

    # homing rates
    ## in females
    params["caf"] = 0
    params["jaf"] = 0
    params["cbf"] = 0
    params["jbf"] = 0
    ## in males
    params["cam"] = 0
    params["jam"] = 0
    params["cbm"] = 0
    params["jbm"] = 0
    
    #recombination
    params["r"] = 0.5
    
    #sex ratio bias
    params["m"] = 0.5
    
    #population dynamics 
    Rm = 6
    Nf_eq = 1000.0
    params["OJ"] = 1/10
    params["f"] = (2*Rm)/params["OJ"]
    params["α"] = params["f"]*Nf_eq/(Rm-1)

    return(params)
end

"""
Initiates a parameter set based on our selected baseline parameters and:
    - the locus type of A and B (Ntrl, HI, HS)
    - homing dependency of A and B (Constitutive, conditional, none)
    - XS dependency (A or B)
If applied, homing rates are pre-defined according to Kyrou et al 2018
c = 0.971
j = 0.019
if applied, Xshredder rates are pre-defined according to Galizi et al 2014
m = 0.95
Fitness costs due to locus distruption are pre-defined to 1 based on the locus type
Off target cleavage costs are set to 0.01 for both males and females
"""
function Assign_baseline_params(LOCUS_A,
                                LOCUS_B,
                                set_A_homing_dependency,
                                set_B_homing_dependency,
                                set_XS_dependency
                                )
    
    cleavage_Kyrou2018 = 0.971
    joining_Kyrou2018 = 0.019

    p = Initiate_baseline_parameters()
    
    if LOCUS_A == "HS_rec"
        p["siaf"]=1.0 
        p["sraf"]=1.0 
        p["siraf"] = 1.0
    elseif LOCUS_A == "HI"
        p["siaf"]=1.0 
        p["sraf"]=1.0 
        p["siraf"] = 1.0
        p["hiaf"]=1.0 
        p["hraf"]=1.0
    end

    if LOCUS_B == "HS_rec"
        p["sibf"]=1.0 
        p["srbf"]=1.0 
        p["sirbf"] = 1.0
    elseif LOCUS_B == "HI"
        p["sibf"]=1.0 
        p["srbf"]=1.0 
        p["sirbf"] = 1.0
        p["hibf"]=1.0 
        p["hrbf"]=1.0
    end

    if set_A_homing_dependency != "none"
        p["caf"] = cleavage_Kyrou2018 # homing 
        p["jaf"] = joining_Kyrou2018 # homing
        p["cam"] = cleavage_Kyrou2018 # homing 
        p["jam"] = joining_Kyrou2018 # homing
        A_homing_dependency = set_A_homing_dependency
    elseif set_A_homing_dependency == "none"
        #Doesnt matter what this is as the homing rates are set to 0.0
        A_homing_dependency = "constitutive" #re-define for selection of homing matrix
    end

    if set_B_homing_dependency != "none"
        p["cbf"] = cleavage_Kyrou2018 # homing 
        p["jbf"] = joining_Kyrou2018 # homing
        p["cbm"] = cleavage_Kyrou2018 # homing 
        p["jbm"] = joining_Kyrou2018 # homing
        B_homing_dependency = set_B_homing_dependency
    elseif set_B_homing_dependency == "none"
        #Doesnt matter what this is as the homing rates are set to 0.0
        B_homing_dependency = "constitutive" #re-define for selection of homing matrix
    end

    if set_XS_dependency != "none"
        p["m"] = 0.95
        XS_dependency = set_XS_dependency
    elseif set_XS_dependency == "none"
        p["m"] = 0.5
        XS_dependency = "none"
    end
    
    #off target cleavage
    par = 0.01
    p["sHaf"] = par
    p["sHam"] = par
    p["sHbf"] = par
    p["sHbm"] = par
    
    return(p,A_homing_dependency,B_homing_dependency,XS_dependency)
end

Assign_baseline_params

### Fitness

Fitness costs are calculated based on 3 different processes. The first two are locus specific, including costs due to locus disruption by either construct insertion or cleavage resistant mutation, and costs due to expression of the construct. The third is due to construct activity, which includes off-target Cas9 cleavage, somatic Cas9 cleavage and X-shredding, each of which may or may not depend on interaction between transcriptional units from different constructs. 
Fitness costs are calculated multiplicatively: the fitness of the homozygous WT is standardised to 1 and each process reduces the fitness by a factor.
The functions below produce a fitness matrix containing the relative fitness of each genotype compared to the wildtype. This depends on the contents of each construct, the homing dependency (constitutive, conditional or no homing of the construct at each locus) and location of the X-shredder (A or B) if present. 
Each row of the matrix represents a genotype, the first column contains the relative fitness of females and the second the relative fitness of males for each genotype.


###### Disruption costs
At a single locus, where the fitness of the homozygous wildtype is standardised to 1, the fitness of individuals is: $1-{s_I}$ when homozygous for the construct; $1-{s_R}$ when homozygous for the cleavage resistant allele; $1-{h_Is_I}$ when heterozygous for the construct allele; $1-{h_Rs_R}$ when heterozygous for the cleavage resistant allele; and $1-s_{IR}$ when heterozygous carrying both the construct and the cleavage resistant allele. Here $s_I$, $s_R$ and $s_{IR}$ are selection coefficients and $h_I$ and $h_R$ are dominance coefficients, each of which differs depending on the locus and sex. 


In [None]:
"""
Makes the fitness matrix for female and male individuals taking into consideration disruption of 
function at a single locus.
The output is a matrix, where each column represents the sex (female, male)
and each row represents the genotype of diploid individuals at a single locus e.g. AA, Aa etc.
"""
function Get_disruption_fitness(sif::Float64,hif::Float64,srf::Float64,hrf::Float64,sirf::Float64,
                                sim::Float64,him::Float64,srm::Float64,hrm::Float64,sirm::Float64)
    #genotypes are in the order of A_genotypes and B_genotypes vectors defined as global variables
    fitness_matrix = Array{Float64}(undef,(6,2))
    fitness_matrix[:,1] = [1,1-(sif*hif),1-(srf*hrf),1-sif,1-sirf,1-srf]
    fitness_matrix[:,2] = [1,1-(sim*him),1-(srm*hrm),1-sim,1-sirm,1-srm]
    return(fitness_matrix)
end



###### Expression costs

For individuals carrying a construct at either locus, there may be additional costs due to expression of the transcription units present (one or more of Cas9, gRNA or X-shredder). We assume expression of each transcription unit has the same cost when expressed at either locus. Where the fitness of the homozygous wildtype is standardised to 1, the fitness is reduced due to construct expression by $\prod_{k=1}^n (1-s_{k})$ in homozygotes and $\prod_{k=1}^n (1-s_{k}h_{k})$ in heterozygotes, where $k$ refers to each component (Cas9, gRNA, XS) in the construct, $s$ is the selection coefficient and $h$ the dominance coefficient, each of which can vary depending on the sex.



In [None]:
"""
Makes the fitness matrix for female and male individuals considering the contents of the construct at a single locus..
The output is a matrix, where each column rperesents the sex (female, male)
and each row represents the genotype of a diploid individuals at a single locus e.g. AA, Aa etc.
"""
function Get_construct_expression_fitness(construct,p::Dict{String,Float64})
    
    #Define the dictionary which assignes parameters to names of construct components
    fitness_dict = Dict([("cas9", [p["scasf"],p["hcasf"],p["scasm"],p["hcasm"]]), 
            ("grna", [p["sgRNAf"],p["hgRNAf"],p["sgRNAm"],p["hgRNAm"]]), 
            ("xs", [p["sXSf"],p["hXSf"],p["sXSm"],p["hXSm"]])])

    #The initial fitness of an empty construct is 1
    hom_f = 1
    het_f = 1
    hom_m = 1
    het_m = 1
    
    #For every component in the construct the fitness is multiplied by 1-cost
    for component in construct
        hom_f = hom_f*(1-fitness_dict[component][1])
        het_f = het_f*(1-(fitness_dict[component][1]*fitness_dict[component][2]))
        hom_m = hom_m*(1-fitness_dict[component][3])
        het_m = het_m*(1-(fitness_dict[component][3]*fitness_dict[component][4]))
    end
    
    #returns a matrix where each row is a genotype and the two columns refer to females and males. 
    #genotypes are in the order of A_genotypes and B_genotypes vectors previously defined as global variables
    fitness_matrix = Array{Float64}(undef,(length(A_genotypes),2))
    fitness_matrix[:,1] = [1,het_f,1,hom_f,1,1]
    fitness_matrix[:,2] = [1,het_m,1,hom_m,1,1]
    return(fitness_matrix)
end



###### Activity costs

Off-target cleavage reduces fitness by a factor $1 - s_{H}$ in the presence of Cas9 and gRNA and $(1 - s_{H})^2$ in the presence of Cas9 and two different gRNAs (gRNA targeting A and gRNA targeting B). 
Somatic cleavage reduces fitness by a factor $1-s_{S}$ in the presence of Cas9, gRNA and the target site of gRNA. 
X-shredding reduces fitness by a factor $1-s_{XS}$ in the presence of XS.
Each selection coefficient can vary depending on the sex. 
Here we assume the costs associated with each activity are dominant, i.e. the presence of one of each transcriptional unit required has the same fitness cost as when there are two.


In [None]:
"""
Takes in the fitness matrices for each locus (A and B) and the homing/X-shredder dependencies.
Calculates the fitness for each 2-locus diploid genotype e.g. ABAB, taking into consideration costs due to 
1) locus disruption (from single locus fitness matrix input)
2) construct expression (from single locus fitness matrix input)
3) construct activity (calculated for each locus in this function, based on homing/X-shredder dependencies)
Outputs a fitness matrix consisting of two columns (females and males),
where each row is a 2-locus diploid genotype e.g. ABAB, ABAb etc.
"""
function Get_fitness_matrix(A_fitness_matrix, B_fitness_matrix,A_homing_dependency, B_homing_dependency, XS_dependency,p::Dict{String,Float64})
    
    #____________________________________________________________
    #
    #Fitness of genotype (before synergistic costs are considered)
    #____________________________________________________________
    
    #Calculate the fitness of each combined genotype by multiplying the fitness for genotypes at each locus.
    fitness_M_store = Vector{Float64}() #male fitness
    fitness_F_store = Vector{Float64}() #female fitness

    #For every combined genotype e.g. ABAB
    for genotype in genotypes
        B_fitness = Vector{Float64}()
        A_fitness = Vector{Float64}()

        #Check what the B genotype is e.g. BB
        #and assign B_fitness accordingly:
        for i in 1:length(B_genotypes) #iterate through the B genotypes
            B_genotype = B_genotypes[i] #Split to get the two alleles
            B_genotype = split(B_genotype,"")
            if ( #Check if both alleles occur in the combined genotype (in any order)
                    (occursin(B_genotype[1],genotype[1])) & ((occursin(B_genotype[2],genotype[2])))
                    |
                    (occursin(B_genotype[1],genotype[2])) & ((occursin(B_genotype[2],genotype[1])))
                )
                #If they do assign the fitness
                B_fitness = B_fitness_matrix[i,:]
            end
        end

        #Check what the A genotype is e.g. AA
        #and assign A_fitness accordingly:
        for i in 1:length(A_genotypes) 
            A_genotype = A_genotypes[i] #iterate through the A genotypes
            A_genotype = split(A_genotype,"") #Split to get the two alleles
            if ( #Check if both alleles occur in the combined genotype (in any order)
                    (occursin(A_genotype[1],genotype[1])) & ((occursin(A_genotype[2],genotype[2])))
                    |
                    (occursin(A_genotype[1],genotype[2])) & ((occursin(A_genotype[2],genotype[1])))
                )
                #If they do assign the fitness
                A_fitness = A_fitness_matrix[i,:]
            end
        end

        #Multiply the fitnesses at A and B (Assume no epistatic effects)
        combined_fitness = simplify.(A_fitness .* B_fitness)
        fitness_F = combined_fitness[1]
        fitness_M = combined_fitness[2]
        #Store male and female fitnesses independently
        push!(fitness_M_store,fitness_M)   
        push!(fitness_F_store,fitness_F)   
    end
    
    #____________________________________________________________
    #
    # Applying activit costs
    #____________________________________________________________
    
    for i in 1:length(genotypes)
        genotype_1 = split(genotypes[i][1],"")
        genotype_2 = split(genotypes[i][2],"")

        #a) off-target cleavage
        
        #IF homing at a is condtional on b, a and b are required for off-target cleavage by a
        if A_homing_dependency == "conditional"
            #a and b are required for off-target cleavage by a
            if (((genotype_1[1]=="a") | (genotype_2[1]=="a")) & ((genotype_1[2]=="b") | (genotype_2[2]=="b")))
                fitness_F_store[i] = fitness_F_store[i]*(1-p["sHaf"])
                fitness_M_store[i] = fitness_M_store[i]*(1-p["sHam"])
            end
        elseif A_homing_dependency == "constitutive"
            #a is required for somatic leakage by a
            if ((genotype_1[1]=="a") | (genotype_2[1]=="a"))
                fitness_F_store[i] = fitness_F_store[i]*(1-p["sHaf"])
                fitness_M_store[i] = fitness_M_store[i]*(1-p["sHam"])
            end
        end
        if B_homing_dependency == "conditional"
            #a and b are required for off-target cleavage by b
            if (((genotype_1[1]=="a") | (genotype_2[1]=="a")) & ((genotype_1[2]=="b") | (genotype_2[2]=="b")))
                fitness_F_store[i] = fitness_F_store[i]*(1-p["sHbf"])
                fitness_M_store[i] = fitness_M_store[i]*(1-p["sHbm"])
            end
        elseif B_homing_dependency == "constitutive"
            #b is required for somatic leakage by b
            if ((genotype_1[2]=="b") | (genotype_2[2]=="b"))
                fitness_F_store[i] = fitness_F_store[i]*(1-p["sHbf"])
                fitness_M_store[i] = fitness_M_store[i]*(1-p["sHbm"])
            end
        end 
       
        # b) somatic cleavage
        
        ## if a and b and WT are present, multiple by 1-se
        #This represents somatic leakage of Cas9 leading to cleavage of the target site and some dominant fitness cost
        if A_homing_dependency == "conditional"
            #a, b and A are required for off-target cleavage by a
            if ((((genotype_1[1]=="a") & (genotype_2[1]=="A")) |
                ((genotype_1[1]=="A") & (genotype_2[1]=="a"))) & ((genotype_1[2]=="b") | (genotype_2[2]=="b")))
                fitness_F_store[i] = fitness_F_store[i]*(1-p["sSaf"])
                fitness_M_store[i] = fitness_M_store[i]*(1-p["sSam"])
            end
        elseif A_homing_dependency == "constitutive"
            #a and A is required for somatic leakage by a
            if (((genotype_1[1]=="a") & (genotype_2[1]=="A")) |
                ((genotype_1[1]=="A") & (genotype_2[1]=="a")))
                fitness_F_store[i] = fitness_F_store[i]*(1-p["sSaf"])
                fitness_M_store[i] = fitness_M_store[i]*(1-p["sSam"])
            end
        end

        if B_homing_dependency == "conditional"
            #a, b and B are required for off-target cleavage by b
            if ((((genotype_1[2]=="b") & (genotype_2[2]=="B")) |
                ((genotype_1[2]=="B") & (genotype_2[2]=="b"))) & ((genotype_1[1]=="a") | (genotype_2[1]=="a")))
                fitness_F_store[i] = fitness_F_store[i]*(1-p["sSbf"])
                fitness_M_store[i] = fitness_M_store[i]*(1-p["sSbm"])
            end
        elseif B_homing_dependency == "constitutive"
            #b and B is required for somatic leakage by b
            #a, b and B are required for off-target cleavage by b
            if (((genotype_1[2]=="b") & (genotype_2[2]=="B")) |
                ((genotype_1[2]=="B") & (genotype_2[2]=="b")))
                fitness_F_store[i] = fitness_F_store[i]*(1-p["sSbf"])
                fitness_M_store[i] = fitness_M_store[i]*(1-p["sSbm"])
            end
        end
        
        # c) X-shredder
        
        if XS_dependency == "conditional"
            if (((genotype_1[1]=="a") | (genotype_2[1]=="a")) & ((genotype_1[2]=="b") | (genotype_2[2]=="b")))
                fitness_F_store[i] = fitness_F_store[i]*(1-p["sXsf"])
                fitness_M_store[i] = fitness_M_store[i]*(1-p["sXsm"])
            end
        elseif XS_dependency == "constitutive_A"
            if ((genotype_1[1]=="a") | (genotype_2[1]=="a"))
                fitness_F_store[i] = fitness_F_store[i]*(1-p["sXsf"])
                fitness_M_store[i] = fitness_M_store[i]*(1-p["sXsm"])
            end     
        elseif XS_dependency == "constitutive_B"
            if ((genotype_1[2]=="b") | (genotype_2[2]=="b"))
                fitness_F_store[i] = fitness_F_store[i]*(1-p["sXsf"])
                fitness_M_store[i] = fitness_M_store[i]*(1-p["sXsm"])
            end
        end
    end
     
    #Combine male and female fitnesses for each combined genotype into a matrix
    fitness_matrix = hcat(fitness_F_store,fitness_M_store);
    
    return(fitness_matrix)
end

In [77]:
"""
Wrapper function to generate the fitness matrix: 
Input: 
* 2 lists containing construct components for A and B from "cas9","grna" or "xs"
* Homing dependency of locus A and B from "conditional" or "constitutive"
* X-shredder dependency from "conditional_A" or "conditional_B"

The function calculates the fitness for each 2-locus diploid genotype 
e.g. ABAB, taking into consideration costs due to 
1) locus disruption 
2) construct expression 
3) construct activity 
Outputs a fitness matrix consisting of two columns (females and males),
where each row is a 2-locus diploid genotype. 

It is possible to input empty constructs (in the form of an empty vector) to allow for simulation of non-specific configerations 
of constructs, where activity is determined solely on homing and X-shredder dependencies. 
If so, the function returns fitness matrix considering costs due to: 
1) locus disruption
2) construct activity
where expression costs are considered to be zero.
"""
function Build_fitness_matrix(A_construct,B_construct,A_homing_dependency,B_homing_dependency,XS_dependency,p)
    
    A_fitness_matrix = Get_disruption_fitness(p["siaf"],p["hiaf"],p["sraf"],p["hraf"],p["siraf"],
                                            p["siam"],p["hiam"],p["sram"],p["hram"],p["siram"]) .* 
                    Get_construct_expression_fitness(A_construct,p)

    B_fitness_matrix = Get_disruption_fitness(p["sibf"],p["hibf"],p["srbf"],p["hrbf"],p["sirbf"],
                                            p["sibm"],p["hibm"],p["srbm"],p["hrbm"],p["sirbm"]) .* 
                        Get_construct_expression_fitness(B_construct,p)

    fitness_matrix = Get_fitness_matrix(A_fitness_matrix, B_fitness_matrix, A_homing_dependency, B_homing_dependency, XS_dependency,p)
    return(fitness_matrix)
end


Get_fitness_matrix (generic function with 2 methods)

### Homing

Homing in the germline acts to convert genotypes from one to another before segregation during meiosis.
Homing requires the presence of Cas9, gRNA and a target allele and involves two processes. 
The first is cleavage, which occurs with probability c. 
The second is repair, which can either occur via NHEJ with probability j, converting the WT to a resistant allele, or via HDR with probability 1-j, converting the WT to the transgene.
The functions below generate a genotype x genotype (i x j) matrix, which contains the proportion of each pre-homed genotype (i, row) converted to each post-homed genotype (j, column) due to homing.


In [90]:
"""
Selects which matrix to buid depending on the homing dependencies 
"conditional" or "constitutive" at locus A or B
"""
function Build_homing_matrix(A_homing_dependency,B_homing_dependency,p)
    caf,jaf,cbf,jbf = p["caf"],p["jaf"],p["cbf"],p["jbf"]
    cam,jam,cbm,jbm = p["cam"],p["jam"],p["cbm"],p["jbm"]
    
    if ((A_homing_dependency  == "constitutive") | (A_homing_dependency  == "none"))
        if ((B_homing_dependency == "constitutive") | (B_homing_dependency  == "none"))
            homing_matrix_f = Build_homing_matrix_Acons_Bcons(caf,jaf,cbf,jbf)
            homing_matrix_m = Build_homing_matrix_Acons_Bcons(cam,jam,cbm,jbm)
        elseif B_homing_dependency  == "conditional"
            homing_matrix_f = Build_homing_matrix_Acons_Bcond(caf,jaf,cbf,jbf)
            homing_matrix_m = Build_homing_matrix_Acons_Bcond(cam,jam,cbm,jbm)
        end
    elseif A_homing_dependency  == "conditional"
        if ((B_homing_dependency == "constitutive") | (B_homing_dependency  == "none"))
            homing_matrix_f = Build_homing_matrix_Acond_Bcons(caf,jaf,cbf,jbf)
            homing_matrix_m = Build_homing_matrix_Acond_Bcons(cam,jam,cbm,jbm)
        elseif B_homing_dependency  == "conditional"
            homing_matrix_f = Build_homing_matrix_Acond_Bcond(caf,jaf,cbf,jbf)
            homing_matrix_m = Build_homing_matrix_Acond_Bcond(cam,jam,cbm,jbm)
        end
    end
    return(homing_matrix_f,homing_matrix_m)
end

"""
Takes in a genotype string and modifies the "non-construct" allele to the allele input
If one of the alleles (A or B) is unchanged, "none" is entered
"""
function get_modified_genotype(original_genotype,mod_a_allele,mod_b_allele)

    genotype = deepcopy(original_genotype)
    genotype1 = split(genotype[1],"")
    genotype2 = split(genotype[2],"")

    if genotype1[1]=="A"
        awt = 1
    else 
        awt = 2
    end

    if genotype1[2]=="B"
        bwt = 1
    else 
        bwt = 2
    end

    x = genotype[awt]
    x = split(x,"")
    if mod_a_allele != "none"
        x[1] = mod_a_allele
    end
    genotype[awt] = join(x,"")

    x = genotype[bwt]
    x = split(x,"")
    if mod_b_allele != "none"
        x[2] = mod_b_allele
    end
    genotype[bwt] = join(x,"")

    return(sort_genotype(genotype))
end

"""
Sorts the genotype string so order is maintained: 
A, a, α
B, b, β
"""
function sort_genotype(genotype)
    
    genotype1 = split(genotype[1],"")
    genotype2 = split(genotype[2],"")

    first_genotype = genotype1
    second_genotype = genotype2
    

    A_alleles_s = string.(A_alleles)
    B_alleles_s = string.(B_alleles)
    a1 = findall(x->x==genotype1[1],A_alleles_s )
    a2 = findall(x->x==genotype2[1],A_alleles_s )
    b1 = findall(x->x==genotype1[2],B_alleles_s )
    b2 = findall(x->x==genotype2[2],B_alleles_s )
    
    if a1 != a2
        #sort by a
        if a1 == a2
            first_genotype = genotype1
            second_genotype = genotype2
        elseif a1 < a2
            first_genotype = genotype1
            second_genotype = genotype2
        elseif a1 > a2
            first_genotype = genotype2
            second_genotype = genotype1
        end
    else
        #sort by b
        if b1 == b2
            first_genotype = genotype1
            second_genotype = genotype2
        elseif b1 < b2
            first_genotype = genotype1
            second_genotype = genotype2
        elseif b1 > b2
            first_genotype = genotype2
            second_genotype = genotype1
        end
    end
    genotype = [join(first_genotype) join(second_genotype)]
    
    return(genotype)
end

"""
Homing at A and B is constitutive so occurs in the presence of: 
* a (drive) and A (target)
* b (drive) and B (target)
"""
function Build_homing_matrix_Acons_Bcons(ca::Float64,
                                        ja::Float64,
                                        cb::Float64,
                                        jb::Float64)

    homing = Array{Float64}(undef,length(genotypes),length(genotypes))
    fill!(homing, 0.0)

    #We begin by assuming no homing, therefore all diagonals are set to 1.0
        #This is just so we dont have to assign all genotypes where no homing occurs.
    homing[diagind(homing, 0)] .= 1.0

    #count = 0
    for genotype in genotypes

        local i=findall(x->x==genotype,genotypes)
        original_genotype = genotype

        #count = count+1
        genotype1 = split(genotype[1],"")
        genotype2 = split(genotype[2],"")

        #IF a and A is present AND b and B is present
        if ((((genotype1[1]=="A") | (genotype2[1]=="A")) & ((genotype1[1]=="a") | (genotype2[1]=="a")))
            &
            (((genotype1[2]=="B") | (genotype2[2]=="B")) & ((genotype1[2]=="b") | (genotype2[2]=="b"))))

            homing[i,i] = [(1-ca)*(1-cb)] #No change
            homing[i,findall(x->[x]==[get_modified_genotype(original_genotype,"a","b")],genotypes)] = [(ca*(1-ja))*(cb*(1-jb))] #Both homing events occur in either order

            homing[i,findall(x->[x]==[get_modified_genotype(original_genotype,"a","none")],genotypes)] = [(ca*(1-ja))*(1-cb)] #Homing of A only
            homing[i,findall(x->[x]==[get_modified_genotype(original_genotype,"a","β")],genotypes)] = [(ca*(1-ja))*(cb*jb)] #homing of A and mutation of B

            homing[i,findall(x->[x]==[get_modified_genotype(original_genotype,"none","b")],genotypes)] = [(cb*(1-jb))*(1-ca)] #Homing of B only
            homing[i,findall(x->[x]==[get_modified_genotype(original_genotype,"α","b")],genotypes)] = [(cb*(1-jb))*(ca*ja)] #homing of B and mutation of A

            homing[i,findall(x->[x]==[get_modified_genotype(original_genotype,"α","β")],genotypes)] = [(cb*jb)*(ca*ja)] #mutation of B and A 
            homing[i,findall(x->[x]==[get_modified_genotype(original_genotype,"α","none")],genotypes)] = [(ca*ja)*(1-cb)] #Mutation of A only
            homing[i,findall(x->[x]==[get_modified_genotype(original_genotype,"none","β")],genotypes)] = [(cb*jb)*(1-ca)] #Mutation of B only

        #IF only a and A is present 
        elseif (((genotype1[1]=="A") | (genotype2[1]=="A")) & ((genotype1[1]=="a") | (genotype2[1]=="a")))

            homing[i,i] = [1.0-ca] #No change
            homing[i,findall(x->[x]==[get_modified_genotype(original_genotype,"a","none")],genotypes)] = [ca*(1-ja)] #Homing of A only
            homing[i,findall(x->[x]==[get_modified_genotype(original_genotype,"α","none")],genotypes)] = [ca*ja] #Mutation of A only

        #IF only b and B is present 
        elseif (((genotype1[2]=="B") | (genotype2[2]=="B")) & ((genotype1[2]=="b") | (genotype2[2]=="b")))

            homing[i,i] = [1.0-cb] #No change
            homing[i,findall(x->[x]==[get_modified_genotype(original_genotype,"none","b")],genotypes)] = [cb*(1-jb)] #Homing of B only
            homing[i,findall(x->[x]==[get_modified_genotype(original_genotype,"none","β")],genotypes)] = [cb*jb] #Mutation of B only

        end
    end
    return(homing)
end

"""
Builds the homing matrix when homing is constitutive at A and conditional at B, so it occurs in the presence of:
* a (drive) and A (target)
* b (drive) and B (target) and a (drive)

For a locus where homing is active, the outcome probabilities are 1-c (no change),
c*(1-j) (homing) and c*j (mutation to α or β), using ca, ja for locus A and cb, jb for locus B.
Returns a square matrix with one row and one column per entry of `genotypes`:
the row is the original genotype and the column the genotype after homing.
Rows of genotypes in which no homing occurs keep a 1.0 on the diagonal.
"""
function Build_homing_matrix_Acons_Bcond(ca::Float64,
                                        ja::Float64,
                                        cb::Float64,
                                        jb::Float64)

    #Start from "no homing anywhere": zeros with 1.0 on the diagonal
    homing = Array{Float64}(undef, length(genotypes), length(genotypes))
    fill!(homing, 0.0)
    homing[diagind(homing, 0)] .= 1.0

    for parent in genotypes

        row = findall(x -> x == parent, genotypes)

        #Split each haplotype into its A-locus (index 1) and B-locus (index 2) allele
        hap1 = split(parent[1], "")
        hap2 = split(parent[2], "")

        has_A = (hap1[1] == "A") | (hap2[1] == "A")
        has_a = (hap1[1] == "a") | (hap2[1] == "a")
        has_B = (hap1[2] == "B") | (hap2[2] == "B")
        has_b = (hap1[2] == "b") | (hap2[2] == "b")

        #Each entry is (new A allele, new B allele, probability); entries are written in order
        transitions = Tuple{String,String,Float64}[]

        if (has_A & has_a) & (has_B & has_b)
            #Drive and target at both loci: A is constitutive, and B can drive because an 'a' is present
            homing[row, row] = [(1-ca)*(1-cb)] #No change
            transitions = [
                ("a", "b", (ca*(1-ja))*(cb*(1-jb))),  #Both homing events
                ("a", "none", (ca*(1-ja))*(1-cb)),    #Homing of A only
                ("a", "β", (ca*(1-ja))*(cb*jb)),      #Homing of A and mutation of B
                ("none", "b", (cb*(1-jb))*(1-ca)),    #Homing of B only
                ("α", "b", (cb*(1-jb))*ca*ja),        #Homing of B and mutation of A
                ("α", "β", cb*jb*ca*ja),              #Mutation of both A and B
                ("α", "none", (ca*ja)*(1-cb)),        #Mutation of A only
                ("none", "β", (cb*jb)*(1-ca)),        #Mutation of B only
            ]
        elseif has_A & has_a
            #Drive and target at A only (constitutive homing)
            homing[row, row] = [1.0-ca] #No change
            transitions = [
                ("a", "none", ca*(1-ja)),  #Homing of A only
                ("α", "none", ca*ja),      #Mutation of A only
            ]
        elseif has_B & has_b & has_a
            #Drive and target at B only, plus at least one 'a' to permit conditional homing
            homing[row, row] = [1.0-cb] #No change
            transitions = [
                ("none", "b", cb*(1-jb)),  #Homing of B only
                ("none", "β", cb*jb),      #Mutation of B only
            ]
        end

        for (new_A, new_B, prob) in transitions
            cols = findall(x -> [x] == [get_modified_genotype(parent, new_A, new_B)], genotypes)
            homing[row, cols] = [prob]
        end
    end

    return(homing)
end

"""
Builds the homing matrix when homing is conditional at A and constitutive at B, so it occurs in the presence of:
* a (drive) and A (target) and b (drive)
* b (drive) and B (target)

For a locus where homing is active, the outcome probabilities are 1-c (no change),
c*(1-j) (homing) and c*j (mutation to α or β), using ca, ja for locus A and cb, jb for locus B.
Returns a square matrix with one row and one column per entry of `genotypes`:
the row is the original genotype and the column the genotype after homing.
Rows of genotypes in which no homing occurs keep a 1.0 on the diagonal.
"""
function Build_homing_matrix_Acond_Bcons(ca::Float64,
                                        ja::Float64,
                                        cb::Float64,
                                        jb::Float64)

    #Start from "no homing anywhere": zeros with 1.0 on the diagonal
    homing = Array{Float64}(undef, length(genotypes), length(genotypes))
    fill!(homing, 0.0)
    homing[diagind(homing, 0)] .= 1.0

    for parent in genotypes

        row = findall(x -> x == parent, genotypes)

        #Split each haplotype into its A-locus (index 1) and B-locus (index 2) allele
        hap1 = split(parent[1], "")
        hap2 = split(parent[2], "")

        has_A = (hap1[1] == "A") | (hap2[1] == "A")
        has_a = (hap1[1] == "a") | (hap2[1] == "a")
        has_B = (hap1[2] == "B") | (hap2[2] == "B")
        has_b = (hap1[2] == "b") | (hap2[2] == "b")

        #Each entry is (new A allele, new B allele, probability); entries are written in order
        transitions = Tuple{String,String,Float64}[]

        if (has_A & has_a) & (has_B & has_b)
            #Drive and target at both loci: B is constitutive, and A can drive because a 'b' is present
            homing[row, row] = [(1-ca)*(1-cb)] #No change
            transitions = [
                ("a", "b", (ca*(1-ja))*(cb*(1-jb))),  #Both homing events
                ("a", "none", (ca*(1-ja))*(1-cb)),    #Homing of A only
                ("a", "β", (ca*(1-ja))*(cb*jb)),      #Homing of A and mutation of B
                ("none", "b", (cb*(1-jb))*(1-ca)),    #Homing of B only
                ("α", "b", (cb*(1-jb))*(ca*ja)),      #Homing of B and mutation of A
                ("α", "β", (cb*jb)*(ca*ja)),          #Mutation of both A and B
                ("α", "none", (ca*ja)*(1-cb)),        #Mutation of A only
                ("none", "β", (cb*jb)*(1-ca)),        #Mutation of B only
            ]
        elseif has_B & has_b
            #Drive and target at B only (constitutive homing)
            homing[row, row] = [1.0-cb] #No change
            transitions = [
                ("none", "b", cb*(1-jb)),  #Homing of B only
                ("none", "β", cb*jb),      #Mutation of B only
            ]
        elseif has_A & has_a & has_b
            #Drive and target at A only, plus at least one 'b' to permit conditional homing
            homing[row, row] = [1.0-ca] #No change
            transitions = [
                ("a", "none", ca*(1-ja)),  #Homing of A only
                ("α", "none", ca*ja),      #Mutation of A only
            ]
        end

        for (new_A, new_B, prob) in transitions
            cols = findall(x -> [x] == [get_modified_genotype(parent, new_A, new_B)], genotypes)
            homing[row, cols] = [prob]
        end
    end
    return(homing)
end

"""
Builds the homing matrix when homing at both A and B is conditional, so it occurs in the presence of:
* a (drive) and A (target) and b (drive)
* b (drive) and B (target) and a (drive)

For a locus where homing is active, the outcome probabilities are 1-c (no change),
c*(1-j) (homing) and c*j (mutation to α or β), using ca, ja for locus A and cb, jb for locus B.
Returns a square matrix with one row and one column per entry of `genotypes`:
the row is the original genotype and the column the genotype after homing.
Rows of genotypes in which no homing occurs keep a 1.0 on the diagonal.
"""
function Build_homing_matrix_Acond_Bcond(ca::Float64,
                                        ja::Float64,
                                        cb::Float64,
                                        jb::Float64)

    #Start from "no homing anywhere": zeros with 1.0 on the diagonal
    homing = Array{Float64}(undef, length(genotypes), length(genotypes))
    fill!(homing, 0.0)
    homing[diagind(homing, 0)] .= 1.0

    for parent in genotypes

        row = findall(x -> x == parent, genotypes)

        #Split each haplotype into its A-locus (index 1) and B-locus (index 2) allele
        hap1 = split(parent[1], "")
        hap2 = split(parent[2], "")

        has_A = (hap1[1] == "A") | (hap2[1] == "A")
        has_a = (hap1[1] == "a") | (hap2[1] == "a")
        has_B = (hap1[2] == "B") | (hap2[2] == "B")
        has_b = (hap1[2] == "b") | (hap2[2] == "b")

        #Each entry is (new A allele, new B allele, probability); entries are written in order
        transitions = Tuple{String,String,Float64}[]

        if (has_A & has_a) & (has_B & has_b)
            #Drive and target at both loci: each drive is present, so each conditional homing is enabled
            homing[row, row] = [(1-ca)*(1-cb)] #No change
            transitions = [
                ("a", "b", (ca*(1-ja))*(cb*(1-jb))),  #Both homing events
                ("a", "none", (ca*(1-ja))*(1-cb)),    #Homing of A only
                ("a", "β", (ca*(1-ja))*(cb*jb)),      #Homing of A and mutation of B
                ("none", "b", (cb*(1-jb))*(1-ca)),    #Homing of B only
                ("α", "b", (cb*(1-jb))*(ca*ja)),      #Homing of B and mutation of A
                ("α", "β", cb*jb*(ca*ja)),            #Mutation of both A and B
                ("α", "none", ca*ja*(1-cb)),          #Mutation of A only
                ("none", "β", (cb*jb)*(1-ca)),        #Mutation of B only
            ]
        elseif has_A & has_a & has_b
            #Drive and target at A only, plus at least one 'b' (conditional homing of a)
            homing[row, row] = [1.0-ca] #No change
            transitions = [
                ("a", "none", ca*(1-ja)),  #Homing of A only
                ("α", "none", ca*ja),      #Mutation of A only
            ]
        elseif has_B & has_b & has_a
            #Drive and target at B only, plus at least one 'a' (conditional homing of b)
            homing[row, row] = [1.0-cb] #No change
            transitions = [
                ("none", "b", cb*(1-jb)),  #Homing of B only
                ("none", "β", cb*jb),      #Mutation of B only
            ]
        end

        for (new_A, new_B, prob) in transitions
            cols = findall(x -> [x] == [get_modified_genotype(parent, new_A, new_B)], genotypes)
            homing[row, cols] = [prob]
        end
    end
    return(homing)
end


Build_homing_matrix

### Sex bias

The probability of a male producing an X- or Y-bearing gamete depends on the presence of an X-shredder. The functions below generate a matrix containing the proportion of X- (column 1) or Y- (column 2) bearing sperm depending on the genotype (row). m is the proportion of Y-bearing sperm produced by a male carrying at least one copy of the X-shredder. We assume that X-shredding function is dominant, thus heterozygotes and homozygotes for the X-shredder create the same sex bias. 

In [26]:
"""
Selects which sex ratio matrix to generate depending on the X-shredder dependency:
"none", "constitutive_A", "constitutive_B" or "conditional".

p is the parameter dictionary; the value stored under "m" is passed to the selected builder.
With "none" the constitutive_A matrix is built with m fixed at 0.5, 
so that every genotype produces an unbiased (0.5/0.5) sex ratio whatever value of m is stored in p.
Throws an ArgumentError if XS_dependency is not one of the four options above.
"""
function Build_sex_ratio_matrix(XS_dependency,p)
    m=p["m"]
    ## Pick the male bias matrix according to the XS dependency
    if XS_dependency == "none"
        ## No X-shredder: reuse the constitutive_A layout but force m = 0.5 so no bias is applied
        return(Build_sexratio_matrix_constitutive_A(0.5))
    elseif XS_dependency  == "constitutive_A"
        return(Build_sexratio_matrix_constitutive_A(m))
    elseif XS_dependency  == "constitutive_B"
        return(Build_sexratio_matrix_constitutive_B(m))
    elseif XS_dependency  == "conditional"
        return(Build_sexratio_matrix_conditional(m))
    else
        ## Fail here rather than returning an empty matrix that breaks later in the simulation
        throw(ArgumentError("Model not available: unknown XS_dependency \"$(XS_dependency)\""))
    end
end

"""
Defines the sex ratio for a single locus.
Returns a 6x2 matrix with one row per single-locus diploid genotype, 
in the order defined by A_genotypes and B_genotypes.
Rows 2, 4 and 5 hold (1-m, m); all other rows hold (0.5, 0.5).
"""
function define_locus_sex_ratio_matrix(m)
    #Each construct has the ability to induce a sex ratio
    unbiased = [0.5 0.5]
    biased = [1-m m]
    #Stack the six rows in genotype order
    return(vcat(unbiased, biased, unbiased, biased, biased, unbiased))
end

"""
Sex distortion is dependent on the B construct.
For every entry of `genotypes`, finds the B-locus genotype it carries and looks up
the matching row of the single-locus sex ratio matrix built from m.
Returns a matrix with one row per genotype; column 1 is taken from the first column 
of the single-locus matrix and column 2 from the second.
"""
function Build_sexratio_matrix_constitutive_B(m::Float64)

    per_locus_ratio = define_locus_sex_ratio_matrix(m)

    first_column = Vector{Float64}()
    second_column = Vector{Float64}()

    #For each combined genotype e.g. ABAB
    for genotype in genotypes
        matched_ratio = []
        #Scan the B genotypes; the last one that matches sets the sex ratio
        for (row, B_genotype) in enumerate(B_genotypes)
            locus_alleles = split(B_genotype,"") #e.g. "Bb" -> B, b
            #Both alleles must occur in the combined genotype, one per haplotype, in either order
            in_given_order = occursin(locus_alleles[1],genotype[1]) & occursin(locus_alleles[2],genotype[2])
            in_swapped_order = occursin(locus_alleles[1],genotype[2]) & occursin(locus_alleles[2],genotype[1])
            if in_given_order | in_swapped_order
                matched_ratio = per_locus_ratio[row,:]
            end
        end
        push!(first_column,matched_ratio[1])
        push!(second_column,matched_ratio[2])
    end

    return(hcat(first_column,second_column))
end

"""
Sex distortion is dependent on the A construct.
For every entry of `genotypes`, finds the A-locus genotype it carries and looks up
the matching row of the single-locus sex ratio matrix built from m.
Returns a matrix with one row per genotype; column 1 is taken from the first column 
of the single-locus matrix and column 2 from the second.
"""
function Build_sexratio_matrix_constitutive_A(m::Float64)
    ratio_by_A_genotype = define_locus_sex_ratio_matrix(m)

    col1 = Vector{Float64}()
    col2 = Vector{Float64}()

    #For each combined genotype e.g. ABAB
    for genotype in genotypes
        haplotype1 = genotype[1]
        haplotype2 = genotype[2]
        ratio = []
        #Scan the A genotypes; the last one that matches sets the sex ratio
        for (k, A_genotype) in enumerate(A_genotypes)
            pair = split(A_genotype,"") #e.g. "Aa" -> A, a
            #Both alleles must occur in the combined genotype, one per haplotype, in either order
            forward = occursin(pair[1],haplotype1) & occursin(pair[2],haplotype2)
            reversed = occursin(pair[1],haplotype2) & occursin(pair[2],haplotype1)
            if forward | reversed
                ratio = ratio_by_A_genotype[k,:]
            end
        end
        push!(col1,ratio[1])
        push!(col2,ratio[2])
    end

    return(hcat(col1,col2))
end


Build_sex_ratio_matrix

### Recombination
Recombination can occur between the two loci with probability r during gamete production. 
The function generates a matrix containing the proportion of each gamete (column) produced from each genotype (row), 
where each row refers to a diploid genotype (individual) and each column to a haploid genotype (gamete) at the autosomal loci only.


In [106]:
"""
Calculate the probability of each genotype generating each gamete (allele)
taking into consideration the recombination probability r between 
A and B loci.
p is the parameter dictionary; only p["r"] is read.
Returns matrix of the proportion of each gamete (columns, in the order of `alleles`) 
produced by each parental genotype (rows, in the order of `genotypes`).
"""
function Build_recombination_matrix(p::Dict{String,Float64})

    r=p["r"]

    #Start from zeros so that gametes a genotype cannot produce stay at 0
    recombination_matrix = zeros(Float64, length(genotypes), length(alleles))

    #For each combined genotype
    for (i, genotype) in enumerate(genotypes)

        #Non-recombinant gametes: the haplotypes already present in the genotype
        nonrecombinant_mask = [gamete in genotype for gamete in alleles]
        n_nonrecombinant = sum(nonrecombinant_mask)
        if n_nonrecombinant == 2
            #Two distinct haplotypes: each inherited with probability (1-r)/2
            recombination_matrix[i, nonrecombinant_mask] .= (1-r)/2
        elseif n_nonrecombinant == 1
            #Two identical haplotypes: inherited with probability (1-r)
            recombination_matrix[i, nonrecombinant_mask] .= 1-r
        end

        #Recombinant gametes: swap the B-locus alleles between the two haplotypes
        A1 = split(genotype[1],"")
        A2 = split(genotype[2],"")
        genotype_recomb = [A1[1]*A2[2],A2[1]*A1[2]]
        recombinant_mask = [gamete in genotype_recomb for gamete in alleles]
        n_recombinant = sum(recombinant_mask)
        if n_recombinant == 2
            #Two distinct recombinants: add r/2 to each
            recombination_matrix[i, recombinant_mask] .+= r/2
        elseif n_recombinant == 1
            #Both recombinants are the same gamete: add r
            recombination_matrix[i, recombinant_mask] .+= r
        end
    end
    return(recombination_matrix)
end

Build_recombination_matrix

## Simulation

In [3]:
"""
Defines starting population and matrices,
runs simulation
and calculates additional metrics from output.

The pre-release population is built from resistant_allele, resistant_freq and params, 
then males of transgenic_genotype are released at release_freq.
The construct, homing dependency and XS dependency arguments are passed to make_matrices; 
fitness_effect and t are passed to Simulate_timeseries.
Returns the output of Simulation_processing_wrapper.
"""
function Simulation_wrapper(params,
                            A_construct,
                            B_construct,
                            A_homing_dependency,
                            B_homing_dependency,
                            XS_dependency,
                            fitness_effect,
                            transgenic_genotype,
                            release_freq, 
                            resistant_allele, 
                            resistant_freq,
                            t)

    #Population before release, then with the transgenic males added
    prerelease_population = Initiate_prerelease_population_3lifestages(resistant_allele, resistant_freq, params)
    released_population = release_transgenic_males(prerelease_population,
                                                   Symbol(transgenic_genotype),
                                                   release_freq)

    #Transition matrices for this model configuration
    model_matrices = make_matrices(params,
                                   A_construct,
                                   B_construct,
                                   A_homing_dependency,
                                   B_homing_dependency,
                                   XS_dependency)

    #Run the time series and post-process it
    raw_output = Simulate_timeseries(params, model_matrices, fitness_effect, t, released_population)

    return(Simulation_processing_wrapper(raw_output))
end
"""
Defines starting population (with constructs released in separate males) and matrices, 
runs simulation and 
calculates additional metrics from output.

The pre-release population is built from resistant_allele, resistant_freq and params, 
then males of transgenic_genotype1 and transgenic_genotype2 are released 
at release_freq1 and release_freq2 respectively.
The construct, homing dependency and XS dependency arguments are passed to make_matrices; 
fitness_effect and t are passed to Simulate_timeseries.
Returns the output of Simulation_processing_wrapper.
"""
function Simulation_wrapper_separate_release(params,
                            A_construct,
                            B_construct,
                            A_homing_dependency,
                            B_homing_dependency,
                            XS_dependency,
                            fitness_effect,
                            transgenic_genotype1,
                            transgenic_genotype2,
                            release_freq1,
                            release_freq2, 
                            resistant_allele, 
                            resistant_freq,
                            t)

    #Population before release, then with both types of transgenic male added
    prerelease_population = Initiate_prerelease_population_3lifestages(resistant_allele, resistant_freq, params)
    released_population = release_2transgenic_males(prerelease_population,
                                                    Symbol(transgenic_genotype1),
                                                    Symbol(transgenic_genotype2),
                                                    release_freq1,
                                                    release_freq2)

    #Transition matrices for this model configuration
    model_matrices = make_matrices(params,
                                   A_construct,
                                   B_construct,
                                   A_homing_dependency,
                                   B_homing_dependency,
                                   XS_dependency)

    #Run the time series and post-process it
    raw_output = Simulate_timeseries(params, model_matrices, fitness_effect, t, released_population)

    return(Simulation_processing_wrapper(raw_output))
end

Simulation_wrapper_separate_release


###### Defining the population upon release

We define a 45x6 matrix of numbers of individuals of each genotype after transgenic release. Each row refers to a genotype and each column to one of the following categories of organism: female zygotes, male zygotes, female pupae, male pupae, female adults and male adults. The pre-release population may contain only WT individuals (ABAB) or may contain a resistant allele, assumed to be at Hardy-Weinberg equilibrium at a user-defined allele frequency. We provide functions to simulate the release of either a single type of transgenic male, or separate releases of males with two different genotypes.

In [5]:
"""
Initiates a dictionary containing the number of individuals for each genotype, each of which has an initial value of 0. 
Keys are the string form of each entry of genotypes_sym.
"""
function Initiate_genotype_freq()
    counts = Dict()
    for genotype_sym in genotypes_sym
        counts[string(genotype_sym)] = 0.0
    end
    return(counts)
end

"""
Returns 2 dictionaries containing the numbers of each genotype, one for females and one for males.
The total number of males or females is defined by the variable params: OJ, f and α and refers to the equilibrium population size of each population.
The population can contain both WT individuals (ABAB) and genotypes containing a resistant allele.
The population is assumed to be at Hardy-Weinberg equilibrium, with the resistant allele at a frequency defined by the variable "resistant_freq".
resistant_allele must be one of the strings "A", "a", "α" (first locus) or "B", "b", "β" (second locus); 
any other value throws an ArgumentError.
Males and females are given the same numbers for every genotype.
"""
function Initiate_prerelease_population(resistant_allele, resistant_freq, params)

    #Assuming population is at equilibrium 
    Rm = (params["f"]*params["OJ"])/2
    Nf = params["α"]*(Rm-1)/params["f"]
    # notes: 
    #f=(2*Rm)/OJ 
    #J=(2*Rm)/f 
    #a = f*Nf/(Rm-1)
    #Rm = (OJ*f)/2
    #Nf = a*(Rm-1)/f

    #Define each frequency initially as 0
    genotype_Nf = Initiate_genotype_freq()
    genotype_Nm = Initiate_genotype_freq()

    #Labels of the genotypes heterozygous and homozygous for the resistant allele,
    #with the other locus kept wild type
    if resistant_allele ∈ ["A","a","α"]
        het = "AB"*resistant_allele*"B"
        hom = resistant_allele*"B"*resistant_allele*"B"
    elseif resistant_allele ∈ ["B","b","β"]
        het = "ABA"*resistant_allele
        hom = "A"*resistant_allele*"A"*resistant_allele
    else
        throw(ArgumentError("resistant_allele must be one of \"A\", \"a\", \"α\", \"B\", \"b\", \"β\"; got $(repr(resistant_allele))"))
    end

    #Hardy-Weinberg genotype numbers
    n_wildtype = (1-resistant_freq)^2*Nf
    n_hom = resistant_freq^2*Nf
    n_het = 2 * resistant_freq * (1-resistant_freq)*Nf

    #Assignment order (wild type, homozygote, heterozygote) is kept as in the original code,
    #because later writes overwrite earlier ones when the labels coincide 
    #(e.g. resistant_allele "A" or "B")
    for genotype_N in (genotype_Nf, genotype_Nm)
        genotype_N["ABAB"] = n_wildtype
        genotype_N[hom] = n_hom
        genotype_N[het] = n_het
    end

    return(genotype_Nf,genotype_Nm)
end

"""
Initiates the prerelease population of adults.
Back-calculates the number of zygotes and pupae for a given adult population and parameter values. 
Returns a DataFrame (6 rows, one column per genotype) of numbers of zygotes (rows 1-2), pupae (rows 3-4) 
and adults (rows 5-6), females first and males second within each pair, 
with column headers labelled by genotype.
"""
function Initiate_prerelease_population_3lifestages(resistant_allele, resistant_freq, params)

    adult_females,adult_males = Initiate_prerelease_population(resistant_allele, resistant_freq, params)

    genotype_labels = string.(genotypes_sym)
    life_stages = Array{Float64}(undef,(6,length(genotypes)))

    #Adults, arranged in the order of genotypes_sym
    life_stages[5,:] = [adult_females[label] for label in genotype_labels]
    life_stages[6,:] = [adult_males[label] for label in genotype_labels]

    #Zygotes, assuming the population is at equilibrium AND pre-release
    life_stages[1,:] = life_stages[5,:] .* params["f"] /2
    life_stages[2,:] = life_stages[6,:] .* params["f"] /2

    #Pupae: zygotes remaining after density dependent mortality
    total_zygotes = sum(life_stages[1,:])+sum(life_stages[2,:])
    survival_prob = params["OJ"]*(params["α"]/(params["α"]+total_zygotes))
    life_stages[3,:] = life_stages[1,:] * survival_prob
    life_stages[4,:] = life_stages[2,:] * survival_prob

    return(DataFrame(life_stages,Symbol.(genotypes_sym)))
end

"""
Takes in a 6x45 matrix defining the numbers of genotypes for a population pre-release and 
returns a 45x6 matrix of numbers of each genotype after release of adult males. 
The frequency and genotype of the release males is defined by the inputs: 
transgenic_genotype and release_freq.
The adult male entry (row 6) of transgenic_genotype is set to release_freq times the total 
number of adult males before release; the input is modified in place.
"""
function release_transgenic_males(starting_population,transgenic_genotype,release_freq)
    #Here we make the assumption that all released individuals are fully fit
    n_adult_males = sum(starting_population[6,:])
    starting_population[6,transgenic_genotype] = n_adult_males*release_freq

    #Genotypes become rows, life stages become columns
    return(transpose(Array(starting_population)))
end

"""
Takes in a 6x45 matrix defining the numbers of genotypes for a population pre-release and 
returns a 45x6 matrix of numbers of each genotype after release of two different types of adult males. 
The frequency and genotype of the release males is defined by the inputs: 
transgenic_genotype1, transgenic_genotype2, release_freq1 and release_freq2.
The adult male entries (row 6) of the two genotypes are set in turn; the input is modified in place.
"""
function release_2transgenic_males(starting_population,transgenic_genotype1,transgenic_genotype2,release_freq1,release_freq2)
    #Here we make the assumption that all released individuals are fully fit
    males_before_first_release = sum(starting_population[6,:])
    starting_population[6,transgenic_genotype1] = males_before_first_release*release_freq1

    #NOTE(review): this total already includes the males released on the line above, 
    #so release_freq2 is relative to a larger population than release_freq1 - confirm this is intended
    males_before_second_release = sum(starting_population[6,:])
    starting_population[6,transgenic_genotype2] = males_before_second_release*release_freq2

    #Genotypes become rows, life stages become columns
    return(transpose(Array(starting_population)))
end


release_2transgenic_males (generic function with 1 method)


###### Time series simulation

Parameter values and information on loci type, homing conditions and the x-shredder location are used to produce matrices (using the functions in the Model Definition section) for applying fitness, homing, sex ratio and recombination processes to the population during each generation.

During each generation:
* Eggs and sperm are produced, taking into consideration homing, recombination and sex ratio bias. 
* Zygotes are produced assuming random mating and that males are not limiting. 
* Fitness effects are applied at either the zygote or adult stage. 
* Density dependence is applied at the zygote stage after fitness effects (if applicable to zygotes). 
* Censusing of both the zygote and adults are taken after any fitness effects are applied.

The output is a dictionary of time-series for allele or genotype frequencies, numbers of individuals and correlation between constructs.

In [43]:
"""
Builds the matrices to be used based on the construct contents, homing dependencies, XS dependencies and parameters.
If construct contents are left empty ([]), expression costs are assumed to be zero, but activity can be defined 
for a generic construct structure based on homing and xshredder dependencies alone. 
Returns a tuple in the order: fitness matrix, female homing matrix, male homing matrix, 
recombination matrix, sex ratio matrix.
"""
function make_matrices(p,
                        A_construct,
                        B_construct,
                        A_homing_dependency,
                        B_homing_dependency,
                        XS_dependency)

    fitness = Build_fitness_matrix(A_construct, B_construct,A_homing_dependency, B_homing_dependency, XS_dependency,p)
    #Build_homing_matrix provides the female matrix first and the male matrix second
    homing_pair = Build_homing_matrix(A_homing_dependency,B_homing_dependency,p)
    homing_f = homing_pair[1]
    homing_m = homing_pair[2]
    sex_ratio = Build_sex_ratio_matrix(XS_dependency,p)
    recombination = Build_recombination_matrix(p)

    return(fitness, homing_f, homing_m, recombination, sex_ratio)
end

"""
Takes in the number of females adults, female homing matrix, recombination matrix and parameter values. 
The number of eggs produced by each genotype is calculated as the number of females times params["f"]. 
Homing is applied by multiplying (element-wise) the vector of egg numbers per genotype by the female homing matrix, 
and summing over the columns. 
Recombination is applied and the number of each gamete produced per genotype is calculated (45x9 matrix) 
by multiplying (element-wise) the number of eggs per genotype after homing by the recombination matrix.
The matrix is summed over each column to find the number of eggs produced of each gamete genotype, 
returning a vector of length 9.
"""
function make_eggs(genotype_Nf_adult,homing_matrix_f_vals,recombination_matrix_vals,params)

    #Eggs laid by the females of each genotype
    eggs_by_mother = genotype_Nf_adult * params["f"]

    #Homing: spread each mother's eggs over the post-homing genotypes, then total per genotype
    eggs_after_homing = vec(sum(eggs_by_mother .* homing_matrix_f_vals, dims=1))

    #Recombination: split each genotype's eggs over the gametes, then total per gamete
    eggs_by_gamete = vec(sum(eggs_after_homing .* recombination_matrix_vals, dims=1))

    return(eggs_by_gamete)
end

"""
Takes in the number of male adults, male homing matrix, recombination matrix and sex ratio matrix. 
Males are assumed to be not limiting, so the number of males is converted to frequency. 
Homing is applied by multiplying (element-wise) the frequency of males per genotype by the male homing matrix, 
and summing over the columns. 
Recombination is applied and the frequency of each gamete produced per genotype is calculated (45x9 matrix) 
by multiplying (element-wise) the frequency of males per genotype after homing by the recombination matrix. 
Sex ratio bias is applied by multiplying (element-wise) the 45x9 matrix containing the frequency of each gamete per genotype
by each column of the sex ratio matrix separately, providing 2 matrices, one for the proportion of each X-bearing gamete
genotype per adult male and another for the Y-bearing equivalent.
The columns of each X-bearing and Y-bearing sperm matrix are summed to find the frequency of 
each gamete genotype produced.
Returns a (9x2) matrix, containing the frequency of X-bearing sperm (column 1) and Y-bearing sperm (column 2)
of each gamete genotype (rows).
"""
function make_sperm(genotype_Nm_adult,homing_matrix_m_vals,recombination_matrix_vals,sex_ratio_matrix_vals)

    #Convert male numbers to frequency
    genotype_freqm = genotype_Nm_adult./sum(genotype_Nm_adult)

    #Homing: spread each male genotype over the post-homing genotypes, then total per genotype
    genotype_freqm_homing = sum(genotype_freqm.*homing_matrix_m_vals,dims=1)[:]

    #Recombination: frequency of each gamete (column) produced by each genotype (row)
    freqsperm_per_genotype = genotype_freqm_homing.*recombination_matrix_vals

    #Sex ratio: weight each genotype's gametes by its X- and Y-bearing proportions, then total per gamete
    freqsperm_per_gamete_X = sum(freqsperm_per_genotype.*sex_ratio_matrix_vals[:,1],dims=1)[:]
    freqsperm_per_gamete_Y = sum(freqsperm_per_genotype.*sex_ratio_matrix_vals[:,2],dims=1)[:]

    return(hcat(freqsperm_per_gamete_X,freqsperm_per_gamete_Y))
end

"""
    Find_nzygotes(gametes)

Calculate the number of female and male zygotes of each diploid genotype.

`gametes` is a three-column matrix with one row per haploid genotype, in the order of
the global `alleles` list (9x3 in this model): column 1 holds egg numbers, column 2 the
frequency of X-bearing sperm and column 3 the frequency of Y-bearing sperm.

Assumes random mating and that males are not limiting. For a homozygous genotype the
number of zygotes is eggs x sperm frequency of that gamete; for a heterozygous genotype
both egg/sperm combinations are summed.

Returns a two-column matrix with one row per entry of the global `genotypes` list
(45x2 in this model): column 1 holds female zygotes (X-bearing sperm) and column 2 male
zygotes (Y-bearing sperm).

Every gamete named in `genotypes` must be present in `alleles`; otherwise indexing fails.
"""
function Find_nzygotes(gametes)

    neggs_genotype = gametes[:,1]
    freqsperm_genotype_X = gametes[:,2]
    freqsperm_genotype_Y = gametes[:,3]

    # Position of a haploid genotype in the global `alleles` list (= row of `gametes`)
    gamete_index(gamete) = findfirst(x -> x == gamete, alleles)

    # Number of zygotes of one diploid genotype fertilised by the given class of sperm
    function zygote_number(genotype, freqsperm)
        index1 = gamete_index(genotype[1])
        if genotype[1] == genotype[2]
            # Homozygote: egg and sperm carry the same gamete
            return neggs_genotype[index1]*freqsperm[index1]
        end
        # Heterozygote: either gamete may come from the egg, the other from the sperm
        index2 = gamete_index(genotype[2])
        return (neggs_genotype[index1]*freqsperm[index2]) + (freqsperm[index1]*neggs_genotype[index2])
    end

    # Comprehensions give concretely typed vectors instead of Vector{Any}
    n_female = [zygote_number(genotype, freqsperm_genotype_X) for genotype in genotypes]
    n_male = [zygote_number(genotype, freqsperm_genotype_Y) for genotype in genotypes]

    return hcat(n_female, n_male)
end

"""
    apply_density_dependence(params, n_zygotes)

Scale the zygote numbers by the density-dependent juvenile survival probability.

The survival probability is `params["OJ"] * params["α"] / (params["α"] + N)`, where `N`
is the total number of zygotes summed over every entry of `n_zygotes` (45x2 matrix of
numbers per genotype in this model). Returns an array of the same shape holding the
numbers of individuals that survive.
"""
function apply_density_dependence(params,n_zygotes)
    total_zygotes = sum(n_zygotes)

    # Survival declines as the total number of zygotes grows relative to params["α"]
    crowding_factor = params["α"]/(params["α"]+total_zygotes)
    survival_probability = params["OJ"]*crowding_factor

    return n_zygotes * survival_probability
end

"""
    Simulate_timeseries(params, matrices, fitness_effect, t, starting_population_post_release)

Simulate the population through time in discrete, non-overlapping generations.

# Arguments
- `params`: dictionary of parameters (passed on to `make_eggs` and `apply_density_dependence`).
- `matrices`: collection unpacked, in order, into the fitness matrix, male homing matrix,
  female homing matrix, recombination matrix and sex ratio matrix.
- `fitness_effect`: string defining when fitness costs are applied: `"zygote"` (before
  density dependence) or `"adult"` (after density dependence). Any other value means
  that no fitness cost is applied.
- `t`: number of generations to simulate.
- `starting_population_post_release`: the starting population (45x6 matrix; columns are
  f zygote, m zygote, f pupae, m pupae, f adult, m adult).

# Each generation
1. Adult females produce eggs and adult males produce sperm, taking into consideration
   homing, recombination and sex ratio bias.
2. Zygotes are produced assuming random mating and that males are not limiting.
3. Fitness costs are applied to zygotes if `fitness_effect == "zygote"`; the zygote
   population is censused afterwards.
4. Density dependence is applied to give the pupae.
5. Fitness costs are applied to give the adults if `fitness_effect == "adult"`; the
   adult population is censused afterwards.

# Returns
A dictionary of dataframes with one row per time point (`t+1` rows; row 1 is the
starting population): one table per sex and life stage with a column per diploid
genotype, `"eggnumber"` with a column per haploid genotype and `"spermfreq"` with a
column per haploid genotype for X-bearing and for Y-bearing sperm.
No gametes are recorded for the starting population, so row 1 of `"eggnumber"` and
`"spermfreq"` is NaN.
"""
function Simulate_timeseries(
        params, #dictionary of parameters
        matrices, #list of matrices
        fitness_effect, #string defining when fitness costs are applied (zygote or adult)
        t, #number of generations to run the simulation
        starting_population_post_release #the starting population (45 x 6 matrix)
        )

    #Unpack list of pre-defined matrices
    fitness_matrix_vals,
    homing_matrix_m_vals,
    homing_matrix_f_vals,
    recombination_matrix_vals,
    sex_ratio_matrix_vals = matrices

    #define (45x6x(t+1)) storage for n. individuals
    #(f zygote, m zygote, f pupae, m pupae, f adult, m adult); every slice is written below
    n = Array{Float64}(undef,(length(genotypes),6,t+1))
    #define (9x3x(t+1)) storage for gametes (eggs, X-bearing sperm, Y-bearing sperm).
    #Slice 1 is never written (no gametes exist for the starting population), so the
    #storage is NaN-filled rather than left as uninitialised memory.
    g = fill(NaN,(length(alleles),3,t+1))

    #save starting population from input
    n[:,:,1] = starting_population_post_release

    #adults of the starting population are the parents of generation 1
    genotype_Nf_adult = n[:,5,1]
    genotype_Nm_adult = n[:,6,1]

    for gen in 1:t
        #female adults produce eggs
        g[:,1,gen+1] = make_eggs(genotype_Nf_adult,homing_matrix_f_vals,recombination_matrix_vals,params)
        #male adults produce sperm
        g[:,2:3,gen+1] = make_sperm(genotype_Nm_adult,homing_matrix_m_vals,recombination_matrix_vals,sex_ratio_matrix_vals)
        #zygotes are generated from eggs and sperm
        n_zygotes = Find_nzygotes(g[:,:,gen+1])
        #fitness costs are applied to zygotes if applicable
        if fitness_effect == "zygote"
            n_zygotes = n_zygotes .* fitness_matrix_vals
        end
        n[:,1:2,gen+1] = n_zygotes
        #density dependent mortality is applied
        n_pupae = apply_density_dependence(params,n_zygotes)
        n[:,3:4,gen+1] = n_pupae
        #fitness costs are applied to adults if applicable
        n_adults = fitness_effect == "adult" ? n_pupae .* fitness_matrix_vals : n_pupae
        n[:,5:6,gen+1] = n_adults
        #these adults are the parents of the next generation
        genotype_Nf_adult = n_adults[:,1]
        genotype_Nm_adult = n_adults[:,2]
    end

    #Store as dataframes in dictionary for intuitive labelled access
    header = Symbol.(genotypes_sym)
    output = Dict(
        "genotypenumber_adult_female" => DataFrame(transpose(n[:,5,:]), header),
        "genotypenumber_adult_male" => DataFrame(transpose(n[:,6,:]), header),
        "genotypenumber_pupae_female" => DataFrame(transpose(n[:,3,:]), header),
        "genotypenumber_pupae_male" => DataFrame(transpose(n[:,4,:]), header),
        "genotypenumber_zygote_female" => DataFrame(transpose(n[:,1,:]), header),
        "genotypenumber_zygote_male" => DataFrame(transpose(n[:,2,:]), header),
        "eggnumber" => DataFrame(transpose(g[:,1,:]), Symbol.(alleles)),
        "spermfreq" => DataFrame(hcat(transpose(g[:,2,:]),transpose(g[:,3,:])),Symbol.([alleles.*"X" ; alleles.*"Y"]))
    )

    return output
end


apply_density_dependence (generic function with 1 method)


###### Post simulation processing

In [1]:
"""
    Simulation_processing_wrapper(output)

Derive summary metrics from the output of a simulation.

From the per-genotype numbers of zygotes, pupae and adults of each sex this computes:
total and relative numbers of females (pupae and adults), genotype frequencies per sex
and averaged over the sexes, the corresponding allele frequencies, and the correlation
between the two constructs for each life stage.

Returns a dictionary holding the genotype-number tables of `output` together with the
additional entries.
"""
function Simulation_processing_wrapper(output)

    # Genotype frequencies of one life stage: females, males and the mean of the two sexes
    function sex_frequencies(female_numbers, male_numbers)
        females = Find_freq_from_no_df(female_numbers)
        males = Find_freq_from_no_df(male_numbers)
        return females, males, (females.+males)./2
    end

    # Total number of females per time point (pupae and adults) and relative to time zero
    total_Nf = DataFrame(
        hcat(sum.(eachrow(output["genotypenumber_pupae_female"])),
             sum.(eachrow(output["genotypenumber_adult_female"]))),
        ["pupae","adult"])
    relative_Nf = Find_relative_Nf(total_Nf)

    # Adults
    adult_f, adult_m, adult_both = sex_frequencies(
        output["genotypenumber_adult_female"], output["genotypenumber_adult_male"])
    # Pupae
    pupae_f, pupae_m, pupae_both = sex_frequencies(
        output["genotypenumber_pupae_female"], output["genotypenumber_pupae_male"])
    # Zygotes
    zygote_f, zygote_m, zygote_both = sex_frequencies(
        output["genotypenumber_zygote_female"], output["genotypenumber_zygote_male"])

    # Short alias for the allele-frequency conversion used repeatedly below
    allele_freqs = Find_allele_frequencies_from_genotypes_df

    return Dict(

        "genotypenumber_adult_female" => output["genotypenumber_adult_female"],
        "genotypenumber_adult_male" => output["genotypenumber_adult_male"],
        "genotypenumber_pupae_female" => output["genotypenumber_pupae_female"],
        "genotypenumber_pupae_male" => output["genotypenumber_pupae_male"],
        "genotypenumber_zygote_female" => output["genotypenumber_zygote_female"],
        "genotypenumber_zygote_male" => output["genotypenumber_zygote_male"],

        "number_females" => total_Nf,
        "relative_number_females" => relative_Nf,

        "genotypefreq_adult_female" => adult_f,
        "genotypefreq_adult_male" => adult_m,
        "genotypefreq_adult" => adult_both,

        "genotypefreq_pupae_female" => pupae_f,
        "genotypefreq_pupae_male" => pupae_m,
        "genotypefreq_pupae" => pupae_both,

        "genotypefreq_zygote_female" => zygote_f,
        "genotypefreq_zygote_male" => zygote_m,
        "genotypefreq_zygote" => zygote_both,

        "allelefreq_adult" => allele_freqs(adult_both),
        "allelefreq_pupae" => allele_freqs(pupae_both),
        "allelefreq_zygote" => allele_freqs(zygote_both),

        "allelefreq_adult_female" => allele_freqs(adult_f),
        "allelefreq_pupae_female" => allele_freqs(pupae_f),
        "allelefreq_zygote_female" => allele_freqs(zygote_f),

        "allelefreq_adult_male" => allele_freqs(adult_m),
        "allelefreq_pupae_male" => allele_freqs(pupae_m),
        "allelefreq_zygote_male" => allele_freqs(zygote_m),

        "correlation_adult" => Find_correlation_df(genotypes,adult_both),
        "correlation_pupae" => Find_correlation_df(genotypes,pupae_both),
        "correlation_zygote" => Find_correlation_df(genotypes,zygote_both)
    )
end

"""Convert numbers to frequencies by dividing every row by its own total"""
function Find_freq_from_no_df(input)
    row_totals = map(sum, eachrow(input))
    return input ./ row_totals
end

"""Express the pupae and adult female numbers as a fraction of their value at the first time point"""
function Find_relative_Nf(Nf_store)
    # Divide a column by its first entry and make sure the result is floating point
    relative_to_start(counts) = float.(counts/(counts[1]))

    return DataFrame(pupae = relative_to_start(Nf_store.pupae),
                     adult = relative_to_start(Nf_store.adult))
end

"""
    Find_allele_frequencies_from_genotypes(genotypes, freq_genotype)

Calculate the allele frequencies of a population at a single time point.

`genotypes[i]` holds the two haploid genotypes of diploid genotype `i` and
`freq_genotype[i]` its frequency. A genotype contributes its full frequency to an allele
carried by both haploid genotypes and half its frequency to an allele carried by one.

Returns a `Vector{Float64}` of length 6 ordered A, a, α, B, b, β.
"""
function Find_allele_frequencies_from_genotypes(genotypes,freq_genotype)
    frequencies = Vector{Float64}()
    for allele in ('A','a','α','B','b','β')
        running_total = 0
        for idx in eachindex(genotypes)
            # Number of the two haploid genotypes that carry this allele (0, 1 or 2)
            copies = (allele in genotypes[idx][1]) + (allele in genotypes[idx][2])
            if copies == 2
                running_total += freq_genotype[idx]
            elseif copies == 1
                running_total += freq_genotype[idx]/2
            end
        end
        push!(frequencies,running_total)
    end
    return frequencies
end
"""
    Find_allele_frequencies_from_genotypes_df(genotype_freq_store)

Calculate the allele frequencies at every time point from the genotype frequencies of a
population through time (one row per time point, one column per diploid genotype of the
global `genotypes` list).

Returns a dataframe with one row per time point and six columns labelled by allele.
"""
function Find_allele_frequencies_from_genotypes_df(genotype_freq_store)
    # One length-6 vector of allele frequencies per time point
    per_timepoint = map(eachrow(genotype_freq_store)) do row
        Find_allele_frequencies_from_genotypes(genotypes,row)
    end
    return Make_dataframe(per_timepoint,Symbol.(['A','a','α','B','b','β']))
end

"""
    Find_chromosome_frequencies_from_genotypes(genotypes, freq_genotype)

Calculate the frequency of each haploid genotype (chromosome) of a population at a
single time point.

`genotypes[i]` holds the two haploid genotypes of diploid genotype `i` and
`freq_genotype[i]` its frequency. A homozygous genotype contributes its full frequency
to its chromosome; a heterozygous genotype contributes half its frequency to each of
its two chromosomes.

Returns a `Vector{Float64}` with one entry per haploid genotype, in the order of the
global `alleles` list.
"""
function Find_chromosome_frequencies_from_genotypes(genotypes,freq_genotype)
    # Typed accumulator, consistent with Find_allele_frequencies_from_genotypes
    chromosome_freqs = Vector{Float64}()
    for chromosome in alleles
        chromosome_freq = 0.0
        for i in eachindex(genotypes)
            if chromosome in genotypes[i]
                if genotypes[i][1] == genotypes[i][2]
                    chromosome_freq += freq_genotype[i]
                else
                    chromosome_freq += freq_genotype[i]/2
                end
            end
        end
        push!(chromosome_freqs,chromosome_freq)
    end
    return chromosome_freqs
end
"""
    Find_chromosome_frequencies_from_genotypes_df(genotype_freq_store)

Calculate the frequency of each haploid genotype (chromosome) at every time point from
the genotype frequencies of a population through time (one row per time point, one
column per diploid genotype of the global `genotypes` list).

Returns a dataframe with one row per time point and one column per haploid genotype,
labelled with the entries of the global `alleles` list.
"""
function Find_chromosome_frequencies_from_genotypes_df(genotype_freq_store)
    # One vector of chromosome frequencies per time point
    per_timepoint = map(eachrow(genotype_freq_store)) do row
        Find_chromosome_frequencies_from_genotypes(genotypes,row)
    end
    return Make_dataframe(per_timepoint,Symbol.(alleles))
end

"""
    Find_correlation(genotypes, genotype_freq)

Calculate the correlation between the constructs a and b from the genotype frequencies
of a population at a single time point:

    (freq(ab) - freq(a)*freq(b)) / sqrt(freq(a)*(1-freq(a))*freq(b)*(1-freq(b)))

where `freq(ab)` is the frequency of the fifth haploid genotype and `freq(a)`, `freq(b)`
are the allele frequencies rounded to 10 digits.

Returns `0.0` whenever the correlation is undefined, i.e. when either construct is
absent or fixed so that the denominator is zero.
"""
function Find_correlation(genotypes,genotype_freq)

    chrom_freq = Find_chromosome_frequencies_from_genotypes(genotypes,genotype_freq)
    ab_freq = chrom_freq[5]
    allele_freq = Find_allele_frequencies_from_genotypes(genotypes,genotype_freq)

    # Rounding removes floating point noise around 0 and 1
    a_freq = round(allele_freq[2],digits = 10)
    b_freq = round(allele_freq[5],digits = 10)

    # Product of the two allele variances; zero when a or b is absent or fixed
    variance_product = float(a_freq*(1-a_freq)*b_freq*(1-b_freq))

    if b_freq > 0.0 && variance_product > 0.0
        return (ab_freq - (a_freq*b_freq))/sqrt(variance_product)
    else
        return 0.0
    end
end
"""
    Find_correlation_df(genotypes, genotype_freq_store)

Calculate the correlation between the constructs a and b at every time point from the
genotype frequencies of a population through time (one row per time point).

Returns a dataframe with one row per time point and a single column "Correlation".
"""
function Find_correlation_df(genotypes,genotype_freq_store)
    per_timepoint = map(row -> Find_correlation(genotypes,row), eachrow(genotype_freq_store))
    return Make_dataframe(per_timepoint,["Correlation"])
end

"""
    Make_dataframe(store, header)

Build a dataframe from a collection of per-time-point results.

Each element of `store` (a vector or a scalar) becomes one row; `header` supplies the
column names. The names are passed straight to the `DataFrame` constructor, as is done
elsewhere in this file, instead of relying on auto-generated names that are renamed
afterwards.
"""
function Make_dataframe(store, header)
    # hcat places every element of `store` in its own column; transposing turns them into rows
    return DataFrame(transpose(hcat(store...)), header)
end

Find_correlation (generic function with 1 method)


###### Input/Output functions

In [29]:
"""
    Sim_IO_save(file_prefix, params, data_dict, t)

Write the results of a simulation to disk.

- `<file_prefix>_info.txt` records the simulation length `t` and every entry of `params`
  as tab-separated `name value` lines (the keys of `params` must be strings).
- Every entry of `data_dict` is written to its own file `<file_prefix>_<key>.csv`.

Returns `nothing`.
"""
function Sim_IO_save(file_prefix,
                            params,
                            data_dict,
                            t)

    #Write to info file the parameters used to run the simulation.
    #The do-block closes the file even if one of the writes throws.
    open(file_prefix*"_info.txt", "w") do io
        write(io,"sim_length \t"*string(t)*"\n")
        for (p, value) in pairs(params)
            write(io,p*"\t"*string(value)*"\n")
        end
    end

    #Write to file the data according to the dictionary label
    for (label, data) in pairs(data_dict)
        CSV.write(file_prefix*"_"*label*".csv", data)
    end

    return nothing
end

"""
    Sim_IO_read(file_prefix, data_labels)

Read the files `<file_prefix>_<label>.csv` for every label in `data_labels` and return
them as a dictionary of dataframes keyed by label.
"""
function Sim_IO_read(file_prefix,
                               data_labels)
    return Dict{Any,Any}(
        label => CSV.read(file_prefix*"_"*label*".csv",DataFrame) for label in data_labels
    )
end

MDFSXshredder_IO_save (generic function with 1 method)

In [None]:
"""
    unpack_construct_label(construct_label)

Unpack the string construct label and return the locus types, homing dependencies and
X-shredder dependency as a tuple of five strings:
`(LOCUS_A, LOCUS_B, A_homing_dependency, B_homing_dependency, XS_dependency)`.

The label is split on `_`; the first field is ignored and each following field is read
as `name-value`, of which the value is kept. Two kinds of value take up an extra field:

- a locus value of `HS` is followed by one more field, which is appended as `HS_<field>`;
- an X-shredder value of `constitutive` is followed by one more field, which is appended
  as `constitutive_<field>`.

The extra field of locus B is located relative to the current position, so labels in
which both loci are `HS` are unpacked correctly.
"""
function unpack_construct_label(construct_label)
    label_vec = split(construct_label,"_")

    # Value part of the "name-value" field at position i
    field_value(i) = split(label_vec[i],"-")[2]

    # `index` tracks the field holding locus A, shifted by one for every extra HS field
    index = 2
    LOCUS_A = field_value(index)
    if LOCUS_A == "HS"
        LOCUS_A = "HS"*"_"*label_vec[index+1]
        index = index+1
    end

    LOCUS_B = field_value(index+1)
    if LOCUS_B == "HS"
        # The extra field directly follows the locus B field (index+1)
        LOCUS_B = "HS"*"_"*label_vec[index+2]
        index = index+1
    end

    set_A_homing_dependency = field_value(index+2)
    set_B_homing_dependency = field_value(index+3)
    set_XS_dependency = field_value(index+4)
    if set_XS_dependency == "constitutive"
        set_XS_dependency = set_XS_dependency*"_"*label_vec[index+5]
    end

    return (string(LOCUS_A),
            string(LOCUS_B),
            string(set_A_homing_dependency),
            string(set_B_homing_dependency),
            string(set_XS_dependency))
end


###### Parameter screen

In [2]:
"""
Count the simulations in a 2-dimensional parameter search, i.e. the number of
(B_pars, var_pars) combinations
"""
function Number_sims(B_pars,var_pars)
    return count(true for _b in B_pars for _v in var_pars)
end

"""
    Parameter_screen_single(pcomb, params, output_folder, var_symbol_vec, A_construct,
                            B_construct, A_homing_dependency, B_homing_dependency,
                            XS_dependency, fitness_effect, transgenic_genotype,
                            release_freq, resistant_allele, t)

Define the starting population and matrices, run a single simulation, calculate summary
metrics from the output and write them to disk, for one combination of parameters and
starting frequency of the resistant allele.

`pcomb` has two entries: the starting frequency of the resistant allele, and the values
of the parameters named in `var_symbol_vec` (same order).
For example, to run a simulation with the α resistance allele at 50%, r = 0.2 and m = 0.8:

    pcomb = [0.5,[0.2,0.8]]
    resistant_allele = "α"
    var_symbol_vec = ["r","m"]

The varied parameters are set on a copy of `params`, so the dictionary passed in is not
modified. Results are written under
`<output_folder>/Sim_t<t>_variable_standingres<freq>/Sim_t<t>_variable_standingres<freq>_<name><value>.../`;
the folders are created if needed. Files saved: numbers and relative numbers of
females, and sex-averaged allele frequencies of zygotes, pupae and adults.

Returns `nothing`.
"""
function Parameter_screen_single(pcomb,
                                params,
                                output_folder,
                                var_symbol_vec,
                                A_construct,
                                B_construct,
                                A_homing_dependency,
                                B_homing_dependency,
                                XS_dependency,
                                fitness_effect,
                                transgenic_genotype,
                                release_freq,
                                resistant_allele,
                                t)

    resistant_freq,var_vec = pcomb

    #Label of the varied parameters, e.g. "_r0.2_m0.8"
    var_label = join("_$(var_symbol_vec[i])$(var_vec[i])" for i in eachindex(var_symbol_vec))

    #Output folders: one per starting resistant frequency, containing one per parameter set.
    #mkpath creates missing parents and does not fail if the folder already exists,
    #so simulations running in parallel cannot collide while creating a shared folder.
    output_standingres_folder = output_folder*"/Sim_t$(t)_variable_standingres$(resistant_freq)"
    output_loc = output_standingres_folder*"/Sim_t$(t)_variable_standingres$(resistant_freq)$(var_label)"
    mkpath(output_loc)

    #Set the varied parameters on a copy so that the caller's dictionary is left untouched
    sim_params = copy(params)
    for i in eachindex(var_symbol_vec)
        sim_params[var_symbol_vec[i]] = var_vec[i]
    end

    #Define starting population
    starting_population_post_release = release_transgenic_males(
        Initiate_prerelease_population_3lifestages(resistant_allele, resistant_freq, sim_params),
        Symbol(transgenic_genotype),
        release_freq)

    #Define matrices
    matrices = make_matrices(sim_params,
                    A_construct,
                    B_construct,
                    A_homing_dependency,
                    B_homing_dependency,
                    XS_dependency)

    #Simulate time series
    output = Simulate_timeseries(
                sim_params,
                matrices,
                fitness_effect,
                t,
                starting_population_post_release)

    unique_file_prefix = output_loc*"/Sim_t$(t)_variable_standingres$(resistant_freq)$(var_label)"

    #Total number of females per time point (pupae and adults) and relative to time zero
    total_Nf = hcat(sum.(eachrow(output["genotypenumber_pupae_female"])),sum.(eachrow(output["genotypenumber_adult_female"])))
    total_Nf = DataFrame(total_Nf,["pupae","adult"])
    relative_Nf = Find_relative_Nf(total_Nf)

    #Allele frequencies of one life stage, from the mean of the female and male genotype frequencies
    function stage_allele_freqs(stage)
        females = Find_freq_from_no_df(output["genotypenumber_$(stage)_female"])
        males = Find_freq_from_no_df(output["genotypenumber_$(stage)_male"])
        return Find_allele_frequencies_from_genotypes_df((females.+males)./2)
    end

    data_dict = Dict(
        "number_females" => total_Nf,
        "relative_number_females" => relative_Nf,
        "allelefreq_pupae" => stage_allele_freqs("pupae"),
        "allelefreq_adult" => stage_allele_freqs("adult"),
        "allelefreq_zygote" => stage_allele_freqs("zygote"),
    )

    #Save
    Sim_IO_save(unique_file_prefix,
                sim_params,
                data_dict,
                t)

    println("Completed sim res$(resistant_freq)$(var_label)")
end

"""
    Parameter_screen_parallel(output_folder, params, res_pars, var_symbol, var_pars, ...)

Run one simulation for every combination of a starting resistant frequency (`res_pars`)
and a set of parameter values (`var_pars`), distributing the simulations with `pmap`.
Each entry of `var_pars` gives the values of the parameters named in `var_symbol`.

For example entering

    res_pars = [0.5,0.6]
    var_pars = [(0.2,0.8),(0.3,0.9)]
    var_symbol = ["r","m"]

runs 4 simulations:

    1: res freq = 0.5, r = 0.2, m = 0.8
    2: res freq = 0.6, r = 0.2, m = 0.8
    3: res freq = 0.5, r = 0.3, m = 0.9
    4: res freq = 0.6, r = 0.3, m = 0.9

The remaining arguments are passed unchanged to `Parameter_screen_single`.
"""
function Parameter_screen_parallel(output_folder,
                                        params,
                                        res_pars,
                                        var_symbol,
                                        var_pars,
                                        A_construct,
                                        B_construct,
                                        A_homing_dependency,
                                        B_homing_dependency,
                                        XS_dependency,
                                        fitness_effect,
                                        transgenic_genotype,
                                        release_freq,
                                        resistant_genotype,
                                        t)

    # Every (resistant frequency, parameter set) pair, arranged as a matrix
    combinations = [(res_freq,var_set) for res_freq in res_pars, var_set in var_pars]

    return pmap(combinations) do pcomb
        Parameter_screen_single(pcomb,
                                params,
                                output_folder,
                                var_symbol,
                                A_construct,
                                B_construct,
                                A_homing_dependency,
                                B_homing_dependency,
                                XS_dependency,
                                fitness_effect,
                                transgenic_genotype,
                                release_freq,
                                resistant_genotype,
                                t)
    end
end


Parameter_screen_parallel