## 1. Prepare data

### 1.1 Generate simulated data

In [1]:
using TorusEvol
using Distributions

# --- Ground-truth evolutionary process used to simulate the data ---

# Alignment model (TKF92) evaluated at a single time t
t, Œª, Œº, r = 0.3, 0.03, 0.031, 0.4
œÑ = TKF92([t], Œª, Œº, r)

# Substitution process
S = WAG_SubstitutionProcess()

# Dihedral process
Œº_ùúô, Œº_ùúì = -2.0, 2.35
œÉ_ùúô, œÉ_ùúì = 0.4, 0.4
Œ±_ùúô, Œ±_ùúì, Œ±_cov = 0.5, 1.0, 0.1
Œ≥ = 0.2
Œò = JumpingWrappedDiffusion(Œº_ùúô, Œº_ùúì, œÉ_ùúô, œÉ_ùúì, Œ±_ùúô, Œ±_ùúì, Œ±_cov, Œ≥)

# Site-level mixture with a single component of weight 1.0; `hcat` turns the
# vector of processes into a one-column matrix.
Œæ = MixtureProductProcess([1.0], hcat([S, Œò]))

# Chain-level distribution over pairs of chains
pair_chain_dist = ChainJointDistribution(Œæ, œÑ)

# --- Generate data ---
n = 3 # data size
simulated_data = Tuple{ObservedChain, ObservedChain}[rand(pair_chain_dist) for _ in 1:n]

# --- Render a random sample from the data ---
A, B = rand(simulated_data)
chainA, chainB = map((A, B)) do obs
    from_primary_dihedrals(Int.(data(obs)[1]), data(obs)[2])
end
lp = logpdf(pair_chain_dist, (A, B))
print("The log pdf of A and B is $lp")
render(chainA, chainB; aligned=true)

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mPrecompiling TorusEvol [4b860a26-b3bc-4b38-a9ed-83c1dc5d19b0]
[91m[1mERROR: [22m[39mLoadError: syntax: unexpected ")"
Stacktrace:
 [1] top-level scope
[90m   @[39m [90mC:\Users\stefa\Dropbox\Jotun Hein - Part C Projects\Manolache\Code\TorusEvol\src\distributions\[39m[90m[4mAlignmentProcess.jl:96[24m[39m
 [2] [0m[1minclude[22m[0m[1m([22m[90mmod[39m::[0mModule, [90m_path[39m::[0mString[0m[1m)[22m
[90m   @[39m [90mBase[39m [90m.\[39m[90m[4mBase.jl:457[24m[39m
 [3] [0m[1minclude[22m[0m[1m([22m[90mx[39m::[0mString[0m[1m)[22m
[90m   @[39m [35mTorusEvol[39m [90mC:\Users\stefa\Dropbox\Jotun Hein - Part C Projects\Manolache\Code\TorusEvol\src\[39m[90m[4mTorusEvol.jl:1[24m[39m
 [4] top-level scope
[90m   @[39m [90mC:\Users\stefa\Dropbox\Jotun Hein - Part C Projects\Manolache\Code\TorusEvol\src\[39m[90m[4mTorusEvol.jl:151[24m[39m
 [5] [0m[1minclude[22m
[90m   @[39m [90m.\[39m[9

LoadError: Failed to precompile TorusEvol [4b860a26-b3bc-4b38-a9ed-83c1dc5d19b0] to "D:\\Programs\\julia_depot\\compiled\\v1.9\\TorusEvol\\jl_1E08.tmp".

### 1.2 Load real data

## 2. Parameter Inference Bayesian Model

### 2.1 Set up priors for evolutionary processes

In [2]:
using Turing, DynamicPPL
using LinearAlgebra
using LogExpFunctions 
using Plots, StatsPlots
using Random

import Base: length, eltype
import Distributions: _rand!, logpdf

# Turn on progress logging globally (Turing reports this in the cell output).
Turing.setprogress!(true)

"""
    ScaledBeta(Œ±, Œ≤)

Beta distribution with shape parameters `Œ±` and `Œ≤`, rescaled from `[0, 1]` to
`[-1, 1]`: if `X ~ Beta(Œ±, Œ≤)` then `2X - 1 ~ ScaledBeta(Œ±, Œ≤)`.

The wrapped `Beta` is stored in the field `be`; the type parameter keeps that
field concretely typed.
"""
struct ScaledBeta{T<:Real} <: ContinuousUnivariateDistribution
    be::Beta{T}
end
ScaledBeta(Œ±::Real, Œ≤::Real) = ScaledBeta(Beta(Œ±, Œ≤))

# Draw from the underlying Beta with the caller's RNG (so seeded sampling is
# reproducible) and map the draw from [0, 1] to [-1, 1].
Distributions.rand(rng::AbstractRNG, d::ScaledBeta) = 2 * rand(rng, d.be) - 1

# Change of variables y = 2x - 1  =>  p_Y(y) = p_X((y + 1) / 2) / 2.
# The `- log(2)` is the Jacobian term; without it the density does not integrate to 1.
# Points outside [-1, 1] map outside [0, 1], where the Beta log-density is -Inf.
Distributions.logpdf(d::ScaledBeta, x::Real) = logpdf(d.be, (x + 1) / 2) - log(2)


"""
    CompetingExponential(rate)

Joint distribution of the ordered pair `(min(a, b), max(a, b))` where `a` and `b`
are independent exponential variables with the given `rate`.

`Distributions.Exponential` is parameterised by its *scale* (the mean), so the
stored distribution is `Exponential(1 / rate)`.

# Throws
- `ArgumentError` if `rate` is not strictly positive.
"""
struct CompetingExponential{T<:Real} <: ContinuousMultivariateDistribution
    ex::Exponential{T}
end
function CompetingExponential(rate::Real)
    rate > 0 || throw(ArgumentError("rate must be positive, got $rate"))
    return CompetingExponential(Exponential(inv(rate)))
end
Base.eltype(d::CompetingExponential) = Float64
Base.length(d::CompetingExponential) = 2

# Fill `x` with two independent draws from `d.ex`, smaller value first.
function Distributions._rand!(rng::AbstractRNG, d::CompetingExponential, x::AbstractVector{<:Real})
    a = rand(rng, d.ex)
    b = rand(rng, d.ex)
    # Broadcasting a tuple avoids a temporary array and still requires length(x) == 2.
    x .= minmax(a, b)
    return x
end

# Log-density of an ordered pair: each ordered outcome corresponds to two
# unordered ones, hence the factor 2. Pairs with x[1] > x[2] are outside the support.
function Distributions._logpdf(d::CompetingExponential, x::AbstractArray)
    lo, hi = x[1], x[2]
    lo > hi && return -Inf
    return log(2) + logpdf(d.ex, lo) + logpdf(d.ex, hi)
end

# Prior over the three TKF92 parameters.
#
# Samples the pair `ŒªŒº` (smaller value first) from `CompetingExponential(1.0)` and
# `r` from `Uniform(0, 1)`, and returns `(Œª, Œº, r)`.
#
# Proposals can leave the support, so the values are validated here. The first two
# returned entries are NaN unless 0 < Œª < Œº and 0 < r < 1; callers detect that with
# `isnan`. The birth rate must be strictly lower than the death rate, so Œª == Œº is
# rejected too. A NaN draw fails the chained comparison and is also treated as invalid.
@model function tkf92_prior()
    ŒªŒº ~ CompetingExponential(1.0)
    r ~ Uniform(0.0, 1.0)

    Œª, Œº = ŒªŒº[1], ŒªŒº[2]
    if !(0 < Œª < Œº && 0 < r < 1)
        return NaN, NaN, r
    end
    return Œª, Œº, r
end;

# Prior over the eight values splatted into `JumpingWrappedDiffusion` by the caller.
#
# Returns `(Œº[1], Œº[2], sqrt(œÉ¬≤[1]), sqrt(œÉ¬≤[2]), Œ±[1], Œ±[2], Œ±_cov, Œ≥)`.
# Proposals can leave the support, so invalid draws are signalled by NaN entries
# in the returned tuple (callers test with `isnan`). The sampled vectors are never
# modified in place.
@model function jwndiff_prior()
    Œº ~ filldist(Uniform(-pi, pi), 2)
    œÉ¬≤ ~ filldist(Gamma(pi * 0.1), 2)
    Œ± ~ filldist(Gamma(pi * 0.1), 2)
    Œ≥ ~ Exponential(1.0)   # jumping rate
    Œ±_corr ~ ScaledBeta(3, 3)

    # Non-positive variances, drifts or jump rate are invalid. Bail out before
    # taking square roots, which would throw on negative input.
    if any(x -> x <= 0, œÉ¬≤) || any(x -> x <= 0, Œ±) || Œ≥ <= 0
        return Œº[1], Œº[2], NaN, NaN, NaN, NaN, NaN, NaN
    end

    # Off-diagonal drift term from the correlation-like parameter; it is only
    # valid while Œ±_cov^2 <= Œ±[1] * Œ±[2].
    Œ±_cov = Œ±_corr * sqrt(Œ±[1] * Œ±[2])
    if Œ±_cov^2 > Œ±[1] * Œ±[2]
        Œ±_cov = NaN
    end

    return Œº[1], Œº[2], sqrt(œÉ¬≤[1]), sqrt(œÉ¬≤[2]), Œ±[1], Œ±[2], Œ±_cov, Œ≥
end;

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m[Turing]: progress logging is enabled globally
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m[AdvancedVI]: global PROGRESS is set as true


### 2.2 Set up sampler

In [3]:
"""
    torus_proposal(v)

Proposal centred at `v`: a mixture of `WrappedNormal(v, I)` with weight 0.8 and the
wider `WrappedNormal(v, 20I)` with weight 0.2.
"""
torus_proposal(v) = MixtureModel([WrappedNormal(v, I), WrappedNormal(v, 20*I)], [0.8, 0.2])

"""
    mv_rw_proposal(v, cov)

Multivariate normal random-walk proposal centred at `v` with covariance `cov`.
"""
mv_rw_proposal(v::AbstractVector, cov) = MvNormal(v, cov)

"""
    rw_proposal(x, var)

Normal random-walk proposal centred at `x` with variance `var`.

`Normal` takes a standard deviation as its second argument, so the variance is
converted with `sqrt`. This keeps the univariate helper consistent with
`mv_rw_proposal`, which takes a covariance.
"""
rw_proposal(x, var) = Normal(x, sqrt(var))


# One Metropolis-Hastings component per model variable, combined with Gibbs.
# Each pair maps a variable name to a function that builds the proposal
# distribution from the current value.
sampler = Gibbs(map(MH, (
    :t                 => (v -> rw_proposal(v, 0.2)),
    Symbol("Œò.Œº")      => torus_proposal,
    Symbol("Œò.œÉ¬≤")     => (v -> mv_rw_proposal(v, 0.4*I)),
    Symbol("Œò.Œ±")      => (v -> mv_rw_proposal(v, 0.4*I)),
    Symbol("Œò.Œ±_corr") => (x -> rw_proposal(x, 0.5)),
    Symbol("Œò.Œ≥")      => (x -> rw_proposal(x, 0.5)),
    Symbol("œÑ.ŒªŒº")     => (v -> mv_rw_proposal(v, [0.4 0.1; 0.1 0.6])),
    Symbol("œÑ.r")      => (x -> rw_proposal(x, 0.5)),
))...);

In [4]:
using Memoization 

# Memoised per-dataset helpers: the result for a given `pairs` argument is cached
# and returned again on later calls.

# Applies `get_Œ±` to every pair, using one fixed TKF92 instance for all of them.
# NOTE(review): the TKF92 parameter values look like placeholders — presumably only
# the pair determines the result; confirm against `get_Œ±`.
@memoize function get_Œ±s(pairs)
    template = TKF92([1.0], 0.2, 0.3, 0.4)
    map(pair -> get_Œ±(template, pair), pairs)
end

# Applies `get_B` to every pair.
@memoize get_Bs(pairs) = map(get_B, pairs)

get_Bs (generic function with 1 method)

### 2.3 Prepare probabilistic model

In [5]:
# Joint Bayesian model for the evolutionary parameters of a set of chain pairs.
#
# `pairs` is an indexable collection whose elements destructure into two chains
# `(X, Y)`. The model samples the time `t`, the alignment parameters (prefix "œÑ")
# and the dihedral parameters (prefix "Œò"), builds the chain-level distribution Œì,
# and adds the log-probability of every pair under Œì to the model's log-density.
# Returns Œì, or `nothing` when the sampled parameters are invalid.
@model function pair_param_inference_simple(pairs)
    # ____________________________________________________________________________________________________
    # Step 1 - Sample prior parameters

    # Time parameter
    t ~ Exponential(1.0)
    # Alignment parameters
    @submodel prefix="œÑ" Œõ = tkf92_prior()
    # Dihedral parameters
    @submodel prefix="Œò" Œû = jwndiff_prior()
    # Check parameter validity: the priors flag invalid draws with NaN entries.
    # Reject by setting the log-density to -Inf and stop before building any process.
    if t <= 0 || any(isnan, Œû) || any(isnan, Œõ)
        Turing.@addlogprob! -Inf
        return nothing
    end

    # ____________________________________________________________________________________________________
    # Step 2 - Construct processes

    # Substitution Process - no parameters for simplicity, use fully empirical model
    S = WAG_SubstitutionProcess()
    # Dihedral Process
    Œò = JumpingWrappedDiffusion(Œû...)
    # Joint sequence-structure site level process with one regime
    Œæ = MixtureProductProcess([1.0], hcat([S, Œò]))

    # Alignment model
    œÑ = TKF92([t], Œõ...)

    # Chain level model
    Œì = ChainJointDistribution(Œæ, œÑ)

    # ____________________________________________________________________________________________________
    # Step 3 - Observe each pair X, Y by proxy of their joint probability, marginalising over alignments
    # NOTE(review): `get_Œ±s` is memoised, so every evaluation of this model receives the
    # same `Œ±` objects. `logpdfŒ±!` presumably writes into `Œ±[i]`; if so, chains run
    # concurrently (e.g. with `MCMCThreads()`) would share scratch space — confirm.
    Œ± = get_Œ±s(pairs)

    # Evaluate the pairs in parallel, but add to the model's log-density exactly once
    # from this task: worker tasks then never touch the sampler's accumulator.
    tasks = map(eachindex(pairs)) do i
        Threads.@spawn begin
            X, Y = pairs[i]
            # (X, Y) ~ ChainJointDistribution(Œæ, œÑ)
            logpdfŒ±!(Œ±[i], Œì, (X, Y))
        end
    end
    # `init` keeps the sum well-defined when `pairs` is empty.
    Turing.@addlogprob! sum(fetch, tasks; init=0.0)

    return Œì
end;

### 2.4 Sample from the model and check results

In [6]:
# Sampling budget passed to `sample` in the next cell
num_samples, num_chains = 300, 3

# Instantiate the pairwise inference model on the simulated pairs
model = pair_param_inference_simple(simulated_data)

LoadError: UndefVarError: `simulated_data` not defined

In [7]:
# Run the sampler on `model` with `MCMCThreads()`, then plot the resulting chain.
chain = sample(model, sampler, MCMCThreads(), num_samples, num_chains)
p = plot(chain; fontfamily="JuliaMono")

LoadError: UndefVarError: `model` not defined