# Parametric Bootstrapping of Force-Peak Ruptures

In [None]:
# Load the project helpers defined in gibbs.jl.
# NOTE(review): no `using` statements appear in this notebook, so gibbs.jl presumably also
# brings CSV, DataFrames, Distributions and the plotting packages into scope — confirm.
include("gibbs.jl");
# Default color palette; indexed in later cells (plt[1], plt[2], plt[4]) to fix series colors.
plt = palette(:default);

## Inputs to be specified by user

In [None]:
# Sizes used by the bootstrap loops further down: NL values are drawn per monomer and
# NM monomers are drawn per bundle.
NL = 24; # Number of linkers in CDH23
NM = 80; # Number of CDH23 monomers in a bundle

## Load the Gibbs samples

In [None]:
# Read the Gibbs samples and reshape them into a 3-D array whose first axis has the
# 8 configurations, whose second axis indexes the Gibbs draws, and whose third axis
# has one slice per column of the CSV file.
dfsmp = CSV.read("gibbssmp.csv", DataFrame);
SMP = [dfsmp[r, c] for r in 1:nrow(dfsmp), c in 1:ncol(dfsmp)];
nCa = size(SMP, 2);
SMP = reshape(SMP, 8, :, nCa);
ngibbs = size(SMP, 2);
println("Gibbs samples are stored in SMP which is nCfg x ngibbs x nCa array.");
println("Configs's are ordered like p000 p001 p011 p111 p010 p100 p101 p110 along first index.");

## Calibrate to the force peak rupture data
### Load the data

In [None]:
# Read the force-peak table; its "Rupture" column labels the rupture type and its last
# column is what the cells below plot as force (pN).
dftmp = CSV.read("Force peaks grouped by rupture.csv",DataFrame);
# Relabel the Ca2+ category with Unicode superscripts so it matches the labels used in
# `rpts` and in the group look-ups below.
replace!(dftmp[!,"Rupture"],"Ca2+ coordination"=>"Ca²⁺ coordination");
# Show the table as the cell output.
dftmp

In [None]:
# Map each of the 8 Ca²⁺ configurations (by index) to the rupture type it produces:
# configurations 3, 4 and 7 rupture by Ca²⁺ coordination, all others by beta sheet.
rpts = Dict{Int64,String}(
    state => (state in (3, 4, 7) ? "Ca²⁺ coordination" : "beta sheet") for state in 1:8
);

In [None]:
# Group the force peaks by rupture type; `gdf` is reused by later cells whenever the
# forces of a single rupture type are needed.
gdf = groupby(dftmp,"Rupture");
# Box plot of the last column of dftmp (force) for each rupture type.
pltbx = boxplot(dftmp[!,"Rupture"],dftmp[!,end],alpha=0.75,labels="smd: v = 0.1 nm/ns");
# Overlay the individual observations of each group.
# NOTE(review): x = 1.5 and x = 0.5 are presumably the positions Plots assigns to the two
# categories — confirm the points land on the intended boxes.
scatter!(pltbx,fill(1.5,nrow(gdf[(Rupture="beta sheet",)])),gdf[(Rupture="beta sheet",)][!,end],labels="",c=plt[2])
scatter!(pltbx,fill(0.5,nrow(gdf[(Rupture="Ca²⁺ coordination",)])),gdf[(Rupture="Ca²⁺ coordination",)][!,end],labels="",c=plt[2])

# Axis labels and figure size.
plot!(pltbx,xlabel="rupture",ylabel="force (pN)",size=(450,300))

In [None]:
# Save the current figure (the box plot with data points from the previous cell) as a PDF.
savefig("frpts_bytype.pdf");

### Fit the Gaussian mixture models

In [None]:
"""
    loocv(x::AbstractVector,σ::Real)
Compute the average log-likelihood of calibrated Gaussian mixture models on withhold set by leave one out cross-validation where the Gaussians are given standard deviation σ.
"""
function loocv(x::AbstractVector,σ::Real)
    nsmp = length(x); tmp = 0.; val = fill(0.,nsmp);
    for ℓ=1:nsmp,k=1:nsmp
        if k==1
            tmp = 0.0;
        end
        if k==ℓ
            continue
        end
        
        N = Normal(x[k],σ);
        tmp += ( pdf(N,x[ℓ])/(nsmp-1) );
        
        if (k==nsmp)||(k==nsmp-1 && ℓ==nsmp)
            val[ℓ] = tmp |> log;
        end 
    end

    return sum(val)/nsmp
end;

In [None]:
# Score every candidate bandwidth σ on the grid `hax` by leave-one-out cross-validation,
# separately for the beta-sheet and the Ca²⁺-coordination forces.
hax = 0.1:0.1:400;
βcrsvals = [loocv(gdf[(Rupture="beta sheet",)][!,end],h) for h∈hax];
Cacrsvals = [loocv(gdf[(Rupture="Ca²⁺ coordination",)][!,end],h) for h∈hax];

# Beta sheet: plot exp(score) against σ and mark βσ, the first grid point attaining the maximum.
p1 = plot(hax,exp.(βcrsvals),labels="beta sheet",size=(450,300),xlabel="σ-bandwidth",ylabel="exp(avg log-likelihood)",linewidth=3)
id = findfirst(βcrsvals.==maximum(βcrsvals)); βσ = hax[id];
vline!(p1,[βσ],labels="σ=$(round(βσ,digits=4))",linestyle=:dash,linewidth=3)
# No plot handle is passed, so this styles the current plot.
plot!(xtickfontsize=10,ytickfontsize=10,fontsize=12,legendfontsize=10,titlefontsize=14)

# Ca²⁺ coordination: same procedure, yielding Caσ.
p2 = plot(hax,exp.(Cacrsvals),labels="Ca²⁺ coordination",size=(450,300),xlabel="σ-bandwidth",ylabel="",linewidth=3)
id = findfirst(Cacrsvals.==maximum(Cacrsvals)); Caσ = hax[id];
vline!(p2,[Caσ],labels="σ=$(round(Caσ,digits=4))",linestyle=:dash,linewidth=3)
plot!(xtickfontsize=10,ytickfontsize=10,fontsize=12,legendfontsize=10,titlefontsize=14)

# Round the y tick labels to 3 significant digits and show both panels side by side.
plot!(p1,yformatter=(x->round(x,sigdigits=3)))
plot!(p2,yformatter=(x->round(x,sigdigits=3)))
plot(p1,p2,size=(900,300),margin=4mm)

In [None]:
# Save the current figure (the two bandwidth-scan panels from the previous cell) as a PDF.
savefig("kde_bndwth.pdf");

## Analyze goodness of fit
#### Visually inspect the fits

In [None]:
nsmp = 200000;
# Draw nsmp values from each fitted Gaussian mixture: pick one observed force of that
# rupture type uniformly at random and add Gaussian noise with the fitted bandwidth.
βsmp = [rand(gdf[(Rupture = "beta sheet",)][!,end])+βσ*randn() for ℓ=1:nsmp];
Casmp = [rand(gdf[(Rupture = "Ca²⁺ coordination",)][!,end])+Caσ*randn() for ℓ=1:nsmp];

# Work on a copy so the original box plot `pltbx` is left untouched, then overlay the
# mixture samples as violins at the same x positions used for the data points.
pltvl = deepcopy(pltbx);
violin!(pltvl,fill(0.5,nsmp),Casmp,alpha=0.25,c=plt[4],labels="fit: Gaussian mixture")
violin!(pltvl,fill(1.5,nsmp),βsmp,alpha=0.25,labels="",c=plt[4])

plot!(pltvl,xtickfontsize=10,ytickfontsize=10,fontsize=12,legendfontsize=10,titlefontsize=14)

In [None]:
# Save the current figure (box plot with the Gaussian-mixture violins overlaid) as a PDF.
savefig("frpts_bytypewfit.pdf");

#### Run a nonparametric KS-type test of whether the beta-sheet and $Ca^{2+}$ rupture forces differ significantly (null hypothesis: all samples come from a single density-estimated distribution)

In [None]:
# Repeat the bandwidth scan on all force peaks pooled together (rupture type ignored).
hax = 0.1:0.1:400;
aggcrsvals = [loocv(dftmp[!,end],h) for h∈hax];
p3 = plot(hax,exp.(aggcrsvals),labels="aggregate",size=(450,300),xlabel="σ-bandwidth",ylabel="exp(avg log-likelihood)",linewidth=3)
# aggσ is the first grid point attaining the maximum cross-validation score.
id = findfirst(aggcrsvals.==maximum(aggcrsvals)); aggσ = hax[id];
vline!(p3,[aggσ],labels="σ=$(round(aggσ,digits=4))",linestyle=:dash,linewidth=3)
plot!(xtickfontsize=10,ytickfontsize=10,fontsize=12,legendfontsize=10,titlefontsize=14)

In [None]:
nsmp = 200000;
# Draw nsmp values from the pooled Gaussian mixture: a random observed force plus
# Gaussian noise with bandwidth aggσ.
aggsmp = [rand(dftmp[!,end])+aggσ*randn() for ℓ=1:nsmp];

# Box plot of the pooled data with the observations and the mixture samples overlaid.
p4 = boxplot(["aggregate"],dftmp[!,end],alpha=0.75,labels="smd: v = 0.1 nm/ns");
scatter!(p4,fill(0.5,nrow(dftmp)),dftmp[!,end],labels="",c=plt[2]);
violin!(p4,fill(0.5,nsmp),aggsmp,alpha=0.25,c=plt[4],labels="fit: Gaussian mixture")
plot!(p4,xtickfontsize=10,ytickfontsize=10,fontsize=12,legendfontsize=10,titlefontsize=14)
plot!(p4,xlabel="rupture",ylabel="force (pN)",size=(1.25*450,1.25*300))

In [None]:
# Semi-parametric bootstrap under the null hypothesis that every force peak comes from the
# single pooled Gaussian mixture. Each replicate (one column) draws as many values as were
# observed and splits them into a "beta" and a "Ca²⁺" group with the observed group sizes.
# The sizes are read from the grouped data so they cannot drift from the CSV contents.
nsmp = 500000;
βobs = gdf[(Rupture="beta sheet",)][!,end];
Caobs = gdf[(Rupture="Ca²⁺ coordination",)][!,end];
nβ = length(βobs); ntot = nβ + length(Caobs);
btstpagg = rand(dftmp[!,end],ntot,nsmp) + aggσ*randn(ntot,nsmp);
btstpbeta = btstpagg[1:nβ,:]; btstpca = btstpagg[nβ+1:end,:];

# Test statistic: largest absolute difference between the two groups' empirical quantile
# functions, evaluated at 101 equally spaced probabilities.
qax = LinRange(0,1,101);

# Monte Carlo sample of the statistic over the bootstrap replicates
ksdistr = [maximum(abs.(quantile(btstpbeta[:,k],qax) - quantile(btstpca[:,k],qax))) for k=1:nsmp];

# statistic of the observed data
ksobs = maximum(abs.(quantile(βobs,qax) - quantile(Caobs,qax)));

# p-value: fraction of replicates at least as extreme as the observation
println("ks p-value if treat all smd samples as estimating one density: $(sum(ksdistr .>= ksobs)/nsmp)")

#### As above, now test whether the difference in mean rupture force between the two populations is significant

In [None]:
# Null distribution of |mean(beta) − mean(Ca²⁺)|, one value per bootstrap replicate (column)
μdistr = map(k -> abs(mean(view(btstpbeta, :, k)) - mean(view(btstpca, :, k))), 1:nsmp)

# Magnitude of the difference in means in the observed data
μobs = abs(
    mean(gdf[(Rupture="Ca²⁺ coordination",)][!, end]) -
    mean(gdf[(Rupture="beta sheet",)][!, end]),
)

# Fraction of replicates whose difference is at least as large as the observed one
println("difference in mean p-value if treat all smd samples as estimating one density: $(count(>=(μobs), μdistr)/nsmp)")

## Parametric bootstrap force peaks at ruptures

In [None]:
# Parametric bootstrap of the rupture force of a single monomer:
# for every Ca²⁺ condition i and replicate j, draw one Gibbs sample of the configuration
# probabilities, fill NL linkers with configurations drawn from it, sample a rupture force
# for each linker from the Gaussian mixture of its rupture type, and keep the weakest one.
nsmp = 3000;
fsmps = Matrix{Float64}(undef, nCa, nsmp);
mnr = Vector{Float64}(undef, NL);
ps = fill(0., 8); ps[1] = 1.;
P = Categorical(ps);

for i in 1:nCa, j in 1:nsmp
    # one Gibbs draw of the Ca²⁺ configuration distribution per monomer
    id = rand(1:ngibbs);
    ps[:] = SMP[:, id, i];
    P.p[:] = ps;

    for k in 1:NL
        # configuration of this linker and the rupture type it implies
        typ = rand(P);
        isβ = rpts[typ] == "beta sheet";

        # mixture draw: random observed force of that type plus Gaussian noise
        mnr[k] = rand(gdf[(Rupture = rpts[typ],)][!, end]) + (isβ ? βσ : Caσ) * randn();
    end

    # the monomer ruptures at its weakest linker
    fsmps[i, j] = minimum(mnr);
end

### Plot tip-link rupture forces as a function of $[Ca^{2+}]$

In [None]:
# Mean sampled force per row of fsmps, with a ribbon spanning the sampled minimum to maximum.
fμ = sum(fsmps,dims=2)/nsmp; 
lw = [minimum(fsmps[ℓ,:]) for ℓ=1:nCa]; up = [maximum(fsmps[ℓ,:]) for ℓ=1:nCa];

# NOTE(review): the x values are the indices 1:nCa but the axis is labelled in μM — this
# assumes condition i corresponds to i μM; confirm against how gibbssmp.csv was generated.
plot(1:nCa,fμ,linewidth=3,ribbon=(fμ-lw,up-fμ),fillalpha=0.1,legend=:topleft,
     xlabel = "[Ca²⁺] (μM)",ylabel="force (pN)",size=(450,300),labels="v = 0.1 nm/ns");

In [None]:
# Overlay the band between the 2.5% and 97.5% quantiles on the current plot; the series
# is drawn with linewidth=0 so only the ribbon is added.
lw = [quantile(fsmps[ℓ,:],0.025) for ℓ=1:nCa]; up = [quantile(fsmps[ℓ,:],0.975) for ℓ=1:nCa];

plot!(1:nCa,fμ,linewidth=0,ribbon=(fμ-lw,up-fμ),fillalpha=0.2,
     xlabel = "[Ca²⁺] (μM)",ylabel="force (pN)",labels="",c=plt[1]);

In [None]:
# Overlay the band between the 25% and 75% quantiles on the current plot.
lw = [quantile(fsmps[ℓ,:],0.25) for ℓ=1:nCa]; up = [quantile(fsmps[ℓ,:],0.75) for ℓ=1:nCa];

plot!(1:nCa,fμ,linewidth=0,ribbon=(fμ-lw,up-fμ),fillalpha=0.35,
     xlabel = "[Ca²⁺] (μM)",ylabel="force (pN)",labels="",c=plt[1])

# Font sizes for the current plot.
plot!(xtickfontsize=10,ytickfontsize=10,fontsize=12,legendfontsize=10,titlefontsize=14);

In [None]:
# Grab the current figure as p1 and make an independent copy p2 whose x-axis is
# restricted to (0,50); p1 keeps the full range.
p1 = plot!();
p2 = deepcopy(p1);
plot!(p2,xlims=(0,50));

In [None]:
# Recompute the mean and the min–max envelope (lw/up were overwritten by the quantile cells).
fμ = sum(fsmps,dims=2)/nsmp; 
lw = [minimum(fsmps[ℓ,:]) for ℓ=1:nCa]; up = [maximum(fsmps[ℓ,:]) for ℓ=1:nCa];

# Add an inset subplot to p2, anchored at the bottom right of subplot 1.
plot!(p2,inset=(1,bbox(0.025,0.1,0.45,0.35,:bottom,:right)));

# Draw the mean and min–max ribbon over all of 1:nCa inside the inset (subplot 2 of p2).
plot!(p2[2],1:nCa,fμ,linewidth=3,ribbon=(fμ-lw,up-fμ),fillalpha=0.1,labels="");

In [None]:
# Add the 2.5%–97.5% quantile band to the inset.
lw = [quantile(fsmps[ℓ,:],0.025) for ℓ=1:nCa]; up = [quantile(fsmps[ℓ,:],0.975) for ℓ=1:nCa];

plot!(p2[2],1:nCa,fμ,linewidth=0,ribbon=(fμ-lw,up-fμ),fillalpha=0.2,
     labels="",c=plt[1]);

In [None]:
# Add the 25%–75% quantile band to the inset.
lw = [quantile(fsmps[ℓ,:],0.25) for ℓ=1:nCa]; up = [quantile(fsmps[ℓ,:],0.75) for ℓ=1:nCa];

plot!(p2[2],1:nCa,fμ,linewidth=0,ribbon=(fμ-lw,up-fμ),fillalpha=0.35,
     labels="",c=plt[1])

In [None]:
# Save the current figure as a PDF.
# NOTE(review): presumably this is p2 (main panel plus inset) from the preceding cells — confirm.
savefig("predfrpt.pdf");

#### Master panel of GMM force fits and rupture

In [None]:
# Optional manual y-tick overrides for the main panel and the inset (currently disabled).
#plot!(p2[1],yticks=0:200:1200);
#plot!(p2[2],yticks=0:300:1200);
# Side-by-side panel: data with Gaussian-mixture fits (pltvl) and predicted rupture forces (p2).
plot(pltvl,p2,size=(900,300),margin=4mm)

In [None]:
# Save the current figure (the two-panel layout from the previous cell) as a PDF.
savefig("mstpanel_frcrpt.pdf");

## Parametric bootstrap bundle ruptures

In [None]:
# Parametric bootstrap of the rupture force of a bundle of NM monomers:
# for every Ca²⁺ condition i and replicate j, draw ONE Gibbs sample of the configuration
# probabilities for the whole bundle, simulate NM monomers of NL linkers each (a monomer
# ruptures at its weakest linker), and record the median monomer rupture force, i.e. the
# force at which 50% of the monomers in the bundle have ruptured.
nsmp = 3000;
fsmps = Matrix{Float64}(undef,nCa,nsmp); 
mnr = Vector{Float64}(undef,NL); bndl = Vector{Float64}(undef,NM);
ps = fill(0.,8); ps[1] = 1.; P = Categorical(ps);

# Observed forces per rupture type, looked up once instead of inside the innermost loop.
βfrcs = gdf[(Rupture = "beta sheet",)][!,end];
Cafrcs = gdf[(Rupture = "Ca²⁺ coordination",)][!,end];

for i=1:nCa, j=1:nsmp
    # redraw a gibbs Ca2+ distribution exactly once for this bundle
    id = rand(1:ngibbs);
    ps[:] = SMP[:,id,i];
    P.p[:] = ps;

    for k=1:NM
        for ℓ=1:NL
            # fill the linker region with a Ca²⁺ state
            typ = rand(P);

            # sample a rupture force from the Gaussian mixture of the implied rupture type:
            # a random observed force of that type plus Gaussian noise with its bandwidth
            isβ = rpts[typ] == "beta sheet";
            μ = rand(isβ ? βfrcs : Cafrcs);
            σ = isβ ? βσ : Caσ;

            mnr[ℓ] = μ+σ*randn();
        end

        # the monomer ruptures at its weakest linker
        bndl[k] = minimum(mnr);
    end

    # force at which 50% of the monomers are ruptured, computed once the bundle is complete
    fsmps[i,j] = quantile(bndl,0.5);
end

### Plot 50% bundle rupture forces as a function of $[Ca^{2+}]$

In [None]:
# Mean of the sampled bundle forces per row of fsmps, with a ribbon spanning the sampled
# minimum to maximum.
fμ = sum(fsmps,dims=2)/nsmp; 
lw = [minimum(fsmps[ℓ,:]) for ℓ=1:nCa]; up = [maximum(fsmps[ℓ,:]) for ℓ=1:nCa];

# NOTE(review): the x values are the indices 1:nCa but the axis is labelled in μM — this
# assumes condition i corresponds to i μM; confirm against how gibbssmp.csv was generated.
plot(1:nCa,fμ,linewidth=3,ribbon=(fμ-lw,up-fμ),fillalpha=0.1,legend=:topleft,
     xlabel = "[Ca²⁺] (μM)",ylabel="force (pN)",size=(450,300),labels="v = 0.1 nm/ns");

In [None]:
# Overlay the band between the 2.5% and 97.5% quantiles on the current plot; the series
# is drawn with linewidth=0 so only the ribbon is added.
lw = [quantile(fsmps[ℓ,:],0.025) for ℓ=1:nCa]; up = [quantile(fsmps[ℓ,:],0.975) for ℓ=1:nCa];

plot!(1:nCa,fμ,linewidth=0,ribbon=(fμ-lw,up-fμ),fillalpha=0.2,
     xlabel = "[Ca²⁺] (μM)",ylabel="force (pN)",labels="",c=plt[1]);

In [None]:
# Overlay the band between the 25% and 75% quantiles on the current plot.
lw = [quantile(fsmps[ℓ,:],0.25) for ℓ=1:nCa]; up = [quantile(fsmps[ℓ,:],0.75) for ℓ=1:nCa];

plot!(1:nCa,fμ,linewidth=0,ribbon=(fμ-lw,up-fμ),fillalpha=0.35,
     xlabel = "[Ca²⁺] (μM)",ylabel="force (pN)",labels="",c=plt[1])

# Font sizes for the current plot.
plot!(xtickfontsize=10,ytickfontsize=10,fontsize=12,legendfontsize=10,titlefontsize=14);

In [None]:
# Grab the current figure as p1 and make an independent copy p2 whose x-axis is
# restricted to (0,50); p1 keeps the full range.
p1 = plot!();
p2 = deepcopy(p1);
plot!(p2,xlims=(0,50));

In [None]:
# Recompute the mean and the min–max envelope (lw/up were overwritten by the quantile cells).
fμ = sum(fsmps,dims=2)/nsmp; 
lw = [minimum(fsmps[ℓ,:]) for ℓ=1:nCa]; up = [maximum(fsmps[ℓ,:]) for ℓ=1:nCa];

# Add an inset subplot to p2, anchored at the bottom right of subplot 1.
plot!(p2,inset=(1,bbox(0.025,0.1,0.45,0.35,:bottom,:right)));

# Draw the mean and min–max ribbon over all of 1:nCa inside the inset (subplot 2 of p2).
plot!(p2[2],1:nCa,fμ,linewidth=3,ribbon=(fμ-lw,up-fμ),fillalpha=0.1,labels="");

In [None]:
# Add the 2.5%–97.5% quantile band to the inset.
lw = [quantile(fsmps[ℓ,:],0.025) for ℓ=1:nCa]; up = [quantile(fsmps[ℓ,:],0.975) for ℓ=1:nCa];

plot!(p2[2],1:nCa,fμ,linewidth=0,ribbon=(fμ-lw,up-fμ),fillalpha=0.2,
     labels="",c=plt[1]);

In [None]:
# Add the 25%–75% quantile band to the inset.
lw = [quantile(fsmps[ℓ,:],0.25) for ℓ=1:nCa]; up = [quantile(fsmps[ℓ,:],0.75) for ℓ=1:nCa];

plot!(p2[2],1:nCa,fμ,linewidth=0,ribbon=(fμ-lw,up-fμ),fillalpha=0.35,
     labels="",c=plt[1])

In [None]:
# Save the current figure as a PDF.
# NOTE(review): presumably this is p2 (main panel plus inset) from the preceding cells — confirm.
savefig("predbrpt.pdf");
# Persist the sampled bundle forces (the nCa × nsmp matrix fsmps) with auto-generated column names.
CSV.write("bndlrptsmps.csv",DataFrame(fsmps,:auto));