In [None]:
using PorousMaterials # Pkg.clone("https://github.com/SimonEnsemble/PorousMaterials.jl", "v0.1.1")
using DelimitedFiles
using LinearAlgebra
using JLD2
using CoherentPointDrift
import Bio3DView

# Database of cages

Cage database CDB41 (41 cages) from [here](https://github.com/marcinmiklitz/CDB41). Cleaned of solvent. Thanks to Kim Jelfs and M. Miklitz for kindly providing the cages.

Reference:
> M. Miklitz, S. Jiang, R. Clowes, M. E. Briggs, A. I. Cooper and K. E. Jelfs, Computational Screening of Porous Organic Molecules for Xenon/Krypton Separation, J. Phys. Chem. C, 2017. DOI: 10.1021/acs.jpcc.7b03848

A second set of 33 cages comes from the `.xyz` files deposited as ESI of DOI 10.1038/s41467-018-05271-9.

> R. L. Greenaway, V. Santolini, M. J. Bennison, B. M. Alston, C. J. Pugh, M. A. Little, M. Miklitz, E. G. B. Eden-Rump, R. Clowes, A. Shakil, H. J. Cuthbertson, H. Armstrong, M. E. Briggs, K. E. Jelfs & A. I. Cooper. High-throughput discovery of organic cages and catenanes using computational screening fused with robotic synthesis. Nature Communications, 2018. DOI: 10.1038/s41467-018-05271-9
                                                              
I manually looked at Fig 4 in the main text and selected those that were synthesized. Some cages required visualization to compare to Fig 4 since e.g. there are multiple versions of B23, a few of which were apparently not synthesized but only computational predictions.

The `.xyz` files describing the molecular structure of all of these cages are in the directory `all_cages`.

In [3]:
# Flatten the delimited table of cage names read from disk into a vector of strings.
cages = vec(readdlm("all_cages/all_cages.txt", String));

# Center cages

The center of mass will be set to be the origin.
Each cage will be rotated such that it is aligned with its principal axes of inertia,
i.e. the moment of inertia matrix of an "aligned" cage is diagonal.


First, a function to compute the center of mass of a group of `atoms` at Cartesian coordinates `x`, stored in the columns.

In [4]:
"""
    center_of_mass(atoms, x)

Return the mass-weighted mean position of `atoms`, whose Cartesian coordinates are stored
in the columns of `x`. Masses are looked up per atom symbol in the table returned by
`read_atomic_masses()`.
"""
function center_of_mass(atoms::Array{Symbol, 1}, x::Array{Float64, 2})
    masses = read_atomic_masses()
    weighted_position = zeros(3)
    mass_sum = 0.0
    for idx in eachindex(atoms)
        m = masses[atoms[idx]]
        # accumulate mass * position and the running total mass
        weighted_position = weighted_position + m * x[:, idx]
        mass_sum = mass_sum + m
    end
    return weighted_position / mass_sum
end

center_of_mass (generic function with 1 method)

In [5]:
"""
    centered_cage_coords(cage)

Read `all_cages/<cage>.xyz`, translate the coordinates so that the center of mass sits at
the origin, write the shifted structure to `centered_cages/<cage>.xyz`, and return
`(atoms, x)` where `x` holds the shifted coordinates.
"""
function centered_cage_coords(cage::AbstractString)
    raw_path = string("all_cages/", cage, ".xyz")
    centered_path = string("centered_cages/", cage, ".xyz")

    atoms, raw_x = read_xyz(raw_path)

    # subtract the center of mass from every column (one column per atom)
    shifted_x = raw_x .- center_of_mass(atoms, raw_x)

    write_xyz(atoms, shifted_x, centered_path)
    return atoms, shifted_x
end

centered_cage_coords (generic function with 1 method)

# Porosity point clouds

In [6]:
"""
    generate_porosity_point_cloud(atoms, x, nb_pts_in_porosity_cloud, snapshot_radius)

Sample `nb_pts_in_porosity_cloud` points in the void space around a molecule and return
them as the columns of a `3 x nb_pts_in_porosity_cloud` matrix.

# Arguments
- `atoms`: atom symbols of the molecule.
- `x`: Cartesian coordinates of the atoms, one column per atom.
- `nb_pts_in_porosity_cloud`: number of accepted points to return.
- `snapshot_radius`: radius of the sphere (about the origin) that trial points must fall
  in. When `nothing`, the largest distance of any atom from the origin is used.

# Method
Trial points are drawn from an isotropic Gaussian with standard deviation
`snapshot_radius / 3`, so sampling is denser near the origin; points outside the sphere
are rejected. A helium probe is placed at each remaining trial point and the point is
kept when the He-molecule van der Waals energy is below 298.0 (presumably Kelvin, i.e.
roughly thermal energy at room temperature -- TODO confirm units of
`vdw_energy_no_PBC`). The function loops until enough points have been accepted.
"""
function generate_porosity_point_cloud(atoms::Array{Symbol, 1}, x::Array{Float64, 2}, 
                                       nb_pts_in_porosity_cloud::Int, snapshot_radius::Union{Nothing, Float64})
    # store points in void space here.
    x_porosity_cloud = zeros(3, nb_pts_in_porosity_cloud)

    # determine if void space via potential energy of He
    he = Molecule("He")
    ljff = LJForceField("UFF.csv", cutoffradius=14.0, mixing_rules="geometric")
    ljspheres = Atoms(atoms, x)

    # compute radius of the molecule; this determines variance of Gaussian if snapshot size not provided
    # (`===` is the identity test for the `nothing` singleton and cannot be overloaded)
    if snapshot_radius === nothing
        snapshot_radius = maximum([norm(x[:, a]) for a = 1:length(atoms)])
    end
    
    n_hits = 0
    while n_hits < nb_pts_in_porosity_cloud
        # bias insertions towards center
        x_insert = randn(3) * snapshot_radius / 3 # scale smaller to bias describing core more.
        # reject trial points that fall outside the sampling sphere
        if norm(x_insert) > snapshot_radius
            continue
        end

        # put helium at this trial point
        translate_to!(he, x_insert)

        # compute potential energy of He adsorbate here
        energy = vdw_energy_no_PBC(he, ljspheres, ljff)

        # low energy => the probe does not overlap the molecule => void space
        if energy < 298.0
            n_hits += 1
            x_porosity_cloud[:, n_hits] = x_insert
        end
    end
    
    return x_porosity_cloud
end

# Example on a single cage: center "B13", then sample 5000 void-space points.
# Passing `nothing` makes the sampling radius default to the radius of the molecule.
atoms, x = centered_cage_coords("B13")
generate_porosity_point_cloud(atoms, x, 5000, nothing)

3×5000 Array{Float64,2}:
 -1.52856   0.548317  -2.80718  -0.618359  …  -0.638938  -3.91158   1.50711
  2.17832  -2.14988   -3.82797   1.68525      -0.316719  -5.42064   1.1806 
 -1.41596  -1.56452    4.2871    1.26862       0.764409   4.01625  -1.71362

# Rotate to align principal axes of rotation with Cartesian axes

Next, a function to compute the moment of inertia matrix of a group of `atoms` at Cartesian positions `x`. 

See <a href="https://chem.libretexts.org/Textbook_Maps/Physical_and_Theoretical_Chemistry_Textbook_Maps/Map%3A_Physical_Chemistry_(McQuarrie_and_Simon)/13%3A_Molecular_Spectroscopy/13-08._The_Rotational_Spectrum_of_a_Polyatomic_Molecule_Depends_Upon_the_Principal_Moments_of_Inertia_of_the_Molecule">Chemistry Libre Texts</a> moment of inertia formulas.

In [7]:
"""
    moment_of_inertia(x)

Return the 3 x 3 moment of inertia matrix of the points stored in the columns of `x`,
treating every point as having unit mass.

Diagonal entry `(i, i)` is the sum over points of the squared coordinates along the two
axes other than `i`; off-diagonal entry `(i, j)` is minus the sum over points of the
product of coordinates `i` and `j`. The result is asserted to be symmetric.
"""
function moment_of_inertia(x::Array{Float64, 2})
    inertia = zeros(Float64, 3, 3)
    for pt in 1:size(x, 2)
        for col in 1:3, row in 1:3
            if row == col
                # the two axes perpendicular to `row`
                others = filter(k -> k != row, 1:3)
                inertia[row, row] += sum(x[others, pt] .^ 2)
            else
                inertia[row, col] -= x[row, pt] * x[col, pt]
            end
        end
    end

    # should be symmetric!
    @assert isapprox(inertia, inertia')

    return inertia
end

moment_of_inertia (generic function with 1 method)

In [8]:
"""
    viewcage(cage)

Display the structure stored in `final_aligned_cages/<cage>.xyz` via `Bio3DView.viewfile`.
"""
function viewcage(cage::AbstractString)
    xyz_path = string("final_aligned_cages/", cage, ".xyz")
    return Bio3DView.viewfile(xyz_path, "xyz")
end

viewcage (generic function with 1 method)

In [9]:
"""
    write_final_aligned_cage(atoms, x, cage)

Write `atoms` at coordinates `x` (one column per atom) to `final_aligned_cages/<cage>.xyz`.
"""
write_final_aligned_cage(atoms::Array{Symbol, 1}, x::Array{Float64, 2}, cage::AbstractString) = write_xyz(atoms, x, "final_aligned_cages/" * cage * ".xyz")

"""
    read_final_aligned_cage(cage)

Read `final_aligned_cages/<cage>.xyz` and return the result of `read_xyz`, which the rest
of this notebook destructures as `atoms, x`.
"""
# `read_xyz` takes only the file path; `atoms` and `x` are its outputs, not its inputs.
read_final_aligned_cage(cage::AbstractString) = read_xyz("final_aligned_cages/" * cage * ".xyz")

read_final_aligned_cage (generic function with 1 method)

In [11]:
# number of void-space points sampled per cage for the moment of inertia calculation
nb_pts_in_pt_cloud = 50000

# cage name => has a final, unambiguous orientation been written for this cage yet?
aligned = Dict(cage => false for cage in cages)

for cage in cages
    # read in centered cage
    atoms, x = centered_cage_coords(cage)
    
    # generate porosity point cloud
    x_pt_cloud = generate_porosity_point_cloud(atoms, x, nb_pts_in_pt_cloud, nothing) # use radius of molecule for this.
    write_xyz([:H for i = 1:nb_pts_in_pt_cloud], x_pt_cloud, "centered_cages/porosity_point_clouds/" * cage * ".xyz")
    
    # diagonalize moment of inertia matrix (of the porosity point cloud, not of the atoms)
    mi = moment_of_inertia(x_pt_cloud)
    λ, v = eigen(mi) # columns of v are eigenvectors
    ids = sortperm(λ, rev=true) # sort eigenvalues from large to small.
    λ = λ[ids]
    v = v[:, ids]
    # an orthonormal eigenbasis can have determinant -1, i.e. include a reflection, which
    # would mirror the cage instead of rotating it. flipping the sign of one eigenvector
    # keeps it an eigenvector and makes `v` a proper rotation (determinant +1).
    if det(v) < 0
        v[:, 3] = -v[:, 3]
    end
    # tests out of paranoia
    @assert(isapprox(mi * v, v * diagm(0 => λ)), "eigenvectors not right")
    @assert(isapprox([norm(v[:, i]) for i = 1:3], ones(3)), "eigenvectors not unit vectors")
    @assert(isapprox(mi, v * diagm(0 => λ) * v'))
    # `mi` is symmetric so the columns of `v` are orthonormal, i.e. `v` is orthogonal;
    # together with det(v) = +1 it is a rotation matrix. now use it to rotate x to align
    # with the principal axes of rotation
    @assert(isapprox(v' * v, Diagonal{Float64}(I, 3)), "v is not unitary...")
    @assert (λ[1] >= λ[2]) && (λ[2] >= λ[3])
    
    # align principal axes of rotation of the cage with the Cartesian axes
    write_xyz(atoms, v' * x, "rotational_inertia_aligned_cages/" * cage * ".xyz")
    
    # look for degeneracy: if two principal moments agree within 1%, the corresponding
    # axes are not uniquely defined and this orientation cannot be trusted.
    xy_symmetry = isapprox(λ[1], λ[2], rtol=0.01) # can use rtol b/c fixed # pts now.
    yz_symmetry = isapprox(λ[2], λ[3], rtol=0.01)
    if xy_symmetry || yz_symmetry
        printstyled(cage * " has nearly degenerate axes of inertia\n", color=:red)
    else
        aligned[cage] = true
        write_final_aligned_cage(atoms, v' * x, cage)
        printstyled(cage * " has unique axes of inertia\n", color=:green)
    end
end

[31mA11 has nearly degenerate axes of inertia[39m
[31mB11 has nearly degenerate axes of inertia[39m
[32mB13 has unique axes of inertia[39m
[31mB15 has nearly degenerate axes of inertia[39m
[31mB18 has nearly degenerate axes of inertia[39m
[31mB1 has nearly degenerate axes of inertia[39m
[31mB23 has nearly degenerate axes of inertia[39m
[31mB24 has nearly degenerate axes of inertia[39m
[31mB25 has nearly degenerate axes of inertia[39m
[31mB26 has nearly degenerate axes of inertia[39m
[31mB2 has nearly degenerate axes of inertia[39m
[31mB4 has nearly degenerate axes of inertia[39m
[31mB5 has nearly degenerate axes of inertia[39m
[31mB6 has nearly degenerate axes of inertia[39m
[31mB8 has nearly degenerate axes of inertia[39m
[31mB9 has nearly degenerate axes of inertia[39m
[31mC11 has nearly degenerate axes of inertia[39m
[32mC13 has unique axes of inertia[39m
[31mC15 has nearly degenerate axes of inertia[39m
[32mC18 has unique axes of inertia[39m


# Rigid point set registration to align cages consistently where moments of inertia about principal axes are nearly degenerate

Write porosity point clouds of the rotational-inertia-aligned cages, for further alignment of the cages whose principal axes are nearly degenerate. Store them in `porosity_point_clouds`.

In [12]:
# number of void-space points sampled per cage for the point set registration
nb_pts_in_pt_cloud = 10000

# cage name => matrix of void-space points (one column per point)
porosity_point_clouds = Dict{AbstractString, Array{Float64, 2}}()

# every cage gets the same number of points and the same sampling rule (radius taken
# from the molecule), so that the clouds can be compared across cages
for cage in cages
    atoms, x = read_xyz("rotational_inertia_aligned_cages/" * cage * ".xyz")
    cloud = generate_porosity_point_cloud(atoms, x, nb_pts_in_pt_cloud, nothing)
    porosity_point_clouds[cage] = cloud
    # the cloud is saved as an .xyz file of hydrogen "atoms"
    write_xyz(fill(:H, nb_pts_in_pt_cloud), cloud,
        "rotational_inertia_aligned_cages/porosity_point_clouds/" * cage * ".xyz")
end

In [None]:
"""
    find_best_cage_to_align_to(y, xs; cage_names=String[])

Rigidly register the point cloud `y` against every reference point cloud in `xs` and
return the index of the reference with the smallest value of the fourth quantity returned
by `CoherentPointDrift.rigid_point_set_registration` (presumably a fit objective where
smaller is better -- confirm against that package).

The transformation is applied to `y` (the unaligned cage), not to the references.
If `cage_names` is non-empty, the name of each reference is printed as it is tried.
Throws an error if a reference is approximately equal to `y` itself.
"""
function find_best_cage_to_align_to(y::Array{Float64, 2},
                                    xs::Array{Array{Float64, 2}, 1};
                                    cage_names::Array{String, 1}=String[])
    print_names = length(cage_names) > 0

    # one score per reference; Inf until the registration has been run
    scores = fill(Inf, length(xs))

    for i in eachindex(xs)
        reference = xs[i]
        print_names && println(cage_names[i])

        if size(reference) == size(y) && isapprox(reference, y)
            error("cage == ref cage...")
        end

        # only the fourth returned value is needed to rank the references
        _, _, _, scores[i] = CoherentPointDrift.rigid_point_set_registration(
            reference, y, verbose=false,
            w=0.0, σ²_tol=0.1, q_tol=1.0, max_nb_em_steps=25, print_ending=false)
    end

    # which cage had the best alignment?
    return argmin(scores)
end

In [None]:

# Give every cage whose principal axes were ambiguous a final orientation by rigidly
# registering its porosity point cloud to that of an already-aligned cage.
for cage in cages
    # if cage aligned uniquely with axes of inertia, carry on to the next cage.
    if aligned[cage]
        printstyled(cage * " aligned via principal axes of inertia\n", color=:yellow)
        continue
    end
    
    # cage is unaligned. find best to align to among already-aligned cages
    printstyled("Aligning " * cage * " to ...", color=:green)

    aligned_cages = cages[[aligned[c] for c in cages]]
    # get their porosity point clouds
    xs = [porosity_point_clouds[c] for c in aligned_cages]
    # y is this cage's point cloud
    y = porosity_point_clouds[cage]
    # find best cage to align to (pass `cage_names=aligned_cages` to print each candidate)
    i_align_to = find_best_cage_to_align_to(y, xs)
    align_to = aligned_cages[i_align_to]
    printstyled(align_to * "\n", color=:red)
    println("\tCoherent point drift params: ")
    
    # re-run the registration against the winner to obtain the rotation `R` for `y`
    R, t, σ², ℓ = CoherentPointDrift.rigid_point_set_registration(xs[i_align_to], y, verbose=false,
            w=0.0, σ²_tol=0.1, q_tol=1.0, max_nb_em_steps=25, print_ending=true)
    
    # `y` was sampled around the rotational-inertia-aligned cage, so `R` is expressed in
    # that frame and must be applied to those coordinates, not to the merely centered ones.
    # NOTE(review): the translation `t` is not applied, which keeps the cage centered at
    # its center of mass -- confirm this is intended.
    atoms, x_inertia = read_xyz("rotational_inertia_aligned_cages/" * cage * ".xyz")
    write_final_aligned_cage(atoms, R * x_inertia, cage)

    # this cage now joins the pool of references; move its point cloud into the final
    # frame too so that cages registered against it later end up consistently oriented.
    porosity_point_clouds[cage] = R * y
    aligned[cage] = true
end

In [None]:
# `generate_porosity_point_cloud` takes atom symbols and coordinates, not a cage name,
# so load (and center) each cage first.
atoms_cc3, coords_cc3 = centered_cage_coords("CC3")
x = generate_porosity_point_cloud(atoms_cc3, coords_cc3, 50000, 20.0)
atoms_nc2, coords_nc2 = centered_cage_coords("NC2")
y = generate_porosity_point_cloud(atoms_nc2, coords_nc2, 5000, nothing)