In [1]:
using PorousMaterials # Pkg.clone("https://github.com/SimonEnsemble/PorousMaterials.jl", "v0.1.1")
using DelimitedFiles
using LinearAlgebra
using JLD2
using CoherentPointDrift
import Bio3DView
using ProgressMeter
using Printf

# Database of cages

Cage database CDB41 (41 cages) from [here](https://github.com/marcinmiklitz/CDB41). Cleaned of solvent. Thanks to Kim Jelfs and M. Miklitz for kindly providing the cages.

Reference:
> M. Miklitz, S. Jiang, R. Clowes, M. E. Briggs, A. I. Cooper and K. E. Jelfs, Computational Screening of Porous Organic Molecules for Xenon/Krypton Separation, J. Phys. Chem. C, 2017. DOI: 10.1021/acs.jpcc.7b03848

Other set of 33 cages from the .xyz files deposited as ESI of DOI 10.1038/s41467-018-05271-9

> R. L. Greenaway, V. Santolini, M. J. Bennison, B. M. Alston, C. J. Pugh, M. A. Little, M. Miklitz, E. G. B. Eden-Rump, R. Clowes, A. Shakil, H. J. Cuthbertson, H. Armstrong, M. E. Briggs, K. E. Jelfs & A. I. Cooper. High-throughput discovery of organic cages and catenanes using computational screening fused with robotic synthesis. Nature Communications, 2018. DOI: 10.1038/s41467-018-05271-9
                                                              
I manually looked at Fig 4 in the main text and selected those that were synthesized. Some cages required visualization to compare to Fig 4 since e.g. there are multiple versions of B23, a few of which were apparently not synthesized but only computational predictions.

The `.xyz` files describing the molecular structure of all of these cages are in the directory `all_cages`.

In [2]:
cages = readdlm("all_cages/all_cages.txt", String)[:];

# Center cages

The center of mass will be set to be the origin.
Each cage will be rotated such that it is aligned with its principle moments of inertia.
i.e. the moment of inertia matrix of an "aligned" cage is diagonal.


First, a function to compute the center of mass of a group of `atoms` at Cartesian coordinates `x`, stored in the columns.

In [3]:
function center_of_mass(atoms::Array{Symbol, 1}, x::Array{Float64, 2})
    ams = read_atomic_masses()
    com = [0.0, 0.0, 0.0]
    total_mass = 0.0
    for (i, atom) in enumerate(atoms)
        com += ams[atom] * x[:, i]
        total_mass += ams[atom]
    end
    return com / total_mass
end

center_of_mass (generic function with 1 method)

In [4]:
function centered_cage_coords(cage::AbstractString)
    # read in raw .xyz from `all_cages`
    atoms, x = read_xyz("all_cages/" * cage * ".xyz")
    
    # compute center of mass
    x_com = center_of_mass(atoms, x)
    
    # shift coords so that cage is centered
    x = x .- x_com
    
    # write centered cage coords
    write_xyz(atoms, x, "centered_cages/" * cage * ".xyz")
    
    return atoms, x
end

centered_cage_coords (generic function with 1 method)

In [5]:
function moment_of_inertia(atoms::Array{Symbol, 1}, x::Array{Float64, 2})
    @assert(isapprox(center_of_mass(atoms, x), [0.0, 0.0, 0.0], atol=0.0001),
        "cage must be centered to compute moment of inertia matrix!")
    
    # read in atomic masses
    ams = read_atomic_masses()
    
    # moment of inertia matrix `mi`
    mi = zeros(Float64, 3, 3)
    for i = 1:3
        for j = 1:3
            for (a, atom) in enumerate(atoms)
                if i == j
                    ids = collect(setdiff(Set([1, 2, 3]), Set([i])))
                    mi[i, j] += ams[atom] * sum(x[ids, a] .^ 2)
                else
                    mi[i, j] -= ams[atom] * x[i, a] * x[j, a]
                end
            end
        end
    end
    
    # should be symmetric!
    @assert isapprox(mi, mi')
        
    return mi
end

function diagonlize_moment_of_inertia(atoms::Array{Symbol, 1}, x::Array{Float64, 2})
    @assert(isapprox(center_of_mass(atoms, x), [0.0, 0.0, 0.0], atol=0.0001),
        "cage not centered!")
  
    # compute moment of inertia matrix
    mi = moment_of_inertia(atoms, x)

    # diagnomize moment of inertia matrix
    λ, v = eigen(mi) # columns of v are eigenvectors
    
    # reorder eigenvalues and eigenvectors from large to small
    ids = sortperm(λ, rev=true) # sort eigenvalues from large to small.
    λ = λ[ids]
    v = v[:, ids] # eigenvectors in columns
    
    # tests out of paranoia
    @assert(isapprox(mi * v, v * diagm(0 => λ)), "eigenvectors not right")
    @assert(isapprox([norm(v[:, i]) for i = 1:3], ones(3)), "eigenvectors not unit vectors")
    @assert(isapprox(mi, v * diagm(0 => λ) * v'))
    # `v` is a rotation matrix b/c `mi` is symmetric and the columns are orthonormal
    # i.e. `v` is a unitary matrix. now use it to rotate x to align with principle axes of rotation
    @assert(isapprox(v' * v, Diagonal{Float64}(I, 3)), "v is not unitary...")
    @assert (λ[1] >= λ[2]) & (λ[2] >= λ[3])
    
    return λ, v, mi
end

function rotational_inertia_aligned_cage(cage::AbstractString)
    atoms, x = centered_cage_coords(cage)
    λ, v, mi = diagonlize_moment_of_inertia(atoms, x)
    
    # rotate cage so as to align it with the principle axes of inertia
    x = v' * x
    
    # now ensure its moment of inertia matrix is diagonal
    mi = moment_of_inertia(atoms, x)
    @assert(isapprox(diagm(0 => diag(mi)), mi), "not aligned with principle axes")

    # assert moments of inertia are ordered largest to smallest
    @assert(mi[1, 1] >= mi[2, 2])
    @assert(mi[2, 2] >= mi[3, 3])
    return atoms, x
end

rotational_inertia_aligned_cage (generic function with 1 method)

In [12]:
if ! isdir("molecule_aligned_cages")
    mkdir("molecule_aligned_cages")
end
write_final_aligned_cage(atoms::Array{Symbol, 1}, x::Array{Float64, 2}, cage::AbstractString) = write_xyz(atoms, x, "molecule_aligned_cages/" * cage * ".xyz")
read_final_aligned_cage(cage::AbstractString) = read_xyz("molecule_aligned_cages/" * cage * ".xyz")

read_final_aligned_cage (generic function with 1 method)

In [13]:
function read_jld_file_results(cage_y::AbstractString, cage_x::AbstractString; verbose::Bool=false)
    jldfilename = jldopen(@sprintf("cpd_results/align_%s_to_%s_molecule.jld2", cage_y, cage_x))
    R = read(jldfilename, "R")
    σ² = read(jldfilename, "σ²")
    ℓ = read(jldfilename, "ℓ")
    @assert read(jldfilename, "cage_y") == cage_y
    @assert read(jldfilename, "cage_x") == cage_x
    close(jldfilename)
    return R, σ², ℓ
end

function find_pair_to_align(cage_ys::Array{String, 1}, cage_xs::Array{String, 1})
    # look at all possible pairs
    pairz = Tuple{String, String}[]
    ℓs = Float64[]
    for cage_y in cage_ys
        for cage_x in cage_xs
            if cage_y == cage_x
                continue
            end
            
            R, σ², ℓ = read_jld_file_results(cage_y, cage_x)
            
            push!(ℓs, ℓ)
            push!(pairz, (cage_y, cage_x))
        end
    end
    best_pair = pairz[argmin(ℓs)]
    return best_pair[1], best_pair[2]
end

find_pair_to_align (generic function with 1 method)

In [14]:
aligned = Dict(cage => false for cage in cages)

cage_y, cage_x = find_pair_to_align(cages, cages)
aligned[cage_x] = true

while sum([! aligned[cage] for cage in cages]) > 0
    
    aligned_cages = cages[[aligned[cage] for cage in cages]]
    unaligned_cages = cages[[! aligned[cage] for cage in cages]]
    
    cage_y, cage_x = find_pair_to_align(unaligned_cages, aligned_cages)
    printstyled(@sprintf("Aligning cage %s to %s...\n", cage_y, cage_x), color=:yellow)
    @assert aligned[cage_x] && ! aligned[cage_y]
    
    atoms_x, x = read_final_aligned_cage(cage_x)
    
    atoms_y, y = rotational_inertia_aligned_cage(cage_y)
    
    # transformation is applied not to reference cage but the unaligned cage!
    R, t, σ², ℓ = CoherentPointDrift.rigid_point_set_registration(x, y, verbose=false,
        w=0.0, σ²_tol=0.05, q_tol=1.0, max_nb_em_steps=30, print_ending=true)

    write_final_aligned_cage(atoms_y, R * y, cage_y)
    
    aligned[cage_y] = true
end

[33mAligning cage CC9 to CC10...[39m


SystemError: SystemError: opening file molecule_aligned_cages/CC10.xyz: No such file or directory

In [None]:
# aligned = Dict(cage => false for cage in cages)

# me = "B1"
# aligned[me] = true
# atoms, x = centered_cage_coords(me)
# write_final_aligned_cage(atoms, x, me)

# while sum([! aligned[cage] for cage in cages]) > 0
#     # cage is unaligned. find best to align to among already-aligned cages
#     printstyled("Finding best cage to align with me," * me * "...", color=:green)

#     unaligned_cages = cages[[! aligned[cage] for cage in cages]]
#     filter(x -> x != me, unaligned_cages)
    
#     align_to_me = find_best_cage_to_align_to(me, unaligned_cages)# , cage_names=aligned_cages)
#     printstyled(align_to_me * " aligns with me best.\n", color=:red)
    
#     atoms_x, x = rotational_inertia_aligned_cage(me)
#     atoms_y, y = rotational_inertia_aligned_cage(align_to_me)
    
#     println("\tCoherent point drift params: ")
#     R, t, σ², ℓ = CoherentPointDrift.rigid_point_set_registration(x, y, verbose=false,
#             w=0.0, σ²_tol=0.1, q_tol=1.0, max_nb_em_steps=25, print_ending=true)
    
#     write_final_aligned_cage(atoms_y, R * y, align_to_me)
#     aligned[align_to_me]= true
#     me = align_to_me
# end