# QE to cif

This notebook takes the Quantum ESPRESSO (QE) output file (`.out`) of a crystal that has gone through structural optimization and generates a `.cif` file (and a matching `.xyz` file) that can be used in grand-canonical Monte Carlo (GCMC) calculations.

## TODO: 
- check the output file to make sure that the calculation converged
    - if it didn't converge, flag the file and issue a warning
    - possible check: occursin("bfgs converged in", file.out)
    - check for: "scf_must_converge=.false." as well?

In [1]:
using PorousMaterials, LinearAlgebra

In [2]:
# crystal_name = "NiPyC2_vc-relax.cif"
# crystal = Crystal(crystal_name)
# # crystal.box.f_to_c

In [3]:
"""
    is_vcrelax(qe_output_filename::String)

Return `true` when the substring `"vc-relax"` occurs anywhere in
`qe_output_filename`, and `false` otherwise.
"""
is_vcrelax(qe_output_filename::String) = occursin("vc-relax", qe_output_filename)

"""
    check_if_converged(lines::Array{String,1})

Return `true` if at least one entry of `lines` contains the text
`"bfgs converged in"`; return `false` otherwise (including for empty input).
"""
function check_if_converged(lines::Array{String,1})
    return any(entry -> occursin("bfgs converged in", entry), lines)
end

"""
    find_calculation_end(lines::Array{String,1})

Return the index (into `lines`) of the line that marks the end of the BFGS
geometry optimization.

The marker searched for depends on whether the run converged (as reported by
`check_if_converged`):
- converged: the first line containing `"Begin final coordinates"`
- not converged: the first line containing `"End of BFGS Geometry Optimization"`

# Throws
- `ErrorException` if no line contains the expected marker. Previously the
  function fell off its end and returned `nothing`, which made callers fail
  later with an unrelated-looking `MethodError`.
"""
function find_calculation_end(lines::Array{String,1})
    # the marker to look for depends on whether the optimization converged
    marker = check_if_converged(lines) ? "Begin final coordinates" :
                                         "End of BFGS Geometry Optimization"
    idx = findfirst(line -> occursin(marker, line), lines)
    if idx === nothing
        error("Could not locate the end of the BFGS calculation " *
              "(no line containing \"$marker\").")
    end
    return idx
end

"""
    exchange_correlation_type(lines::Array{String,1})

Locate the first entry of `lines` containing `"Exchange-correlation"` and
return its second whitespace-separated token. Returns `nothing` when no
entry matches.
"""
function exchange_correlation_type(lines::Array{String,1})
    hit = findfirst(entry -> occursin("Exchange-correlation", entry), lines)
    if hit === nothing
        return nothing
    end
    return split(lines[hit])[2]
end

"""
    find_atomic_positions(lines::Array{String,1})

Return the index of the first line, at or after the position reported by
`find_calculation_end(lines)`, that contains `"ATOMIC_POSITIONS"`.
Returns `nothing` if no such line exists.
"""
function find_atomic_positions(lines::Array{String,1})
    first_candidate = find_calculation_end(lines)
    for idx in first_candidate:lastindex(lines)
        if occursin("ATOMIC_POSITIONS", lines[idx])
            return idx
        end
    end
    return nothing
end

"""
    find_no_atoms(lines::Array{String,1})

Find the first entry of `lines` containing `"number of atoms/cell"` and return
its fifth whitespace-separated token parsed as an `Int64`. Returns `nothing`
when no entry matches.
"""
function find_no_atoms(lines::Array{String,1})
    hit = findfirst(entry -> occursin("number of atoms/cell", entry), lines)
    hit === nothing && return nothing
    return parse(Int64, split(lines[hit])[5])
end

"""
    read_coords(lines::Array{String,1})

Read the atom labels and coordinates listed directly below the
`ATOMIC_POSITIONS` header located by `find_atomic_positions`.

The number of rows read is the atom count reported by `find_no_atoms`. For
each row, the first token becomes the species `Symbol` and tokens 2-4 are
parsed as `Float64` coordinates.

# Returns
- `species`: vector of `Symbol`s, one per atom
- `xf`: `3 x n_atoms` matrix whose column `a` holds the coordinates of atom `a`
  (NOTE(review): the caller wraps this in `Frac`, so the file is presumably
  expected to list crystal/fractional coordinates; confirm)
"""
function read_coords(lines::Array{String,1})
    n_atoms = find_no_atoms(lines)
    xf = zeros(3, n_atoms)
    species = fill(:blah, n_atoms) # placeholder label, overwritten below
    header = find_atomic_positions(lines)
    for a in 1:n_atoms
        fields = split(lines[header + a])
        species[a] = Symbol(fields[1])
        xf[:, a] = parse.(Float64, fields[2:4])
    end
    return species, xf
end

"""
    read_lattice_parameter(lines::Array{String,1}, start_ind::Int)

Search `lines[start_ind:end]` for the first entry containing
`"lattice parameter"`, parse its fifth whitespace-separated token as a
`Float64` and multiply it by `0.529177` (Bohr to Angstrom).
Returns `nothing` when no entry matches.
"""
function read_lattice_parameter(lines::Array{String,1}, start_ind::Int)
    bohr_to_angstrom = 0.529177
    tail = view(lines, start_ind:lastindex(lines))
    hit = findfirst(entry -> occursin("lattice parameter", entry), tail)
    hit === nothing && return nothing
    return parse(Float64, split(tail[hit])[5]) * bohr_to_angstrom
end

"""
    read_box(lines::Array{String,1}, start_ind::Int)

Build the 3x3 matrix stored in the three lines that follow the first
`"crystal axes"` header found at or after `start_ind`.

Tokens 4-6 of the `j`-th line below the header fill column `j` of the matrix.
The matrix is then scaled by the value returned from
`read_lattice_parameter(lines, start_ind)`.
"""
function read_box(lines::Array{String,1}, start_ind::Int)
    # locate the "crystal axes" header line
    header = start_ind - 1
    for idx in start_ind:lastindex(lines)
        header = idx
        occursin("crystal axes", lines[idx]) && break
    end

    # column j of f_to_c comes from the j-th line below the header
    f_to_c = zeros(3, 3)
    for j = 1:3
        fields = split(lines[header + j])
        for i = 1:3
            f_to_c[i, j] = parse(Float64, fields[3 + i])
        end
    end

    # scale by the lattice parameter
    alat = read_lattice_parameter(lines, start_ind)
    return f_to_c * alat
end

"""
    read_unitcell_volume(lines::Array{String,1}, start_ind::Int)

Search `lines[start_ind:end]` for the first entry containing
`"unit-cell volume"` and return the volume it reports, multiplied by
`0.529177 ^ 3` (Bohr^3 to Angstrom^3).

If the matching entry also contains `"new"`, the value is taken from the fifth
whitespace-separated token; otherwise from the fourth. Returns `nothing` when
no entry matches.
"""
function read_unitcell_volume(lines::Array{String,1}, start_ind::Int)
    for idx in start_ind:lastindex(lines)
        entry = lines[idx]
        occursin("unit-cell volume", entry) || continue
        # a leading "new" shifts the numeric field one token to the right
        value_field = occursin("new", entry) ? 5 : 4
        return parse(Float64, split(entry)[value_field]) * 0.529177 ^ 3 # Bohr to Angstrom
    end
    return nothing
end

read_unitcell_volume (generic function with 1 method)

In [4]:
"""
    qe_output_to_cif(qe_output_filename::String)

Read a Quantum ESPRESSO output file and build a `Crystal` from it.

Steps:
1. read all lines of `qe_output_filename`
2. emit a warning if `check_if_converged` reports no BFGS convergence
3. read species and coordinates with `read_coords`
4. read the cell matrix with `read_box`, searching from the end of the
   calculation for files flagged by `is_vcrelax`, and from line 1 otherwise
5. check that the unit-cell volume reported in the file agrees with the
   determinant of the cell matrix (absolute tolerance 0.2)

# Returns
A `Crystal` named after the file (directory and final extension stripped),
with the parsed box and atoms and no charges.

# Throws
- `AssertionError` if the reported volume and `det(f_to_c)` disagree.
"""
function qe_output_to_cif(qe_output_filename::String)
    # readlines(path) opens and closes the file itself, so the handle is not
    # leaked if reading fails
    lines = readlines(qe_output_filename)

    # check to see if the simulation converged
    if !check_if_converged(lines)
        @warn qe_output_filename * ", BFGS did not fully converge!"
    end

    # where to find the cell data changes for different types of calculations
    start_line = is_vcrelax(qe_output_filename) ? find_calculation_end(lines) : 1

    # read species and coords
    species, xf = read_coords(lines)
    atoms = Atoms(species, Frac(xf))

    # read unit cell info
    f_to_c = read_box(lines, start_line)
    box = Box(f_to_c)

    # the unit cell volume must match the determinant of the f_to_c matrix
    @assert isapprox(read_unitcell_volume(lines, start_line), det(f_to_c), atol=0.2)

    # derive the crystal name with the path utilities rather than splitting on
    # '/' and '.', which breaks on other path separators, on files without an
    # extension and on names containing several dots
    crystal_name = String(first(splitext(basename(qe_output_filename))))

    return Crystal(crystal_name, box, atoms, Charges{Frac}(0))
end

qe_output_to_cif (generic function with 1 method)

In [5]:
# Fragments (functional groups) to process in this run.
# Entries are toggled by (un)commenting them; each active entry is looped over
# by the conversion cell further down and inserted into the QE output file name.
fragment_list = [
#     "Br",
    "C-3CH3",
#     "C-C",
    "CH2-CH2-CH3",
#     "CH2-CH3",
#     "CH2-NH2",
#     "CH3",
#     "CH-CH2",
#     "CH-O",
#     "CH-S",
#     "Cl",
#     "C-N",
#     "F",
    "N-2CH3",
#     "N-C-O",
#     "NH2",
#     "N-NH",
    "O-CH2-CH2-CH3",
    "O-CH2-CH3"
#     "O-CH3",
#     "O-C-N",
#     "OH",
#     "O-OH",
#     "P-2CH3",
#     "PH2",
#     "S-CH3",
#     "SH"
]

# Fragments whose atoms overlap with the parent MOF. The conversion loop adds
# the "_frozen_parent_atoms" tag to the file names of these fragments.
fragments_with_overlap = ["C-3CH3", "CH2-CH2-CH3", "CH2-CH3","CH2-NH2",
                          "CH-CH2", "CH-S", "N-2CH3", "O-CH2-CH2-CH3",
                          "O-CH3", "O-CH2-CH3", "P-2CH3", "PH2", 
                          "S-CH3", "SH"]

# The type of Arene Substitution ("ortho", "meta", "all") 
# TODO: "all" not yet implemented
# substitution_types = ["meta", "ortho"] #"meta"

14-element Array{String,1}:
 "C-3CH3"       
 "CH2-CH2-CH3"  
 "CH2-CH3"      
 "CH2-NH2"      
 "CH-CH2"       
 "CH-S"         
 "N-2CH3"       
 "O-CH2-CH2-CH3"
 "O-CH3"        
 "O-CH2-CH3"    
 "P-2CH3"       
 "PH2"          
 "S-CH3"        
 "SH"           

In [6]:
# Arene substitution pattern(s) to process; options: "meta", "ortho"
substitution_type = ["meta"]

# parent structure name used as the prefix of every QE output file name
parent_mof = "NiPyC2_relax_sc211"

# settings encoded in the directory and file names of the QE results
exchange_correlation = "pbesol"
calc_type = "relax"

#####
# flag to determine whether xtals are going through a second stage of relaxation
#####
finished_second_round = false


for fg in fragment_list
    # file names of fragments with atoms overlapping the parent mof were
    # flagged with "_frozen_parent_atoms" (plus "_Round2" for the second round)
    comment = ""
    if fg in fragments_with_overlap
        comment = finished_second_round ? "_frozen_parent_atoms" * "_Round2" :
                                          "_frozen_parent_atoms"
    end

    # loop over the substitution types
    for sub_type in substitution_type
        qe_output_filename = joinpath(
            pwd(),
            "QE_relaxation_results",
            string(exchange_correlation, "_", calc_type),
            string(parent_mof, "_", sub_type, "_functionalized_", fg, "_",
                   exchange_correlation, "_", calc_type, comment, ".out")
        )

        # progress message: file name without directory and extension
        println(split(qe_output_filename, ('/', '.'))[end - 1])

        xtal = qe_output_to_cif(qe_output_filename)

        strip_numbers_from_atom_labels!(xtal)

        # the .cif and .xyz files are written to the same target path
        output_path = joinpath(pwd(), "post-relaxation_cifs", xtal.name)
        write_cif(xtal, output_path)
        write_xyz(Cart(xtal.atoms, xtal.box), output_path)
    end
end

NiPyC2_relax_sc211_meta_functionalized_C-3CH3_pbesol_relax_frozen_parent_atoms
NiPyC2_relax_sc211_meta_functionalized_CH2-CH2-CH3_pbesol_relax_frozen_parent_atoms


└ @ Main In[4]:9


NiPyC2_relax_sc211_meta_functionalized_N-2CH3_pbesol_relax_frozen_parent_atoms
NiPyC2_relax_sc211_meta_functionalized_O-CH2-CH2-CH3_pbesol_relax_frozen_parent_atoms
NiPyC2_relax_sc211_meta_functionalized_O-CH2-CH3_pbesol_relax_frozen_parent_atoms


In [7]:
# # check a file to verify output
# @eval PorousMaterials PATH_TO_CRYSTALS = joinpath(pwd(), "post-relaxation_cifs")

# crys = Crystal("NiPyC2_relax_sc211_meta_functionalized_NH2_pbesol_relax.cif")
# strip_numbers_from_atom_labels!(crys)
# crys