In [2]:
using DataFrames
using CSV
using Printf
using Statistics
using LinearAlgebra
using PorousMaterials
using LightGraphs
using GraphPlot

if length(ARGS) != 1
    error("Pass xyz file as argument")
end
xyz_filename = ARGS[1]
atoms = read_xyz(joinpath(homedir(), "Documents", "Grad School", "Research", xyz_filename))

box = Box(15.0,15.0,15.0)
atoms=Frac(atoms, box)

crystal = Crystal("NewTest.cif", box, atoms, Charges{Frac}(0))
infer_bonds!(crystal, false)
Graph(crystal.bonds)

for ed in edges(crystal.bonds)
   
    println(ed)
    i = ed.src
    j = ed.dst
       
    species_i = crystal.atoms.species[i]
    species_j = crystal.atoms.species[j]
    println("atom ", species_i, " is connected to ", species_j)
    

end

##################################################################################################
### Function to identify carboxyl. When species is carbon, checks if it has two oxygen neighbors. 
### If it does, returns oxygen id's, anchor id, and oxygen's hydrogen id

function identify_carboxyl(crystal::Crystal, a::Int64)
   
    species = crystal.atoms.species[a]    #Identify species of selected atom
    nbs = neighbors(crystal.bonds, a)     #Identify neighbors of atom
    
    if species == :C                      #Identifying carboxylate starts with carbon
        oxygen_counter = 0
        oxygen_id = Int64[]
        X_id = 0
        H_id = 0
        for nb in nbs                     #Iterating through neighbors via identifying number
            species_nb = crystal.atoms.species[nb] #Get species of neighbor
            if species_nb == :O
                
                next_nbs = neighbors(crystal.bonds, nb)  #Get neighbors of oxygen (finding hydrogen)
                
                for next_nb in next_nbs
                    species_next_nb = crystal.atoms.species[next_nb]  #Get species of oxygen's neighbors
                    if species_next_nb == :H     #Identify hydrogen neighbor
                        H_id = next_nb
                    else
                    end
                end
                
                oxygen_counter += 1
                push!(oxygen_id, nb)         #Enter oxygen id into array
            else
                X_id = nb            #If neighbor of carboxylate carbon isn't oxygen, must be anchor
            end
        end
        
        if oxygen_counter == 2   #Assuming carboxylate carbon is only carbon with two oxygens bonded
        
            return true, oxygen_id, X_id, H_id
        else 
            return false, [], 0, 0
        end
    else
        return false, [], 0, 0
    end

end

##################################################################################################
keep = [true for i = 1:crystal.atoms.n]

for a = 1:crystal.atoms.n
       
   

    is_carboxyl, oxygen_ids, X_id, H_id = identify_carboxyl(crystal, a)
    
    if is_carboxyl == true 
    
        crystal.atoms.species[X_id] = :X
        @assert length(oxygen_ids) == 2
        keep[oxygen_ids] .= false
        keep[a] = false
        keep[H_id] = false
        
    end


end

##################################################################################################
for ed in edges(crystal.bonds)
   
    println(ed)
    i = ed.src
    j = ed.dst
       
    species_i = crystal.atoms.species[i]
    species_j = crystal.atoms.species[j]
    println("atom ", species_i, " is connected to ", species_j)
    

end

##################################################################################################
tobacco_crystal = crystal.atoms[keep]
write_cif(crystal, "tobacco_crystal.cif")
tobacco_crystal = Crystal("tobacco_crystal_2.cif", box, atoms[keep], Charges{Frac}(0))

##################################################################################################
function center_mass(crystal::Crystal)
    xf_cm = sum(crystal.atoms.coords.xf, dims = 2)
    frctn_xf_cm = xf_cm ./ crystal.atoms.n
    new_coords = crystal.atoms.coords.xf .- frctn_xf_cm
    crystal.atoms.coords.xf .= mod.(crystal.atoms.coords.xf, 1)
    crystal.atoms.coords.xf .= new_coords
    
    return crystal
end

##################################################################################################
tobacco_crystal = center_mass(tobacco_crystal)

##################################################################################################
function write_cif_Kai(crystal::Crystal, filename::AbstractString; fractional_coords::Bool=true,
		   number_atoms::Bool=true)
    if has_charges(crystal)
        if crystal.atoms.n != crystal.charges.n
            error("write_cif assumes equal numbers of Charges and Atoms (or zero charges)")
        end
        if ! isapprox(crystal.charges.coords, crystal.atoms.coords)
            error("write_cif needs coords of atoms and charges to correspond.")
        end
    end

    # TODO is this labeling necessary for the bonds, arthur?
    # create dictionary for tracking label numbers
    label_numbers = Dict{Symbol, Int}()
    for atom in crystal.atoms.species
        if !haskey(label_numbers, atom)
            label_numbers[atom] = 1
        end
    end

    # append ".cif" to filename if it doesn't already have the extension
    if ! occursin(".cif", filename)
        filename *= ".cif"
    end
    cif_file = open(filename, "w")
    # first line should be data_xtalname_PM
    if crystal.name == ""
        @printf(cif_file, "data_PM\n")
    else
        # don't include file extension!
        @printf(cif_file, "data_%s_PM\n", split(crystal.name, ".")[1])
    end

    @printf(cif_file, "_symmetry_space_group_name_H-M\t'%s'\n", crystal.symmetry.space_group)

    @printf(cif_file, "_cell_length_a\t%f\n", crystal.box.a)
    @printf(cif_file, "_cell_length_b\t%f\n", crystal.box.b)
    @printf(cif_file, "_cell_length_c\t%f\n", crystal.box.c)

    @printf(cif_file, "_cell_angle_alpha\t%f\n", crystal.box.α * 180.0 / pi)
    @printf(cif_file, "_cell_angle_beta\t%f\n", crystal.box.β * 180.0 / pi)
    @printf(cif_file, "_cell_angle_gamma\t%f\n", crystal.box.γ * 180.0 / pi)

    @printf(cif_file, "_symmetry_Int_Tables_number 1\n\n")
    @printf(cif_file, "loop_\n_symmetry_equiv_pos_as_xyz\n")
    for i in 1:size(crystal.symmetry.operations, 2)
        @printf(cif_file, "'%s,%s,%s'\n", crystal.symmetry.operations[:, i]...)
    end
    @printf(cif_file, "\n")

    @printf(cif_file, "loop_\n_atom_site_label\n_atom_site_type_symbol\n")
    if fractional_coords
        @printf(cif_file, "_atom_site_fract_x\n_atom_site_fract_y\n_atom_site_fract_z\n")
    else
        @printf(cif_file, "_atom_site_Cartn_x\n_atom_site_Cartn_y\n_atom_site_Cartn_z\n")
    end
    high_precision_charges = false # if, for neutrality, need high-precision charges
    if has_charges(crystal)
        @printf(cif_file, "_atom_site_charge\n")
        # if crystal will not be charge neutral to a 1e-5 tolerance when loading it
        #    into PorousMaterials.jl, then write higher-precision charges
        if abs(sum(round.(crystal.charges.q, digits=6))) > NET_CHARGE_TOL
            @info "writing high-precision charges for " * filename * ".\n"
            high_precision_charges = true
        end
    end

    idx_to_label = Array{AbstractString, 1}(undef, crystal.atoms.n)
    for i = 1:crystal.atoms.n
        # print label and type symbol
        
        if crystal.atoms.species[i] == :X
            @printf(cif_file, "%s\t%s\t", string(crystal.atoms.species[i]) *
                (number_atoms ? string(label_numbers[crystal.atoms.species[i]]) : ""),
                "C")
        else
            @printf(cif_file, "%s\t%s\t", string(crystal.atoms.species[i]) *
                (number_atoms ? string(label_numbers[crystal.atoms.species[i]]) : ""),
                crystal.atoms.species[i])
        end
        
        # store label for this atom idx
        idx_to_label[i] = string(crystal.atoms.species[i]) *
                    string(label_numbers[crystal.atoms.species[i]])
        # increment label
        label_numbers[crystal.atoms.species[i]] += 1
        if fractional_coords
            @printf(cif_file, "%f\t%f\t%f", crystal.atoms.coords.xf[:, i]...)
        else
            @printf(cif_file, "%f\t%f\t%f", (crystal.box.f_to_c * crystal.atoms.coords.xf[:, i])...)
        end
        if has_charges(crystal)
            if high_precision_charges
                @printf(cif_file, "\t%.10f\n", crystal.charges.q[i])
            else
                @printf(cif_file, "\t%f\n", crystal.charges.q[i])
            end
        else
            @printf(cif_file, "\n")
        end
    end

    # only print bond information if it is in the crystal
    if ne(crystal.bonds) > 0
        if ! number_atoms
             error("must label atoms with numbers to write bond information.\n")
        end
        # print column names for bond information
        @printf(cif_file, "\nloop_\n_geom_bond_atom_site_label_1\n_geom_bond_atom_site_label_2\n_geom_bond_distance\n_ccdc_geom_bond_type\n")

        for edge in collect(edges(crystal.bonds))
            dxf = crystal.atoms.coords.xf[:, edge.src] - crystal.atoms.coords.xf[:, edge.dst]
            nearest_image!(dxf)
            @printf(cif_file, "%s\t%s\t%0.5f\t%s\n", idx_to_label[edge.src], idx_to_label[edge.dst],
                    norm(dxf), ". A")
        end
    end
    close(cif_file)
end

##################################################################################################
write_cif_Kai(tobacco_crystal, "tobacco_crystal_2.cif")
infer_bonds!(tobacco_crystal, true)

##################################################################################################
write_cif_Kai(tobacco_crystal, "tobacco_crystal_3.cif")


LoadError: ArgumentError: invalid base 10 digit '{' in "{"