In [1]:
using PorousMaterials
using JLD2
using PyPlot
using PyCall
using CSV
using DataFrames

# config plot settings
# apply the "ggplot" matplotlib style to every figure created after this point
PyPlot.matplotlib.style.use("ggplot")
# NOTE(review): the quoted attribute access (`."rcParams"`) looks like PyCall's raw-PyObject
# form, so `PyDict` wraps matplotlib's live settings rather than a converted copy — confirm.
rcParams = PyPlot.PyDict(PyPlot.matplotlib."rcParams")
# global default font size for all plots (trailing `;` suppresses the cell output)
rcParams["font.size"] = 16;

In [2]:
###
#  read in lines to get COF filenames
###
filename = joinpath(pwd(), "./descriptors/cof_names.txt")
# `readlines(path)` opens the file, reads one entry per line and closes the file again,
# so no handle is left open if reading fails part-way through.
cof_names = readlines(filename)

###
#  load properties
###
chem_props_filename = joinpath(pwd(), "descriptors/chemical_properties.csv")
geo_props_filename  = joinpath(pwd(), "descriptors/geometric_properties.csv")

chemical_properties  = CSV.read(chem_props_filename, DataFrame)
geometric_properties = CSV.read(geo_props_filename, DataFrame)

# Merge both tables on the crystal name. An outer join keeps crystals that appear in only
# one of the two files; their columns from the other table are filled with `missing`.
descriptors = outerjoin(geometric_properties, chemical_properties, on=["crystal_name"]);
# last expression of the cell: show the available descriptor columns
names(descriptors)

15-element Vector{String}:
 "crystal_name"
 "pore_diameter_Å"
 "void_fraction"
 "surface_area_m²g⁻¹"
 "crystal_density"
 "density_of_Carbon"
 "density_of_Hydrogen"
 "density_of_Oxygen"
 "density_of_Nitrogen"
 "density_of_Silicon"
 "density_of_Sulfur"
 "density_of_Boron"
 "density_of_Phosphorus"
 "density_of_Halogens"
 "density_of_Metals"

In [3]:
# These structures are not working yet (cause unknown) — TODO: look into it.
skip_for_now = ["07012N3_ddec.cif"]
# Remove every listed crystal from the descriptor table.
# `filter!` honours the whole list (not just its first entry) and is a no-op when a name
# is absent, so re-running this cell no longer errors on a `nothing` row index.
filter!("crystal_name" => name -> !(name in skip_for_now), descriptors);

In [4]:
###
#  simulation parameters
###
# gas mixture: entries of `mole_fxn` follow the same order as `adsorbates`
adsorbates = [Molecule(species) for species in ["Kr", "Xe"]]
mole_fxn   = [0.8, 0.2] # [Kr, Xe]
total_pressure = 1.0    # bar
partial_pressures = total_pressure .* mole_fxn

# thermodynamic state and force field shared by the Henry and GCMC result lookups
temperature = 298.0 # K
ljff = LJForceField("UFF")
# sampling densities; both enter the names of the saved result files
nb_ins_per_vol = 500  # passed to `henry_result_savename`
nb_cyc_per_vol = 150  # passed to `calculate_num_burn_and_sample_cycles`

# COFs highlighted as benchmarks (kept last so the cell echoes this list)
benchmarked_cofs = ["05000N2_ddec.cif", "17156N2_ddec.cif"]

2-element Vector{String}:
 "05000N2_ddec.cif"
 "17156N2_ddec.cif"

## Calculate Error in Selectivity from Henry Coefficient:
We have the Henry coefficients $H_{Xe} = h_{Xe} \pm \delta h_{Xe}$ and $H_{Kr} = h_{Kr} \pm \delta h_{Kr}$, where $h_j$ is the "measured" value and $\delta h_j$ is the associated uncertainty. The selectivity is calculated as $S_{Xe/Kr} = s_i \pm \delta s_i$, where $s_i = \frac{h_{Xe}}{h_{Kr}}$.

To determine the uncertainty of the selectivity, $\delta s_i$, we use the fact that the relative errors of a quotient add in quadrature (assuming the uncertainties of $h_{Xe}$ and $h_{Kr}$ are independent):
$$ \left(\frac{\delta s_i}{s_i}\right)^2 = \left( \frac{\delta h_{Xe}}{h_{Xe}} \right)^2 + \left( \frac{\delta h_{Kr}}{h_{Kr}} \right)^2  $$

So, 
$$ \delta s_i = s_i\sqrt{ {\left( \frac{\delta h_{Xe}}{h_{Xe}} \right)}^2 + {\left(\frac{\delta h_{Kr}}{h_{Kr}}\right)}^2 } $$

Substituting in the expression for $s_i$, we get:
$$ S_{Xe/Kr} = \frac{h_{Xe}}{h_{Kr}} \pm \frac{h_{Xe}}{h_{Kr}}\sqrt{ {\left( \frac{\delta h_{Xe}}{h_{Xe}} \right)}^2 + {\left(\frac{ \delta h_{Kr}}{h_{Kr}}\right)}^2 }$$


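Note: For GCMC, replace the Henry coefficient $H_j$ with the amount adsorbed of each species, $N_j$, and normalize by the gas-phase mole fractions $y_j$: $S_{Xe/Kr} = \frac{N_{Xe}/N_{Kr}}{y_{Xe}/y_{Kr}}$. The mole fractions are fixed inputs without uncertainty, so the relative error of the selectivity follows from the same expression with $h_j$ replaced by $N_j$.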

In [5]:
"""
    get_henry_data(cof_names, adsorbates; calculate_selectivity=true, selectivity_species=nothing)

Collect the saved Henry-coefficient results of every (COF, adsorbate) pair in one `DataFrame`.

For each name in `cof_names` the crystal is loaded (without the charge-neutrality check) and,
for each molecule in `adsorbates`, the `results` dictionary stored in the matching JLD2 file
under `PorousMaterials.rc[:paths][:simulations]` is read. An `"adsorbate"` entry holding the
species name is added to each dictionary before it is turned into table rows.

Crystals whose name is listed in the global `skip_for_now` are skipped. The result file names
are built from the globals `temperature`, `ljff` and `nb_ins_per_vol`.

# Keywords
- `calculate_selectivity`, `selectivity_species`: accepted for backward compatibility;
  neither is used by the current implementation.

# Returns
A `DataFrame` with the rows of all loaded results (an empty `DataFrame()` if nothing was
loaded).
"""
function get_henry_data(cof_names::Array{String, 1},
                        adsorbates::Array{Molecule{Cart}, 1};
                        calculate_selectivity::Bool=true,
                        selectivity_species::Union{Nothing, Array{Symbol, 1}}=nothing)
    # One small table per result file; they are concatenated once at the end instead of
    # re-copying a growing DataFrame on every iteration.
    per_run_frames = DataFrame[]

    # loop over every material
    for cof_name in cof_names
        xtal = Crystal(cof_name; check_neutrality=false)
        if xtal.name in skip_for_now
            continue
        end
        # get results dictionary for each material, adsorbate pair
        for molecule in adsorbates
            henry_filename = henry_result_savename(xtal, molecule, temperature, ljff,
                                                   nb_ins_per_vol)
            # load results dictionary from file
            @load joinpath(PorousMaterials.rc[:paths][:simulations], henry_filename) results
            # `results` is a fresh in-memory object for every file, so tagging it with the
            # adsorbate (probe) species does not touch anything on disk and needs no copy.
            results["adsorbate"] = String(molecule.species)
            push!(per_run_frames, DataFrame(results))
        end
    end
    # `reduce(vcat, ...)` is undefined for an empty collection; keep the old empty result.
    return isempty(per_run_frames) ? DataFrame() : reduce(vcat, per_run_frames)
end

get_henry_data (generic function with 1 method)

In [6]:
"""
    calculate_num_burn_and_sample_cycles(xtal, cycles_per_volume; fraction_burn_cycles=0.5)

Split a size-scaled Monte Carlo cycle budget into burn and sample cycle counts.

The total budget is `cycles_per_volume * xtal.box.Ω` rounded up, with a floor of 5 cycles
(`Ω` is presumably the unit-cell volume — confirm against PorousMaterials). Both parts are
rounded up independently, so their sum can exceed the total budget by one.

`fraction_burn_cycles` must lie strictly between 0 and 1.

Returns the tuple `(n_burn_cycles, n_sample_cycles)`.
"""
function calculate_num_burn_and_sample_cycles(xtal::Crystal, cycles_per_volume::Int64;
                                              fraction_burn_cycles::Float64=0.5)
    # budget grows with the size of the cell but never drops below five cycles
    scaled_cycles = ceil(Int, cycles_per_volume * xtal.box.Ω)
    total_cycles = max(5, scaled_cycles)
    @assert (0.0 < fraction_burn_cycles) && (fraction_burn_cycles < 1.0) 
    sample_fraction = 1 - fraction_burn_cycles
    burn_count = ceil(Int, total_cycles * fraction_burn_cycles)
    sample_count = ceil(Int, total_cycles * sample_fraction)
    return burn_count, sample_count
end

calculate_num_burn_and_sample_cycles (generic function with 1 method)

In [7]:
"""
    get_gcmc_data(cof_names, adsorbates; calculate_selectivity=true)

Collect the saved mixture GCMC (μVT) results of every COF in `cof_names` in one `DataFrame`.

For each crystal the burn/sample cycle counts are recomputed from the global
`nb_cyc_per_vol`, because they are part of the result file name together with the globals
`temperature`, `partial_pressures` and `ljff`. The `results` dictionary stored in that JLD2
file under `PorousMaterials.rc[:paths][:simulations]` is then converted to table rows.

Crystals listed in the global `skip_for_now`, plus two further hard-coded structures, are
skipped.

# Keywords
- `calculate_selectivity`: accepted for backward compatibility; not used by the current
  implementation.

# Returns
A `DataFrame` with the rows of all loaded results (an empty `DataFrame()` if nothing was
loaded).
"""
function get_gcmc_data(cof_names::Array{String, 1},
                       adsorbates::Array{Molecule{Cart}, 1};
                       calculate_selectivity::Bool=true)
    # NOTE(review): the source gives no reason for excluding these two structures from the
    # GCMC table — presumably their results are not available yet; confirm.
    excluded_from_gcmc = ["16290N3_ddec.cif", "21111N3_ddec.cif"]

    # One table per material; they are concatenated once at the end instead of re-copying
    # a growing DataFrame on every iteration.
    per_cof_frames = DataFrame[]

    # loop over every material
    for cof_name in cof_names
        xtal = Crystal(cof_name; check_neutrality=false)
        if xtal.name in skip_for_now || xtal.name in excluded_from_gcmc
            continue
        end
        n_burn, n_sample = calculate_num_burn_and_sample_cycles(xtal, nb_cyc_per_vol)
        gcmc_filename = μVT_output_filename(xtal, adsorbates, temperature,
                                            partial_pressures, ljff, n_burn, n_sample)
        @load joinpath(PorousMaterials.rc[:paths][:simulations], gcmc_filename) results

        push!(per_cof_frames, DataFrame(results))
    end
    # `reduce(vcat, ...)` is undefined for an empty collection; keep the old empty result.
    return isempty(per_cof_frames) ? DataFrame() : reduce(vcat, per_cof_frames)
end

get_gcmc_data (generic function with 1 method)

In [8]:
# gather the saved Henry-coefficient results of all COFs (rows are per COF/adsorbate pair)
henry_data = get_henry_data(cof_names, adsorbates)
# CSV.write(joinpath(pwd(), "henry_calculations.csv"), henry_data)

Unnamed: 0_level_0,Qst (kJ/mol),adsorbate,elapsed time (min),err Qst (kJ/mol),err henry coefficient [mmol/(g-bar)]
Unnamed: 0_level_1,Float64,String,Float64,Float64,Float64
1,21.329,Kr,1.80109,0.131795,0.0214178
2,24.8751,Xe,1.65142,0.213472,0.0579636
3,10.6927,Kr,5.61557,0.0150828,0.00165314
4,14.8449,Xe,5.29136,0.00924769,0.00422026
5,9.9699,Kr,4.40649,0.0122787,0.00161386
6,14.1487,Xe,4.24425,0.00979169,0.00303046
7,17.0255,Kr,1.84388,0.0188006,0.012399
8,23.4406,Xe,1.73891,0.0341487,0.112161
9,11.5299,Kr,3.25362,0.0191455,0.00248071
10,15.5974,Xe,3.27022,0.0109519,0.00542812


In [9]:
# gather the saved Kr/Xe mixture GCMC results of all COFs into one table
gcmc_data = get_gcmc_data(cof_names, adsorbates)

Unnamed: 0_level_0,# burn cycles,# sample cycles,# samples,Fraction of deletion proposals accepted
Unnamed: 0_level_1,Int64,Int64,Int64,Float64
1,92370,92370,1229672,0.00980134
2,92370,92370,1229672,0.00980134
3,395494,395494,7222672,0.238335
4,395494,395494,7222672,0.238335
5,629651,629651,3617599,0.244453
6,629651,629651,3617599,0.244453
7,96389,96389,8095343,0.0575339
8,96389,96389,8095343,0.0575339
9,231699,231699,3438017,0.216157
10,231699,231699,3438017,0.216157


## Plotting Functions

In [10]:
###
#  Plot:
#  1. Pore size distribution
#  2. Cost of Henry calculation vs. cost of GCMC
#  3. GCMC selectivity vs. Henry selectivity
#  4. Descriptor vs. selectivity
###

In [11]:
# mksz = [8 for i in 1:length(cof_names)];

In [12]:
# figure()

# for (i, name) in enumerate(descriptors[:, "crystal_name"])
#     color = "C1"
#     if name in skip_for_now
#         continue
#     elseif name in benchmarked_cofs
#         color="C0"
#     end
#     time = henry_data[(name, "Xe")][1, "elapsed time (min)"] + 
#            henry_data[(name, "Kr")][1, "elapsed time (min)"]
    
#     scatter(descriptors[i, "pore_diameter_Å"], time, s=8, c=color)
# end

# xlabel("pore_diameter_Å")
# ylabel("total sim time (min)")

# tight_layout()

In [13]:
# for name in names(descriptors)
#     if name == "crystal_name"
#         continue
#     end
    
#     figure()
#     scatter(descriptors[:, name], henry_df[:, "selectivity"];
#             s=mksz)
    
#     xlabel(name)
#     ylabel("Xe/Kr selectivity")
#     tight_layout()
# end