# Tutorial 2.0.2: Filesystem and Paths

This notebook sets up the directory structure that will be used to store all the files in Tutorial 2.

First, import everything by executing the notebook with all the imports:

In [None]:
# Execute the shared imports notebook in this kernel. Names used below that are
# not defined in this notebook (e.g. `Directory`) presumably come from it — confirm.
%run Tutorial_2_Imports.ipynb

All the files generated during this tutorial will go into the following directory:

In [None]:
# Root of the tutorial's directory tree. Every later cell registers its
# sub-directories and files as attributes on this `paths` object.
paths = Directory("Tutorial_2_workdir").create()

Log files from electronic structure calculations will be stored in the following directory:

In [None]:
# "logs" sub-directory of the work directory, exposed as `paths.logs`.
paths.logs = paths.sub_directory("logs").create()

In [None]:
# NOTE(review): disabled cell — `paths.jobs` is not defined while this line stays
# commented out. Re-enable it if a jobs directory is needed, otherwise delete the cell.
# paths.jobs = paths.sub_directory("jobs").create()

In [None]:
# NOTE(review): disabled cell — `paths.vibrational_modes` is not defined while these
# lines stay commented out. Re-enable them if needed, otherwise delete the cell.
# paths.vibrational_modes = paths.sub_directory("vibrational_modes").create()
# paths.vibrational_modes.CH3NH2 = paths.vibrational_modes.file("CH3NH2.modes")

MB-Fit uses `SystemDefinitions` and `.def` files to store information about molecular systems. Here is where we will store those `.def` files:

In [None]:
# Root directory for the `.def` files.
paths.definitions = paths.sub_directory("definitions").create()

# Register one file per fragment; the attribute name and the file name are identical.
paths.definitions.fragments = paths.definitions.sub_directory("fragments").create()
for _fragment in ("CH3NH2", "H2O"):
    setattr(paths.definitions.fragments, _fragment, paths.definitions.fragments.file(_fragment))

# Register one file per system, again keyed by the system's own name.
paths.definitions.systems = paths.definitions.sub_directory("systems").create()
for _system in ("CH3NH2", "H2O", "CH3NH2_H2O", "CH3NH2_H2O_H2O", "tetramer", "pentamer"):
    setattr(paths.definitions.systems, _system, paths.definitions.systems.file(_system))

We will use these files to store structures:

In [None]:
paths.structures = paths.sub_directory("structures").create()

# For every stage and monomer, register `<stage>_<monomer>` -> "<stage>_<monomer>.xyz".
# The stage is the outer loop so the registration order matches the written-out form.
for _stage in ("initialized", "minimized", "vibrational_modes"):
    for _monomer in ("CH3NH2", "H2O"):
        _key = f"{_stage}_{_monomer}"
        setattr(paths.structures, _key, paths.structures.file(f"{_key}.xyz"))

We will generate a number of 2- and 3-body scans that we will use to validate our PEF, which we will store at the following locations:

In [None]:
paths.scans = paths.sub_directory("scans").create()

# Each system gets its own sub-directory holding three scans ("first", "second",
# "third") followed by the three matching "*_energies" files.
for _system in ("CH3NH2_H2O", "CH3NH2_H2O_H2O"):
    setattr(paths.scans, _system, paths.scans.sub_directory(_system).create())
    _scan_dir = getattr(paths.scans, _system)
    for _suffix in ("", "_energies"):
        for _ordinal in ("first", "second", "third"):
            _key = _ordinal + _suffix
            setattr(_scan_dir, _key, _scan_dir.file(f"{_key}.xyz"))

We will also use some small gas-phase clusters to validate our PEF, which will be stored here:

In [None]:
paths.clusters = paths.sub_directory("clusters").create()

_all_clusters = ("CH3NH2_H2O", "CH3NH2_H2O_H2O", "tetramer", "pentamer")

# (suffix appended to the cluster name, clusters that get such a file).
# Only the dimer and trimer have a "*_modes" file.
_cluster_files = (
    ("", _all_clusters),
    ("_energies", _all_clusters),
    ("_modes", _all_clusters[:2]),
)
for _suffix, _clusters in _cluster_files:
    for _cluster in _clusters:
        _key = _cluster + _suffix
        setattr(paths.clusters, _key, paths.clusters.file(f"{_key}.xyz"))

We will store all the parameters for Vphys in the following directory:

In [None]:
paths.Vphys = paths.sub_directory("Vphys").create()

# One JSON file per parameter set, registered under the parameter's own name.
for _parameter in ("polarizabilities", "polarizability_factors", "radii", "charges", "C6", "d6"):
    setattr(paths.Vphys, _parameter, paths.Vphys.file(f"{_parameter}.json"))

# The only non-JSON entry in this directory.
paths.Vphys.charge_configs_CH3NH2 = paths.Vphys.file("charge_configs_CH3NH2.xyz")

We will store everything to do with the 1-body CH$_3$-NH$_2$ PEF here:

In [None]:
paths.PIP_CH3NH2 = paths.sub_directory("PIP_CH3NH2").create()

# Configuration files: each name becomes both the attribute and the "<name>.xyz" file.
paths.PIP_CH3NH2.configs = paths.PIP_CH3NH2.sub_directory("configs").create()
for _config in (
    "vibrational_modes",
    "pbmetad_pool",
    "pbmetad_train_pool",
    "pbmetad_test",
    "pbmetad_train",
    "vibrational_modes_energies",
    "pbmetad_train_energies",
):
    setattr(paths.PIP_CH3NH2.configs, _config, paths.PIP_CH3NH2.configs.file(f"{_config}.xyz"))

paths.PIP_CH3NH2.training_set = paths.PIP_CH3NH2.file("training_set.xyz")

# PIP directory: one info file plus one sub-directory per listed name.
paths.PIP_CH3NH2.PIP = paths.PIP_CH3NH2.sub_directory("PIP").create()
paths.PIP_CH3NH2.PIP.info = paths.PIP_CH3NH2.PIP.file("info.in")
for _stage in ("polynomial", "parameterizer", "parametrization"):
    setattr(paths.PIP_CH3NH2.PIP, _stage, paths.PIP_CH3NH2.PIP.sub_directory(_stage).create())

We will store everything to do with the 2-body CH$_3$-NH$_2$ -- H$_2$O PEF here:

In [None]:
paths.PIP_CH3NH2_H2O = paths.sub_directory("PIP_CH3NH2_H2O").create()

# Configuration files: each name becomes both the attribute and the "<name>.xyz" file.
paths.PIP_CH3NH2_H2O.configs = paths.PIP_CH3NH2_H2O.sub_directory("configs").create()
for _config in (
    "vibrational_modes",
    "random",
    "pbmetad_pool",
    "pbmetad_pool_filtered",
    "pbmetad_train_pool",
    "pbmetad_test",
    "pbmetad_train",
    "vibrational_modes_energies",
    "random_energies",
    "pbmetad_train_energies",
):
    setattr(paths.PIP_CH3NH2_H2O.configs, _config, paths.PIP_CH3NH2_H2O.configs.file(f"{_config}.xyz"))

paths.PIP_CH3NH2_H2O.training_set = paths.PIP_CH3NH2_H2O.file("training_set.xyz")

# PIP directory: one info file plus one sub-directory per listed name.
paths.PIP_CH3NH2_H2O.PIP = paths.PIP_CH3NH2_H2O.sub_directory("PIP").create()
paths.PIP_CH3NH2_H2O.PIP.info = paths.PIP_CH3NH2_H2O.PIP.file("info.in")
for _stage in ("polynomial", "parameterizer", "parametrization"):
    setattr(paths.PIP_CH3NH2_H2O.PIP, _stage, paths.PIP_CH3NH2_H2O.PIP.sub_directory(_stage).create())

We will store everything to do with the 3-body CH$_3$-NH$_2$ -- H$_2$O -- H$_2$O PEF here:

In [None]:
paths.PIP_CH3NH2_H2O_H2O = paths.sub_directory("PIP_CH3NH2_H2O_H2O").create()

paths.PIP_CH3NH2_H2O_H2O.vibrational_modes_CH3NH2_H2O_H2O = paths.PIP_CH3NH2_H2O_H2O.file("CH3NH2_H2O_H2O.modes")

# Configuration files: each name becomes both the attribute and the "<name>.xyz" file.
paths.PIP_CH3NH2_H2O_H2O.configs = paths.PIP_CH3NH2_H2O_H2O.sub_directory("configs").create()
for _config in (
    "vibrational_modes",
    "random",
    "pbmetad_pool",
    "pbmetad_pool_filtered",
    "pbmetad_train_pool",
    "pbmetad_test",
    "pbmetad_train",
    "vibrational_modes_energies",
    "random_energies",
    "pbmetad_train_energies",
):
    setattr(paths.PIP_CH3NH2_H2O_H2O.configs, _config, paths.PIP_CH3NH2_H2O_H2O.configs.file(f"{_config}.xyz"))

paths.PIP_CH3NH2_H2O_H2O.training_set = paths.PIP_CH3NH2_H2O_H2O.file("training_set.xyz")

# PIP directory: one info file plus one sub-directory per listed name.
paths.PIP_CH3NH2_H2O_H2O.PIP = paths.PIP_CH3NH2_H2O_H2O.sub_directory("PIP").create()
paths.PIP_CH3NH2_H2O_H2O.PIP.info = paths.PIP_CH3NH2_H2O_H2O.PIP.file("info.in")
for _stage in ("polynomial", "parameterizer", "parametrization"):
    setattr(paths.PIP_CH3NH2_H2O_H2O.PIP, _stage, paths.PIP_CH3NH2_H2O_H2O.PIP.sub_directory(_stage).create())

Here we will store our copy of MBX, in which we will implement the new PEF:

In [None]:
# NOTE(review): unlike the other directories in this notebook, this one is not
# `.create()`d here — presumably a later step creates or populates it; confirm.
paths.MBX = paths.sub_directory("MBX")

At many steps, we will do expensive work that we won't want to repeat if we rerun the associated function. MB-Fit uses restart files for a variety of steps:

In [None]:
# Restart locations, grouped by the workflow step that uses them. Entries built
# with `.sub_directory(...).create()` are directories; entries built with
# `.file(...)` are single files registered under the restart directory.
paths.restart = paths.sub_directory("restart").create()

# Monomer geometry minimizations.
paths.restart.minimization_CH3NH2 = paths.restart.sub_directory("minimization_CH3NH2").create()
paths.restart.minimization_H2O = paths.restart.sub_directory("minimization_H2O").create()

# Energies of the 2-body validation scans.
paths.restart.scans_CH3NH2_H2O_1_energies = paths.restart.file("scans_CH3NH2_H2O_1_energies")
paths.restart.scans_CH3NH2_H2O_2_energies = paths.restart.file("scans_CH3NH2_H2O_2_energies")
paths.restart.scans_CH3NH2_H2O_3_energies = paths.restart.file("scans_CH3NH2_H2O_3_energies")

# Energies of the 3-body validation scans.
paths.restart.scans_CH3NH2_H2O_H2O_1_energies = paths.restart.file("scans_CH3NH2_H2O_H2O_1_energies")
paths.restart.scans_CH3NH2_H2O_H2O_2_energies = paths.restart.file("scans_CH3NH2_H2O_H2O_2_energies")
paths.restart.scans_CH3NH2_H2O_H2O_3_energies = paths.restart.file("scans_CH3NH2_H2O_H2O_3_energies")

# Gas-phase clusters: one directory per cluster ...
paths.restart.clusters_CH3NH2_H2O = paths.restart.sub_directory("clusters_CH3NH2_H2O").create()
paths.restart.clusters_CH3NH2_H2O_H2O = paths.restart.sub_directory("clusters_CH3NH2_H2O_H2O").create()
paths.restart.clusters_tetramer = paths.restart.sub_directory("clusters_tetramer").create()
paths.restart.clusters_pentamer = paths.restart.sub_directory("clusters_pentamer").create()

# ... one energies file per cluster ...
paths.restart.clusters_CH3NH2_H2O_energies = paths.restart.file("clusters_CH3NH2_H2O_energies")
paths.restart.clusters_CH3NH2_H2O_H2O_energies = paths.restart.file("clusters_CH3NH2_H2O_H2O_energies")
paths.restart.clusters_tetramer_energies = paths.restart.file("clusters_tetramer_energies")
paths.restart.clusters_pentamer_energies = paths.restart.file("clusters_pentamer_energies")

# ... and a modes directory for the dimer and trimer only, matching the
# "*_modes" files registered under `paths.clusters`. The tetramer/pentamer
# "*_energies" entries used to be re-assigned here a second time with the exact
# same values; that copy-paste duplicate has been dropped.
paths.restart.clusters_CH3NH2_H2O_modes = paths.restart.sub_directory("clusters_CH3NH2_H2O_modes").create()
paths.restart.clusters_CH3NH2_H2O_H2O_modes = paths.restart.sub_directory("clusters_CH3NH2_H2O_H2O_modes").create()

# Vphys parameters.
paths.restart.polarizabilities_CH3NH2 = paths.restart.file("polarizabilities_CH3NH2")
paths.restart.radii_CH3NH2 = paths.restart.file("radii_CH3NH2")
paths.restart.radii_H2O = paths.restart.file("radii_H2O")
paths.restart.dispersion_CH3NH2_H2O = paths.restart.file("dispersion_CH3NH2_H2O")
paths.restart.dispersion_CH3NH2_CH3NH2 = paths.restart.file("dispersion_CH3NH2_CH3NH2")
paths.restart.charge_fitting_CH3NH2 = paths.restart.file("charge_fitting_CH3NH2")

# Vibrational-mode calculations.
paths.restart.vibrational_modes_CH3NH2 = paths.restart.file("vibrational_modes_CH3NH2")
paths.restart.vibrational_modes_H2O = paths.restart.file("vibrational_modes_H2O")
paths.restart.vibrational_modes_CH3NH2_H2O = paths.restart.file("vibrational_modes_CH3NH2_H2O")
paths.restart.vibrational_modes_CH3NH2_H2O_H2O = paths.restart.file("vibrational_modes_CH3NH2_H2O_H2O")

paths.restart.esp_CH3NH2 = paths.restart.sub_directory("esp_CH3NH2").create()

# 1-body training-set generation.
paths.restart.pbmetad_CH3NH2 = paths.restart.file("pbmetad_CH3NH2")
paths.restart.fps_CH3NH2 = paths.restart.sub_directory("fps_CH3NH2").create()
paths.restart.energies_vibrational_mode_configs_CH3NH2 = paths.restart.file("energies_vibrational_mode_configs_CH3NH2")
paths.restart.energies_pbmetad_configs_CH3NH2 = paths.restart.file("energies_pbmetad_configs_CH3NH2")

# 2-body training-set generation.
paths.restart.pbmetad_CH3NH2_H2O = paths.restart.file("pbmetad_CH3NH2_H2O")
paths.restart.fps_CH3NH2_H2O = paths.restart.sub_directory("fps_CH3NH2_H2O").create()
paths.restart.energies_vibrational_mode_configs_CH3NH2_H2O = paths.restart.file("energies_vibrational_mode_configs_CH3NH2_H2O")
paths.restart.energies_random_configs_CH3NH2_H2O = paths.restart.file("energies_random_configs_CH3NH2_H2O")
paths.restart.energies_pbmetad_configs_CH3NH2_H2O = paths.restart.file("energies_pbmetad_configs_CH3NH2_H2O")

# 3-body training-set generation.
paths.restart.pbmetad_CH3NH2_H2O_H2O = paths.restart.file("pbmetad_CH3NH2_H2O_H2O")
paths.restart.fps_CH3NH2_H2O_H2O = paths.restart.sub_directory("fps_CH3NH2_H2O_H2O").create()
paths.restart.energies_vibrational_mode_configs_CH3NH2_H2O_H2O = paths.restart.file("energies_vibrational_mode_configs_CH3NH2_H2O_H2O")
paths.restart.energies_random_configs_CH3NH2_H2O_H2O = paths.restart.file("energies_random_configs_CH3NH2_H2O_H2O")
paths.restart.energies_pbmetad_configs_CH3NH2_H2O_H2O = paths.restart.file("energies_pbmetad_configs_CH3NH2_H2O_H2O")


We will also need some scratch directories for various steps in the PEF construction process:

In [None]:
import os

paths.scratch = paths.sub_directory("scratch").create()

paths.scratch.psi4 = paths.scratch.sub_directory("psi4").create()

paths.scratch.esp_CH3NH2 = paths.scratch.sub_directory("esp_CH3NH2").create()


def _fitting_scratch(system):
    """Return the scratch directory used while fitting the PEF for ``system``.

    When both ``USER`` and ``SLURM_JOB_ID`` are set, the directory lives under
    ``/scratch/$USER/job_$SLURM_JOB_ID`` and is named ``<system>_fitting``, so
    every system gets its own location. As before, that directory is not
    created here. NOTE(review): presumably the fitting step creates it — confirm.

    When either variable is missing, fall back to a created
    ``fitting_<system>`` sub-directory of ``paths.scratch`` instead of raising
    ``KeyError``, so the notebook still runs outside a SLURM job.
    """
    user = os.environ.get("USER")
    job_id = os.environ.get("SLURM_JOB_ID")
    if user and job_id:
        return Directory(f"/scratch/{user}/job_{job_id}/{system}_fitting")
    return paths.scratch.sub_directory(f"fitting_{system}").create()


# 1-body scratch space.
paths.scratch.pbmetad_CH3NH2 = paths.scratch.sub_directory("pbmetad_CH3NH2").create()
paths.scratch.fps_CH3NH2 = paths.scratch.sub_directory("fps_CH3NH2").create()
paths.scratch.PIP_CH3NH2 = paths.scratch.sub_directory("PIP_CH3NH2").create()
paths.scratch.fitting_CH3NH2 = _fitting_scratch("CH3NH2")

# 2-body scratch space. The fitting directory previously pointed at the 1-body
# ".../CH3NH2_fitting" path; it now has its own ".../CH3NH2_H2O_fitting".
paths.scratch.pbmetad_CH3NH2_H2O = paths.scratch.sub_directory("pbmetad_CH3NH2_H2O").create()
paths.scratch.fps_CH3NH2_H2O = paths.scratch.sub_directory("fps_CH3NH2_H2O").create()
paths.scratch.PIP_CH3NH2_H2O = paths.scratch.sub_directory("PIP_CH3NH2_H2O").create()
paths.scratch.fitting_CH3NH2_H2O = _fitting_scratch("CH3NH2_H2O")

# 3-body scratch space. Same fix: ".../CH3NH2_H2O_H2O_fitting" instead of the
# shared 1-body path.
paths.scratch.pbmetad_CH3NH2_H2O_H2O = paths.scratch.sub_directory("pbmetad_CH3NH2_H2O_H2O").create()
paths.scratch.fps_CH3NH2_H2O_H2O = paths.scratch.sub_directory("fps_CH3NH2_H2O_H2O").create()
paths.scratch.PIP_CH3NH2_H2O_H2O = paths.scratch.sub_directory("PIP_CH3NH2_H2O_H2O").create()
paths.scratch.fitting_CH3NH2_H2O_H2O = _fitting_scratch("CH3NH2_H2O_H2O")


This is the path to the LAMMPS executable compiled with plumed that we will use to run parallel bias metadynamics simulations during training set generation:

In [None]:
import os

# Path to the LAMMPS executable. The default is the site-specific build this
# notebook originally hard-coded. Set the LAMMPS_EXECUTABLE environment variable
# to use a different build without editing the notebook.
lammps_executable_path = os.environ.get(
    "LAMMPS_EXECUTABLE",
    "/expanse/projects/qstore/csd973/LAMMPS+plumed/LAMMPS-stable/bin/lmp_mpi",
)