Skip to content

Commit

Permalink
first draft of MIS for grading, and some more additions to the code s…
Browse files Browse the repository at this point in the history
…keleton
  • Loading branch information
PeaWagon committed Nov 27, 2018
1 parent 10b1e98 commit 867af31
Show file tree
Hide file tree
Showing 14 changed files with 444 additions and 397 deletions.
Binary file modified docs/Design/MIS/MIS.pdf
Binary file not shown.
530 changes: 263 additions & 267 deletions docs/Design/MIS/MIS.tex

Large diffs are not rendered by default.

Binary file modified docs/SRS/SRS.pdf
Binary file not shown.
2 changes: 1 addition & 1 deletion docs/SRS/SRS.tex
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ \subsection{Table of Units and Constants}



\subsection{Table of Symbols}
\subsection{Table of Symbols}\label{SRS-symbols}

The table that follows summarizes the symbols used in this document along with
their units.
Expand Down
24 changes: 16 additions & 8 deletions kaplan/energy.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@


# TODO: make these functions callable from a function
# that checks the program being used (i.e. psi4 vs horton
# vs gaussian)


import psi4
# link to relevant documentation
# http://www.psicode.org/psi4manual/1.2/psiapi.html
Expand Down Expand Up @@ -65,25 +70,28 @@ def run_energy_calc(geom, method="scf",basis="aug-cc-pVTZ",
psi4.set_options({"reference": "uhf"})
energy = psi4.energy(method+'/'+basis)
return energy


def prep_psi4_geom(mol_obj):
def prep_psi4_geom(coords, charge, multip):
"""Make a psi4 compliant geometry string.
Parameters
----------
mol_obj : object
Should have a coords attribute,
a multiplicity (multip) attribute,
and a charge attribute.
coords : list(list)
Atomic cartesian coordinates and atom types.
Example for H_2:
[['H', 0.0, 0.0, 0.0], ['H', 0.0, 0.0, 1.0]]
charge : int
The charge of the molecule.
multip : int
The multiplicity of the molecule.
Returns
-------
A string as per psi4 input.
"""
psi4_str = f"{mol_obj.charge} {mol_obj.multip}\n"
for atom in mol_obj.coords:
psi4_str = f"{charge} {multip}\n"
for atom in coords:
psi4_str += f"{atom[0]} {atom[1]} {atom[2]} {atom[3]}\n"
return psi4_str

Expand Down
83 changes: 61 additions & 22 deletions kaplan/fitg.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,62 @@


from kaplan.energy import
from kaplan.rmsd import

def get_fitness(pmem, fit_form, coef_energy, coef_rmsd):
pass

def calc_energy(parser):
# TODO: add parser attribute prog
# so we can do this:
# if parser.prog != 'psi4': blah blah
if parser.prog != 'psi4':
raise NotImplementedError("Only psi4 is supported at this time.")
input_geom = prep_psi4_geom(parser)
run_energy_calc(input_geom, parser.method, parser.basis)



def sum_energies(



from math import factorial
from kaplan.energy import run_energy_calc, prep_psi4_geom
from kaplan.rmsd import calc_rmsd

def get_fitness(xyz_coords, method, basis, fit_form, coef_energy, coef_rmsd, charge, multip):
    """Evaluate the fitness for a set of conformer geometries.

    Parameters
    ----------
    xyz_coords : list
        One entry per conformer geometry; each entry is handed to
        sum_energies and sum_rmsds unchanged.
    method : str
        Passed through to sum_energies.
    basis : str
        Passed through to sum_energies.
    fit_form : int
        Selects the fitness formula used by calc_fitness.
    coef_energy : float
        Coefficient applied to the summed energy.
    coef_rmsd : float
        Coefficient applied to the summed rmsd.
    charge : int
        The charge of the molecule.
    multip : int
        The multiplicity of the molecule.

    Returns
    -------
    The value returned by calc_fitness for the summed
    energies and summed rmsd values.

    """
    # energies are evaluated first, then the rmsd values
    energy_total = sum_energies(xyz_coords, charge, multip, method, basis)
    rmsd_total = sum_rmsds(xyz_coords)
    return calc_fitness(fit_form, energy_total, coef_energy, rmsd_total, coef_rmsd)

def sum_energies(xyz_coords, charge, multip, method, basis):
    """Return the absolute value of the summed conformer energies.

    Parameters
    ----------
    xyz_coords : list
        One set of coordinates per conformer; each set is
        converted to an input string with prep_psi4_geom.
    charge : int
        The charge of the molecule.
    multip : int
        The multiplicity of the molecule.
    method : str
        Passed to run_energy_calc.
    basis : str
        Passed to run_energy_calc.

    Returns
    -------
    float
        abs(sum of the energy of every conformer). An empty
        xyz_coords gives 0.0.

    """
    # iterate over the xyz_coords argument directly; the start value
    # of 0.0 keeps the result a float even when there are no conformers
    total = sum(
        (
            run_energy_calc(prep_psi4_geom(coords, charge, multip), method, basis)
            for coords in xyz_coords
        ),
        0.0,
    )
    return abs(total)

def sum_rmsds(xyz_coords):
    """Return the sum of the rmsd values over all pairs of conformers.

    Parameters
    ----------
    xyz_coords : list
        One entry per conformer geometry. Every unordered pair of
        entries is passed to calc_rmsd.

    Returns
    -------
    float
        The sum of calc_rmsd over all n*(n-1)/2 pairs. With fewer
        than two geometries there are no pairs and the result is 0.0.

    """
    # NOTE(review): calc_rmsd is documented as taking two filenames,
    # while the entries of xyz_coords look like coordinate lists -
    # confirm which representation is intended before relying on this.
    # The pairs come straight from the generator, so no pair count
    # (and no pre-sized buffer) is needed.
    return sum(
        (
            calc_rmsd(xyz_coords[ind1], xyz_coords[ind2])
            for ind1, ind2 in all_pairs_gen(len(xyz_coords))
        ),
        0.0,
    )

def all_pairs_gen(num_geoms):
    """Generate the index pairs for every two distinct geometries.

    Parameters
    ----------
    num_geoms : int
        The number of geometries to pair up.

    Yields
    ------
    tuple(int, int)
        (first, second) with first < second < num_geoms, ordered by
        first and then by second.

    Note
    ----
    This is a generator function.

    """
    yield from (
        (first, second)
        for first in range(num_geoms - 1)
        for second in range(first + 1, num_geoms)
    )


def calc_fitness(fit_form, sum_energy, coef_energy, sum_rmsd, coef_rmsd):
    """Combine the summed energy and summed rmsd into a fitness value.

    Parameters
    ----------
    fit_form : int
        The fitness formula to use. Only 0 is supported.
    sum_energy : float
        The summed energy term.
    coef_energy : float
        Coefficient for the energy term.
    sum_rmsd : float
        The summed rmsd term.
    coef_rmsd : float
        Coefficient for the rmsd term.

    Raises
    ------
    ValueError
        fit_form is not 0.

    Returns
    -------
    sum_energy*coef_energy + sum_rmsd*coef_rmsd

    """
    if fit_form == 0:
        energy_term = sum_energy*coef_energy
        rmsd_term = sum_rmsd*coef_rmsd
        return energy_term + rmsd_term
    raise ValueError("Unsupported fitness formula.")

#def calc_energy(parser):
# # TODO: add parser attribute prog
# # so we can do this:
# # if parser.prog != 'psi4': blah blah
# if parser.prog != 'psi4':
# raise NotImplementedError("Only psi4 is supported at this time.")
# input_geom = prep_psi4_geom(parser.coords, parser.charge, parser.multip)
# run_energy_calc(input_geom, parser.method, parser.basis)

#def all_pairs(num_geoms):
# """Return indices of two geometries."""
# # n choose k = n!/(k!(n-k)!)
# num_pairs = factorial(num_geoms)/(2*factorial(num_geoms-2))
# pairs = []
# for i in range(num_geoms-1):
# for j in range(i+1, num_geoms):
# pairs.append((i,j))
# assert len(pairs) == num_pairs
# return pairs
43 changes: 32 additions & 11 deletions kaplan/geometry.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,4 @@


from kaplan.energy import prep_psi4_geom, run_energy_calc


def generate_parser(mol_input_dict):
"""Returns parser object (from vetee)."""
if mol_input_dict['struct_type'] == 'xyz':
Expand All @@ -26,19 +22,44 @@ def generate_parser(mol_input_dict):
parser.multip = mol_input_dict['multip']
return parser

def write_output(dihedrals):
pass
def zmatrix_to_xyz(zmatrix):
    """Make xyz coordinates from a zmatrix com file.

    Placeholder: the conversion is not implemented yet, so the
    argument is ignored and an empty list is returned.

    Parameters
    ----------
    zmatrix
        The zmatrix to convert (currently unused).

    Returns
    -------
    list
        A new empty list. The intended result is
        list[[a1,x1,y1,z1], [a2,x2,y2,z2], ..., [an,xn,yn,zn]]

    """
    # Planned steps:
    #   1. make a trivial com file with the zmatrix included
    #   2. read in the com file with vetee.com(trivial_file)
    #   3. make new vetee.xyz object
    #   4. copy coords from zmatrix
    #   5. return the object xyz coordinates
    #   6. delete files
    return []

def generate_zmatrix(parser, new_dihedrals):
"""Make a zmatrix string.
def generate_zmatrix(parser, dihedrals):
"""Make a zmatrix comfile for a specific conformer.
Parameters
----------
parser : parser object
Contains the original geometry for the molecule
of interest.
new_dihedrals : list of floats
The new dihedral angles for the molecule.
dihedrals : list(int)
The dihedrals to be combined with
the original geometry (in parser).
Returns
-------
zmatrix : str
The full geometry specification of the
molecule in a file of name zmatrix.
"""
pass
# generate a full geometry specification
# in the form of a zmatrix using the dihedrals
# found in the slot at pmem_index
zmatrix = ""

# generate zmatrix based on initial geom (parser)
# replace the dihedrals with the dihedrals from the
# pmem object
# return a string
return zmatrix
2 changes: 1 addition & 1 deletion kaplan/mol_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def verify_mol_input(mol_input_dict):
# in the selected program
parser = generate_parser(mol_input_dict)
# check here if error message is raised
run_energy_calc(prep_psi4_geom(parser))
run_energy_calc(prep_psi4_geom(parser.coords, parser.charge, parser.multip))
# if no error message, initial geometry converges, we are good
return parser

9 changes: 2 additions & 7 deletions kaplan/mutations.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,11 @@ def generate_children(parent1, parent2, num_muts, num_swaps):
maximum number of mutations to perform
num_swaps : int
maximum number of swaps to perform
ring : object
Returns
-------
two new pmems for the ring [pmem, pmem]
Notes
-----
actually the return value will be two lists
of lists representing the new dihedral angles
two new sets of dihedral angles with which
to make pmem objects (list(list(int)) x 2)
"""
# make copies of the two parent pmems
Expand Down
5 changes: 4 additions & 1 deletion kaplan/output.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@

OUTPUT_FORMAT = 'xyz'
from kaplan.geometry import generate_zmatrix, zmatrix_to_xyz

# OUTPUT_FORMAT = 'xyz'

def run_output(ring):
"""Run the output module.
Expand All @@ -25,5 +27,6 @@ def run_output(ring):
average_fit = total_fit / ring.num_filled

# generate the output file for the best pmem
zmatrix_to_xyz(generate_zmatrix(ring.parser, ring.pmems[best_pmem].dihedrals))


16 changes: 10 additions & 6 deletions kaplan/pmem.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ class Pmem(object):
"""Population member of the ring."""

def __init__(self, ring_loc, num_geoms, num_atoms,
current_mev):
current_mev, dihedrals=None):
"""Constructor for pmem object.
Parameters
Expand All @@ -24,6 +24,8 @@ def __init__(self, ring_loc, num_geoms, num_atoms,
list for each conformer.
current_mev : int
The mating event at which the pmem was constructed.
dihedrals : list(list(int))
The dihedrals for the pmem. Defaults to None.
Attributes
----------
Expand All @@ -41,11 +43,13 @@ def __init__(self, ring_loc, num_geoms, num_atoms,
"""
self.ring_loc = ring_loc
# generate random dihedral angles (degrees)
# each row is a set of dihedral angles for one conformer
self.dihedrals = np.random.randint(MIN_VALUE, MAX_VALUE,
size=(num_geoms, num_atoms-3))
if dihedrals is None:
# generate random dihedral angles (degrees)
# each row is a set of dihedral angles for one conformer
self.dihedrals = np.random.randint(MIN_VALUE, MAX_VALUE,
size=(num_geoms, num_atoms-3))
else:
self.dihedrals = dihedrals
self.fitness = None
self.energies = np.zeros(num_geoms, float)
self.birthday = current_mev

37 changes: 17 additions & 20 deletions kaplan/ring.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@

from kaplan.pmem import Pmem
from kaplan.fitg import
from kaplan.fitg import get_fitness
from kaplan.geometry import generate_zmatrix, zmatrix_to_xyz
import numpy as np

"""
Expand Down Expand Up @@ -115,40 +116,36 @@ def __init__(self, num_geoms, num_atoms, num_slots,
self.num_filled = 0
self.pmems = np.full(self.num_slots, None)

def make_zmatrix(self, pmem_index):
# generate a full geometry specification
# in the form of a zmatrix using the dihedrals
# found in the slot at pmem_index
zmatrix = ""
if self.pmems[pmem_index] == None:
raise ValueError(f"Empty slot: {pmem_index}. No dihedrals with which to generate a zmatrix.")
# generate zmatrix based on initial geom (parser)
# replace the dihedrals with the dihedrals from the
# pmem object
# return a string
return zmatrix

def calc_fitness(self, pmem_index, zmatrix):
"""Calculate the fitness of a pmem.
def set_fitness(self, pmem_index):
"""Set the fitness value for a pmem.
Parameters
----------
pmem_index : int
The location of the pmem in the ring.
zmatrix : str
The full geometry specification for
the pmem at pmem_index.
Notes
-----
Sets the value of pmem.fitness
Raises
------
ValueError
Slot is empty for given pmem_index.
Returns
-------
None
"""
pass
if self.pmems[pmem_index] == None:
raise ValueError(f"Empty slot: {pmem_index}.")
# construct zmatrices
zmatrices = [generate_zmatrix(self.parser, self.pmems[pmem_index].dihedrals[i]) for i in range(self.num_geoms)]
xyz_coords = [zmatrix_to_xyz(zmatrix) for zmatrix in zmatrices]
self.pmems[pmem_index].fitness = get_fitness(xyz_coords, self.parser.method, self.parser.basis, self.fit_form, self.coef_energy, self.coef_rmsd, self.parser.charge, self.parser.multip)
# delete any created files**************


def update(self, parent_index, child):
"""Add child to ring based on parent location.
Expand Down
32 changes: 32 additions & 0 deletions kaplan/rmsd.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@

# NOTE: you need to have rmsd installed
# for the rmsd module to work

from subprocess import run, PIPE

def calc_rmsd(f1, f2):
    """Calculate the root-mean-square deviation.

    Runs the external calculate_rmsd program on the two files
    and parses the value it prints on stdout.

    Parameters
    ----------
    f1 : str
        The filename for the first geometry. Should
        be xyz or pdb.
    f2 : str
        The filename for the second geometry. Should
        be xyz or pdb.

    Raises
    ------
    ValueError
        The program output could not be converted to a
        float (for example, the output was empty).

    Returns
    -------
    rmsd : float
        The rmsd for the two molecular geometries.
        This value is after the rotation matrix
        is calculated and applied.

    """
    result = run(['calculate_rmsd', f1, f2], stdout=PIPE)
    # decode the captured bytes and strip surrounding whitespace rather
    # than slicing the repr of the bytes object; slicing silently drops
    # digits when the output has no trailing newline and breaks on \r\n
    return float(result.stdout.decode().strip())
Loading

0 comments on commit 867af31

Please sign in to comment.