In [1]:
import argparse
import os
import sys
import h5py

from utils01 import ReadXVGs
from utils01 import GROParser
from utils01 import DiscriptorGenerator


In [2]:
# Directory that the default output path of the `-o` option is built from.
OUTDIR = "workspace/01-make-datasets"

# Cutoff radius handed to GROParser.
# NOTE(review): units are not stated here (presumably nm, as in .gro files) - confirm.
CUTOFF_RADIUS = 1.0

# Training fraction.
# NOTE(review): the original note read "used if validation data is specified";
# presumably it applies when NO separate validation input (-v) is given - confirm.
TRAIN_SIZE = 0.9

# Labels for the two data splits and for the explanatory / response variables.
# NOTE(review): presumably used as group / dataset names in the output file - confirm.
TRAIN_NAME, VAL_NAME = "training", "validation"
EXPLANATORY_NAME, RESPONSE_NAME = "x", "y"


In [3]:
# Command-line interface of the dataset-creation script, driven here from a
# hard-coded argument string so the notebook can run without a real command line.

# Both -i and -v take the same four values per occurrence.
_XVG_INPUT_FIELDS = ('coord', 'force', 'init_time', 'maxlen')

parser = argparse.ArgumentParser(description='This script creates datasets for deep learning.')

# -i / -v may be repeated: each occurrence appends one 4-item list of strings
# (no `type=` is given, so init_time and maxlen are NOT converted to numbers here).
parser.add_argument('-i', '--inputs', action='append', nargs=4, metavar=_XVG_INPUT_FIELDS,
                    required=True, help='two xvg files, init_time, and maxlen')
parser.add_argument('-v', '--inputs_val', action='append', nargs=4, metavar=_XVG_INPUT_FIELDS,
                    help='validation data prepared separately from the -i inputs: '
                         'two xvg files, init_time, and maxlen')

parser.add_argument('-o', default=os.path.join(OUTDIR, 'datasets.hdf5'),
                    type=str, help='output file name (.hdf5 is recommended)')
parser.add_argument('-f', action="store_true",
                    help="force processing whether or not the output file exists")

parser.add_argument('--gro', type=str,
                    help='specify .gro path if you want to include amino information in the datasets')

parser.add_argument('-b', '--batch', type=int,
                    help='batch size for one process '
                         '(recommended: the number of frames divided by any natural number)')
parser.add_argument('--cb', action="store_true", help='mainchain + CB mode')
parser.add_argument('--only_terminal_rate', type=float, default=0.0,
                    help='to increase both terminal datasets')
parser.add_argument('--skip', type=int, default=1, help='read data each skip')

# Arguments for this notebook run; split on whitespace, so paths must not contain spaces.
a = "-i input/xvg/run2/Protein-coord.xvg input/xvg/run2/Protein-force.xvg 0 100 --gro input/solv_ions_prot.gro"
args = parser.parse_args(a.split())


In [4]:
# Build the parser from the .gro path, the cutoff radius and the --cb flag, then
# expose its attributes as notebook-level constants (read in the same order as before).
groparser = GROParser(args.gro, CUTOFF_RADIUS, args.cb)

# Chain / atom-count attributes.
MAINCHAIN, N_ATOMS, EACH_N_ATOMS = (
    groparser.mainchains,
    groparser.n_atoms,
    groparser.each_n_atoms,
)

# Attributes whose names end in `_indeces` (spelling follows utils01).
SLICE_INDECES, ARRANGED_INDECES, ADJACENT_INDECES, AB_INDECES = (
    groparser.slice_indeces,
    groparser.arranged_indeces,
    groparser.adjacent_indeces,
    groparser.ab_indeces,
)

ATOM_ALIGN = groparser.atom_align
TARGET_ATOM_INDECES_FOR_XVG = groparser.target_atom_indeces_for_xvg


In [5]:
# Display the parser's `resid_group_indeces` attribute. In the recorded output the
# visible entries are integer keys, each mapped to a list of four integers.
# NOTE(review): presumably residue id -> atom indices belonging to it - confirm in utils01.
groparser.resid_group_indeces

{1: [0, 309, 618, 927],
 2: [1, 310, 619, 928],
 3: [2, 311, 620, 929],
 4: [3, 312, 621, 930],
 5: [4, 313, 622, 931],
 6: [5, 314, 623, 932],
 7: [6, 315, 624, 933],
 8: [7, 316, 625, 934],
 9: [8, 317, 626, 935],
 10: [9, 318, 627, 936],
 11: [10, 319, 628, 937],
 12: [11, 320, 629, 938],
 13: [12, 321, 630, 939],
 14: [13, 322, 631, 940],
 15: [14, 323, 632, 941],
 16: [15, 324, 633, 942],
 17: [16, 325, 634, 943],
 18: [17, 326, 635, 944],
 19: [18, 327, 636, 945],
 20: [19, 328, 637, 946],
 21: [20, 329, 638, 947],
 22: [21, 330, 639, 948],
 23: [22, 331, 640, 949],
 24: [23, 332, 641, 950],
 25: [24, 333, 642, 951],
 26: [25, 334, 643, 952],
 27: [26, 335, 644, 953],
 28: [27, 336, 645, 954],
 29: [28, 337, 646, 955],
 30: [29, 338, 647, 956],
 31: [30, 339, 648, 957],
 32: [31, 340, 649, 958],
 33: [32, 341, 650, 959],
 34: [33, 342, 651, 960],
 35: [34, 343, 652, 961],
 36: [35, 344, 653, 962],
 37: [36, 345, 654, 963],
 38: [37, 346, 655, 964],
 39: [38, 347, 656, 965],
 40: 

In [6]:
# Display the parser's `resid_dict` attribute. In the recorded output the visible
# entries are integer keys, each mapped to a [three-letter residue name, integer] pair.
# NOTE(review): the integer is presumably the residue's atom count - confirm in utils01.
groparser.resid_dict

{1: ['ASP', 14],
 2: ['ILE', 19],
 3: ['THR', 14],
 4: ['VAL', 16],
 5: ['TYR', 21],
 6: ['ASN', 14],
 7: ['GLY', 7],
 8: ['GLN', 17],
 9: ['HIS', 17],
 10: ['LYS', 22],
 11: ['GLU', 15],
 12: ['ALA', 10],
 13: ['ALA', 10],
 14: ['THR', 14],
 15: ['ALA', 10],
 16: ['VAL', 16],
 17: ['ALA', 10],
 18: ['LYS', 22],
 19: ['ALA', 10],
 20: ['PHE', 20],
 21: ['GLU', 15],
 22: ['GLN', 17],
 23: ['GLU', 15],
 24: ['THR', 14],
 25: ['GLY', 7],
 26: ['ILE', 19],
 27: ['LYS', 22],
 28: ['VAL', 16],
 29: ['THR', 14],
 30: ['LEU', 19],
 31: ['ASN', 14],
 32: ['SER', 11],
 33: ['GLY', 7],
 34: ['LYS', 22],
 35: ['SER', 11],
 36: ['GLU', 15],
 37: ['GLN', 17],
 38: ['LEU', 19],
 39: ['ALA', 10],
 40: ['GLY', 7],
 41: ['GLN', 17],
 42: ['LEU', 19],
 43: ['LYS', 22],
 44: ['GLU', 15],
 45: ['GLU', 15],
 46: ['GLY', 7],
 47: ['ASP', 12],
 48: ['LYS', 22],
 49: ['THR', 14],
 50: ['PRO', 14],
 51: ['ALA', 10],
 52: ['ASP', 12],
 53: ['VAL', 16],
 54: ['PHE', 20],
 55: ['TYR', 21],
 56: ['THR', 14],
 57: [