In [1]:
import tensorflow as tf
tf.__version__
# import biopython
from Bio.PDB.PDBParser import PDBParser
# !pip install biopython
# tf.InteractiveSession()

In [18]:
base_dir = "input_output/"
input_dir = "testing/"
pred_dir = "outputsTesting/"
NUM_DIMENSIONS = 3
num_evo_entries = 0

In [None]:
# need: amino acids, structure
parser = PDBParser(PERMISSIVE=1)

In [None]:
structure_id = "3srp"
filename = base_dir  + "3srp.pdb"
structure = parser.get_structure(structure_id, filename)

In [None]:
structure

In [None]:
files = [base_dir + input_dir + '1']
file_queue = tf.train.string_input_producer(
        files,
        num_epochs=1)

reader = tf.TFRecordReader()
_, serialized_example = reader.read(file_queue)

In [None]:

seq_feature = tf.FixedLenSequenceFeature
context, features = tf.parse_single_sequence_example(
    serialized_example,
    context_features={'id': tf.FixedLenFeature((1,), tf.string)},
    sequence_features={
        'primary': seq_feature(
            (1,), tf.int64),
        'tertiary': seq_feature(
            (NUM_DIMENSIONS,), tf.float32, allow_missing=True),
        'mask': seq_feature(
            (1,), tf.float32, allow_missing=True)})

In [None]:
id_ = context['id'][0]
primary = tf.to_int32(features['primary'][:, 0])
tertiary = features['tertiary']
mask = features['mask'][:, 0]

In [None]:
# id_.eval()
with tf.Session() as sess:
    init_op = tf.initialize_all_variables()
    sess.run(init_op)
    out = id_.eval()

In [None]:
import tensorflow as tf

NUM_AAS = 20
NUM_DIMENSIONS = 3

def masking_matrix(mask, name=None):
    """ Constructs a masking matrix to zero out pairwise distances due to missing residues or padding. 
    Args:
        mask: 0/1 vector indicating whether a position should be masked (0) or not (1)
    Returns:
        A square matrix with all 1s except for rows and cols whose corresponding indices in mask are set to 0.
        [MAX_SEQ_LENGTH, MAX_SEQ_LENGTH]
    """

    with tf.name_scope(name, 'masking_matrix', [mask]) as scope:
        mask = tf.convert_to_tensor(mask, name='mask')

        mask = tf.expand_dims(mask, 0)
        base = tf.ones([tf.size(mask), tf.size(mask)])
        matrix_mask = base * mask * tf.transpose(mask)

        return matrix_mask
        
def read_protein(filename_queue, max_length, num_evo_entries=21, name=None):
    """ Reads and parses a ProteinNet TF Record. 
        Primary sequences are mapped onto 20-dimensional one-hot vectors.
        Evolutionary sequences are mapped onto num_evo_entries-dimensional real-valued vectors.
        Secondary structures are mapped onto ints indicating one of 8 class labels.
        Tertiary coordinates are flattened so that there are 3 times as many coordinates as 
        residues.
        Evolutionary, secondary, and tertiary entries are optional.
    Args:
        filename_queue: TF queue for reading files
        max_length:     Maximum length of sequence (number of residues) [MAX_LENGTH]. Not a 
                        TF tensor and is thus a fixed value.
    Returns:
        id: string identifier of record
        one_hot_primary: AA sequence as one-hot vectors
        evolutionary: PSSM sequence as vectors
        secondary: DSSP sequence as int class labels
        tertiary: 3D coordinates of structure
        matrix_mask: Masking matrix to zero out pairwise distances in the masked regions
        pri_length: Length of amino acid sequence
        keep: True if primary length is less than or equal to max_length
    """

    with tf.name_scope(name, 'read_protein', []) as scope:
        reader = tf.TFRecordReader()
        _, serialized_example = reader.read(filename_queue)

        context, features = tf.parse_single_sequence_example(serialized_example,
                                context_features={'id': tf.FixedLenFeature((1,), tf.string)},
                                sequence_features={
                                    'primary':      tf.FixedLenSequenceFeature((1,),               tf.int64),
                                    'evolutionary': tf.FixedLenSequenceFeature((num_evo_entries,), tf.float32, allow_missing=True),
                                    'secondary':    tf.FixedLenSequenceFeature((1,),               tf.int64,   allow_missing=True),
                                    'tertiary':     tf.FixedLenSequenceFeature((NUM_DIMENSIONS,),  tf.float32, allow_missing=True),
                                    'mask':         tf.FixedLenSequenceFeature((1,),               tf.float32, allow_missing=True)})
        id_ = context['id'][0]
        primary =   tf.to_int32(features['primary'][:, 0])
        evolutionary =          features['evolutionary']
        secondary = tf.to_int32(features['secondary'][:, 0])
        tertiary =              features['tertiary']
        mask =                  features['mask'][:, 0]

        pri_length = tf.size(primary)
        keep = pri_length <= max_length

        one_hot_primary = tf.one_hot(primary, NUM_AAS)

        # Generate tertiary masking matrix--if mask is missing then assume all residues are present
        mask = tf.cond(tf.not_equal(tf.size(mask), 0), lambda: mask, lambda: tf.ones([pri_length]))
        ter_mask = masking_matrix(mask, name='ter_mask')        

        return id_, one_hot_primary, evolutionary, secondary, tertiary, ter_mask, pri_length, keep

In [None]:
# tf.reset_default_graph()
files = [base_dir + input_dir + '1']

with tf.Session() as sess:
    file_queue = tf.train.string_input_producer(
        files,
        num_epochs=1)
#     x = read_protein(file_queue, 500)[0]
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(file_queue)
    
#     feature = {
#         'primary':      tf.FixedLenSequenceFeature((1,),               tf.int64, allow_missing=True),
#         'tertiary':     tf.FixedLenSequenceFeature((NUM_DIMENSIONS,),  tf.float32, allow_missing=True),
#         'mask':         tf.FixedLenSequenceFeature((1,),               tf.float32, allow_missing=True)}
#     features = tf.parse_single_example(serialized_example, features=feature)
    context, features = tf.parse_single_sequence_example(serialized_example,
                                context_features={'id': tf.FixedLenFeature((1,), tf.string)},
                                sequence_features={
                                    'primary':      tf.FixedLenSequenceFeature((1,),               tf.int64),
                                    'evolutionary': tf.FixedLenSequenceFeature((num_evo_entries,), tf.float32, allow_missing=True),
                                    'secondary':    tf.FixedLenSequenceFeature((1,),               tf.int64,   allow_missing=True),
                                    'tertiary':     tf.FixedLenSequenceFeature((NUM_DIMENSIONS,),  tf.float32, allow_missing=True),
                                    'mask':         tf.FixedLenSequenceFeature((1,),               tf.float32, allow_missing=True)})
    
#     seq_feature = tf.FixedLenSequenceFeature
#     context, features = tf.parse_single_sequence_example(
#             serialized_example,
#             context_features={'id': tf.FixedLenFeature((1,), tf.string)},
#             sequence_features={
#                 'primary': seq_feature(
#                     (1,), tf.int64),
#                 'tertiary': seq_feature(
#                     (NUM_DIMENSIONS,), tf.float32, allow_missing=True),
#                 'mask': seq_feature(
#                     (1,), tf.float32, allow_missing=True)})
#     x = tf.decode_raw(features['tertiary'], tf.float32)
    tertiary = features['tertiary']
#     x = tf.decode_raw(tertiary, tf.float32)
#     tertiary = tf.print(tertiary, [tertiary])
    print(tertiary)
    x = tf.random.uniform((3,5))
    z = tf.random.uniform((5,3))
    y = tf.matmul(tertiary, x).eval()
#     y = tf.matmul(x, z).eval()
    

In [None]:
# x[0].eval()
# with tf.Session() as sess:
#     x_out = x.eval()
y

In [None]:
# tertiary
y

In [None]:
for example in tf.python_io.tf_record_iterator(base_dir + input_dir + '3'):
    result = tf.train.Example.FromString(example)
    print(result)

In [None]:
result.

In [33]:
def read_and_decode(filename_queue):
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
#     features = tf.parse_single_example(
#         serialized_example,
#         # Defaults are not specified since both keys are required.
#         features={
#             'primary':      tf.FixedLenSequenceFeature((1,),               tf.int64, allow_missing=True),
#             'evolutionary': tf.FixedLenSequenceFeature((num_evo_entries,), tf.float32, allow_missing=True),
#             'secondary':    tf.FixedLenSequenceFeature((1,),               tf.int64,   allow_missing=True),
#             'tertiary':     tf.FixedLenSequenceFeature((NUM_DIMENSIONS,),  tf.float32, allow_missing=True),
#             'mask':         tf.FixedLenSequenceFeature((1,),               tf.float32, allow_missing=True)})

    context, features = tf.parse_single_sequence_example(serialized_example,
                                context_features={'id': tf.FixedLenFeature((1,), tf.string)},
                                sequence_features={
                                    'primary':      tf.FixedLenSequenceFeature((1,),               tf.int64),
                                    'secondary':    tf.FixedLenSequenceFeature((1,),               tf.int64,   allow_missing=True),
                                    'tertiary':     tf.FixedLenSequenceFeature((NUM_DIMENSIONS,),  tf.float32, allow_missing=True),
                                    'mask':         tf.FixedLenSequenceFeature((1,),               tf.float32, allow_missing=True)})
#     image = tf.decode_raw(features['image_raw'], tf.uint8)
#     label = tf.cast(features['label'], tf.int32)
#     height = tf.cast(features['height'], tf.int32)
#     width = tf.cast(features['width'], tf.int32)
#     depth = tf.cast(features['depth'], tf.int32)
#     return image, label, height, width, depth
    primary = features['primary']
    return primary

In [46]:
import tensorflow as tf


def read_and_decode(filename_queue):
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
#     features = tf.parse_single_example(
#         serialized_example,
#         features={'image_raw': tf.FixedLenFeature([], tf.string)})

    context, features = tf.parse_single_sequence_example(serialized_example,
                                context_features={'id': tf.FixedLenFeature((1,), tf.string)},
                                sequence_features={
                                    'primary':      tf.FixedLenSequenceFeature((1,),               tf.int64),
                                    'secondary':    tf.FixedLenSequenceFeature((1,),               tf.int64,   allow_missing=True),
                                    'tertiary':     tf.FixedLenSequenceFeature((NUM_DIMENSIONS,),  tf.float32, allow_missing=True),
                                    'mask':         tf.FixedLenSequenceFeature((1,),               tf.float32, allow_missing=True)})
    
#     image = tf.decode_raw(features['primary'], tf.uint8)
    image = features['primary']
    return image


def get_all_records(FILE):
    with tf.Session() as sess:
        init_op = tf.initialize_all_variables()
        sess.run(init_op)
        filename_queue = tf.train.string_input_producer([FILE], num_epochs=1)
        image = read_and_decode(filename_queue)
        
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        try:
            while True:
                example = sess.run([image])
        except tf.errors.OutOfRangeError as e:
            coord.request_stop(e)
        finally:
            coord.request_stop()
            coord.join(threads)


get_all_records(base_dir + input_dir + '1')

INFO:tensorflow:Error reported to Coordinator: <class 'tensorflow.python.framework.errors_impl.FailedPreconditionError'>, Attempting to use uninitialized value input_producer/limit_epochs/epochs
	 [[{{node input_producer/limit_epochs/CountUpTo}}]]


FailedPreconditionError: Attempting to use uninitialized value input_producer/limit_epochs/epochs
	 [[{{node input_producer/limit_epochs/CountUpTo}}]]

In [78]:
tf.reset_default_graph()
with tf.Session() as sess:
    init_op = tf.group(tf.global_variables_initializer(),
                   tf.local_variables_initializer())
    sess.run(init_op)
    filename_queue = tf.train.string_input_producer(
        [base_dir + input_dir + '1'],
        shuffle=False
#         num_epochs=1
        )
    primary = read_and_decode(filename_queue)
#     image = tf.reshape(image, tf.pack([height, width, 3]))
#     image.set_shape([32, 32, 3])
    
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    
    size = sum(1 for _ in tf.python_io.tf_record_iterator(base_dir + input_dir + '1'))
    for i in range(size):
        example = sess.run([primary])
        print(len(example[0]))
#         print(i)



#     i = 0
#     try:
#         while True:
#             example = sess.run([primary])
#             print(i)
#             i += 1
            
#     except tf.errors.OutOfRangeError as e:
# #         coord.request_stop(e)
# #         print(i)
#         pass
#         print(i)




#     finally:
#         coord.request_stop()
#         coord.join(threads)
    coord.request_stop()
    coord.join(threads)

353
130
624
137
546
75
341
470
409
501
445
142
89
315
138
166
220
149
75
358
277
409
292
161
133
161
193
106
486
183
246
285
121
328
346
670
120
563
239
223
129
386
91
388
242
323
337
96
409
487
193
324
191
202
104
242
340
423
382
375
568
116
169
353
