In [2]:
import numpy as np
import tensorflow
print(tensorflow.__version__) # TensorFlow 1.x is required to run this code

1.15.2


### Inspect a single TFRecord file

Get the name, type, and shape of every feature in the first example of the file.
Also print the values of the single-valued (scalar) features as basic information.

In [5]:
from pprint import pprint

def parse_record_str(record_str: bytes):
    '''Parses one serialized Example and returns its feature map.

    Args
    - record_str: bytes, binary (serialized) representation of an
        Example message, e.g. one record read from a TFRecord file

    Returns
    - feature_map: protobuf map from feature name strings to Feature
    '''
    # Example -> Features message -> mapping of feature name to Feature
    example = tensorflow.train.Example.FromString(record_str)
    return example.features.feature

def get_first_feature_map(tfrecord_path: str, compression_type=None):
    '''Gets the feature map of the first record in a TFRecord file.

    Args
    - tfrecord_path: str, path to a TFRecord file
    - compression_type: optional str forwarded to TFRecordOptions
        (e.g. 'GZIP' or 'ZLIB'). The default None requests no
        compression, so a compressed file must name its compression
        explicitly.

    Returns
    - feature_map: protobuf map from feature name strings to Feature

    Raises
    - StopIteration: if the iterator yields no record at all
    '''
    # The iterator yields each record as the binary representation of an
    # Example message; only the first one is consumed here.
    options = tensorflow.io.TFRecordOptions(compression_type=compression_type)
    records = tensorflow.io.tf_record_iterator(tfrecord_path, options=options)

    first_record = next(records)
    return parse_record_str(first_record)

def get_feature_types(feature_map):
    '''Gets the type and shape of each feature in a given feature_map.

    Args
    - feature_map: mapping from feature name strings to Feature

    Returns
    - feature_types: dict, maps feature names (str) to a tuple of
        (ft_type, ft_shape). ft_type is the name of the populated `kind`
        field as reported by WhichOneof, and ft_shape is the 1-tuple
        (number of values,). A feature whose `kind` is not set at all is
        reported as (None, (0,)).
    '''
    feature_types = {}
    for name, feature in feature_map.items():
        # WhichOneof() names the populated member of the `kind` oneof,
        # or returns None when nothing has been set.
        ft_type = feature.WhichOneof('kind')
        if ft_type is None:
            feature_types[name] = (None, (0,))
            continue
        # The value list is flat, so its length is the whole shape; no
        # need to copy a large list into an array just to measure it.
        values = getattr(feature, ft_type).value
        feature_types[name] = (ft_type, (len(values),))
    return feature_types

def print_scalar_values(feature_map):
    '''Prints the single-valued features of a TFRecord feature map.

    One line of the form `name: value` is printed per feature that holds
    exactly one value, in sorted name order. Features with zero or
    several values, and features whose `kind` is not set, are skipped.

    Args
    - feature_map: mapping from feature name strings to Feature
    '''
    for name in sorted(feature_map.keys()):
        feature = feature_map[name]
        # WhichOneof() returns None when no `kind` member is populated
        ft_type = feature.WhichOneof('kind')
        if ft_type is None:
            continue
        values = getattr(feature, ft_type).value
        if len(values) != 1:
            continue
        value = values[0]
        if ft_type == 'bytes_list':
            # Bytes are shown as text; undecodable bytes are escaped
            # instead of aborting the whole listing.
            value = value.decode(errors='backslashreplace')
        print(f'{name}: {value}')

# Inspect the first record of one TFRecord file: collect every feature's
# type and shape, then print the features that hold a single value.
tfrecord_path = '../data/tfrecs/AO2015_580.tfrec' # path on the local machine --> change to the VM path
feature_map = get_first_feature_map(tfrecord_path)
feature_types = get_feature_types(feature_map)

print(f'TFRecord path: {tfrecord_path}')
print('Features and types:')
pprint(feature_types)
print()
print_scalar_values(feature_map)

TFRecord path: ../data/tfrecs/AO2015_580.tfrec
Features and types:
{'Band 1': ('float_list', (1102500,)),
 'Band 10': ('float_list', (1102500,)),
 'Band 11': ('float_list', (1102500,)),
 'Band 12': ('float_list', (1102500,)),
 'Band 2': ('float_list', (1102500,)),
 'Band 3': ('float_list', (1102500,)),
 'Band 4': ('float_list', (1102500,)),
 'Band 5': ('float_list', (1102500,)),
 'Band 6': ('float_list', (1102500,)),
 'Band 7': ('float_list', (1102500,)),
 'Band 8': ('float_list', (1102500,)),
 'Band 8A': ('float_list', (1102500,)),
 'Band 9': ('float_list', (1102500,)),
 'Nightlight Band': ('float_list', (1102500,)),
 'centerlat': ('float_list', (1,)),
 'centerlon': ('float_list', (1,)),
 'country': ('bytes_list', (1,)),
 'urbanrural': ('bytes_list', (1,)),
 'wealth': ('float_list', (1,)),
 'wealthpooled': ('float_list', (1,)),
 'wealthpooled5country': ('float_list', (1,)),
 'year': ('int64_list', (1,))}

centerlat: -14.690727233886719
centerlon: 17.70050048828125
country: Angola
urba