In [1]:
import tensorflow as tf
import tensorflow_datasets as tfds


In [2]:
# Fetch the MRPC task of the GLUE benchmark.
#   * batch_size=-1 -> every split comes back as one full batch of tensors
#                      instead of a streaming tf.data.Dataset.
#   * with_info=True -> the second return value is the DatasetInfo metadata.
glue, info = tfds.load(
    name='glue/mrpc',
    batch_size=-1,
    with_info=True,
)

In [3]:
# Names of the available splits (iterating the dict yields its keys).
list(glue)

['test', 'train', 'validation']

In [4]:
# Display the DatasetInfo metadata returned alongside the data by tfds.load
# (feature schema, split sizes, homepage, citation).
info

tfds.core.DatasetInfo(
    name='glue',
    version=1.0.0,
    description='GLUE, the General Language Understanding Evaluation benchmark
(https://gluebenchmark.com/) is a collection of resources for training,
evaluating, and analyzing natural language understanding systems.',
    homepage='https://www.microsoft.com/en-us/download/details.aspx?id=52398',
    features=FeaturesDict({
        'idx': tf.int32,
        'label': ClassLabel(shape=(), dtype=tf.int64, num_classes=2),
        'sentence1': Text(shape=(), dtype=tf.string),
        'sentence2': Text(shape=(), dtype=tf.string),
    }),
    total_num_examples=5801,
    splits={
        'test': 1725,
        'train': 3668,
        'validation': 408,
    },
    supervised_keys=None,
    citation="""@inproceedings{dolan2005automatically,
      title={Automatically constructing a corpus of sentential paraphrases},
      author={Dolan, William B and Brockett, Chris},
      booktitle={Proceedings of the Third International Workshop on Para

In [7]:
# Feature schema of a single example: feature name -> TFDS feature type / dtype.
info.features

FeaturesDict({
    'idx': tf.int32,
    'label': ClassLabel(shape=(), dtype=tf.int64, num_classes=2),
    'sentence1': Text(shape=(), dtype=tf.string),
    'sentence2': Text(shape=(), dtype=tf.string),
})

In [11]:
# Human-readable class names of the integer `label` feature.
label_feature = info.features['label']
label_feature.names

['not_equivalent', 'equivalent']

In [16]:
# Inspect the training split. Because the data was loaded with batch_size=-1,
# this is a plain dict mapping each feature name to one tensor that holds
# every training example.
# NOTE(review): this displays the whole split; consider slicing the tensors
# (e.g. the first few rows) if the output becomes too long to read.
glue['train']

{'idx': <tf.Tensor: shape=(3668,), dtype=int32, numpy=array([1680, 1456, 3017, ..., 3682, 3619, 3624])>,
 'label': <tf.Tensor: shape=(3668,), dtype=int64, numpy=array([0, 0, 1, ..., 1, 0, 1], dtype=int64)>,
 'sentence1': <tf.Tensor: shape=(3668,), dtype=string, numpy=
 array([b'The identical rovers will act as robotic geologists , searching for evidence of past water .',
        b"Less than 20 percent of Boise 's sales would come from making lumber and paper after the OfficeMax purchase is completed .",
        b'Spider-Man snatched $ 114.7 million in its debut last year and went on to capture $ 403.7 million .',
        ...,
        b'Stack said he did no work for Triumph until 1999 , when a grand jury began investigating Silvester .',
        b'The giant rock was first observed on August 24 by Lincoln Near-Earth Asteroid Research Program , based in Socorro , New Mexico .',
        b"Two of Collins ' top assistants will consult with state police during the investigation and determine 

In [30]:
# Show a single training example (position 1, i.e. the second row) by taking
# the same index from every feature tensor of the split.
train_features = glue['train']
for key, value in train_features.items():
    # `:9s` pads the feature name to 9 characters (the length of 'sentence1')
    # so that the colons line up.
    print(f'{key:9s}: {value[1]}')

idx      : 1456
label    : 0
sentence1: b"Less than 20 percent of Boise 's sales would come from making lumber and paper after the OfficeMax purchase is completed ."
sentence2: b"Less than 20 percent of Boise 's sales would come from making lumber and paper after the OfficeMax purchase is complete , assuming those businesses aren 't sold ."


In [43]:
# Toy illustration of tf.data batching: a 3x5 tensor of uniform random floats
# is sliced along its first axis, giving a dataset of 3 elements of shape (5,).
rnd = tf.random.uniform(shape=(3, 5))
ds = tf.data.Dataset.from_tensor_slices(rnd)

# batch(1) groups the elements one at a time, which re-adds a leading axis of
# size 1, so every printed item has shape (1, 5).
for item in ds.batch(batch_size=1):
    print(item)

tf.Tensor([[0.6531817  0.6760706  0.65824103 0.38519073 0.5814389 ]], shape=(1, 5), dtype=float32)
tf.Tensor([[0.82234144 0.5917027  0.71501493 0.44235492 0.74261975]], shape=(1, 5), dtype=float32)
tf.Tensor([[0.33664274 0.42757535 0.01813698 0.50278234 0.9266362 ]], shape=(1, 5), dtype=float32)
