## Sketch Dataset

In [1]:
import tensorflow as tf 
from tensorflow import keras
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import re

In [2]:
DOWNLOAD_ROOT = "http://download.tensorflow.org/data/"
FILENAME = "quickdraw_tutorial_dataset_v1.tar.gz"
filepath = keras.utils.get_file(FILENAME,
                                DOWNLOAD_ROOT + FILENAME,
                                cache_subdir="datasets/quickdraw",
                                extract=True)

## Reading the data


In [18]:
file_paths = "/home/daniel/.keras/datasets/quickdraw"

In [19]:
train_files = []
eval_files = []

In [20]:
for tfrecord in os.listdir(file_paths):
    if tfrecord.split(".")[0]=="training" and not tfrecord.endswith(".classes"):
        train_files.append(os.path.join(file_paths, tfrecord))
    if tfrecord.split(".")[0]=="eval" and not tfrecord.endswith(".classes"):
        eval_files.append(tfrecord)
    

In [21]:
train_files

['/home/daniel/.keras/datasets/quickdraw/training.tfrecord-00008-of-00010',
 '/home/daniel/.keras/datasets/quickdraw/training.tfrecord-00006-of-00010',
 '/home/daniel/.keras/datasets/quickdraw/training.tfrecord-00009-of-00010',
 '/home/daniel/.keras/datasets/quickdraw/training.tfrecord-00007-of-00010',
 '/home/daniel/.keras/datasets/quickdraw/training.tfrecord-00002-of-00010',
 '/home/daniel/.keras/datasets/quickdraw/training.tfrecord-00005-of-00010',
 '/home/daniel/.keras/datasets/quickdraw/training.tfrecord-00001-of-00010',
 '/home/daniel/.keras/datasets/quickdraw/training.tfrecord-00004-of-00010',
 '/home/daniel/.keras/datasets/quickdraw/training.tfrecord-00000-of-00010',
 '/home/daniel/.keras/datasets/quickdraw/training.tfrecord-00003-of-00010']

In [22]:
tfrecord_dataset = tf.data.TFRecordDataset(train_files)

In [23]:
feature_descriptions = {"ink": tf.io.VarLenFeature(dtype=tf.float32),"shape": tf.io.FixedLenFeature([2], dtype=tf.int64),"class_index": tf.io.FixedLenFeature([1], dtype=tf.int64)}

In [31]:
for tensor in tfrecord_dataset.skip(3).take(1):
    example = tf.io.parse_example(tensor, feature_descriptions)
    
    print(example)
    print(example["class_index"])
    print(example["shape"])
    print(example["ink"])
    print(tf.sparse.to_dense(example["ink"]))

{'ink': <tensorflow.python.framework.sparse_tensor.SparseTensor object at 0x7fe03820db80>, 'class_index': <tf.Tensor: shape=(1,), dtype=int64, numpy=array([51])>, 'shape': <tf.Tensor: shape=(2,), dtype=int64, numpy=array([24,  3])>}
tf.Tensor([51], shape=(1,), dtype=int64)
tf.Tensor([24  3], shape=(2,), dtype=int64)
SparseTensor(indices=tf.Tensor(
[[ 0]
 [ 1]
 [ 2]
 [ 3]
 [ 4]
 [ 5]
 [ 6]
 [ 7]
 [ 8]
 [ 9]
 [10]
 [11]
 [12]
 [13]
 [14]
 [15]
 [16]
 [17]
 [18]
 [19]
 [20]
 [21]
 [22]
 [23]
 [24]
 [25]
 [26]
 [27]
 [28]
 [29]
 [30]
 [31]
 [32]
 [33]
 [34]
 [35]
 [36]
 [37]
 [38]
 [39]
 [40]
 [41]
 [42]
 [43]
 [44]
 [45]
 [46]
 [47]
 [48]
 [49]
 [50]
 [51]
 [52]
 [53]
 [54]
 [55]
 [56]
 [57]
 [58]
 [59]
 [60]
 [61]
 [62]
 [63]
 [64]
 [65]
 [66]
 [67]
 [68]
 [69]
 [70]
 [71]], shape=(72, 1), dtype=int64), values=tf.Tensor(
[-0.0283688   0.07843137  0.         -0.0212766   0.21960786  0.
 -0.0212766   0.26274508  0.          0.0212766   0.06666666  0.
  0.8794326   0.00784314  0.          0

In [32]:
examples = tf.io.parse_example(tfrecord_dataset, feature_descriptions)


ValueError: Attempt to convert a value (<TFRecordDatasetV2 shapes: (), types: tf.string>) with an unsupported type (<class 'tensorflow.python.data.ops.readers.TFRecordDatasetV2'>) to a Tensor.

In [37]:
train_files

['training.tfrecord-00008-of-00010',
 'training.tfrecord-00006-of-00010',
 'training.tfrecord-00009-of-00010',
 'training.tfrecord-00007-of-00010',
 'training.tfrecord-00002-of-00010',
 'training.tfrecord-00005-of-00010',
 'training.tfrecord-00001-of-00010',
 'training.tfrecord-00004-of-00010',
 'training.tfrecord-00000-of-00010',
 'training.tfrecord-00003-of-00010']

In [38]:
eval_files

['eval.tfrecord-00008-of-00010',
 'eval.tfrecord-00003-of-00010',
 'eval.tfrecord-00007-of-00010',
 'eval.tfrecord-00001-of-00010',
 'eval.tfrecord-00006-of-00010',
 'eval.tfrecord-00000-of-00010',
 'eval.tfrecord-00005-of-00010',
 'eval.tfrecord-00009-of-00010',
 'eval.tfrecord-00002-of-00010',
 'eval.tfrecord-00004-of-00010']

In [49]:
tensor = tf.data.TFRecordDataset(train_files)

In [50]:
tensor

<TFRecordDatasetV2 shapes: (), types: tf.string>

In [51]:
train_files[:1]

['/home/daniel/.keras/datasets/quickdraw/training.tfrecord-00008-of-00010']

In [52]:
for x in tensor: 
    print(x)
    break

tf.Tensor(b'\n\xb8\x06\n\x14\n\x0bclass_index\x12\x05\x1a\x03\n\x01\x04\n\x0f\n\x05shape\x12\x06\x1a\x04\n\x02@\x03\n\x8e\x06\n\x03ink\x12\x86\x06\x12\x83\x06\n\x80\x06\x00\x00\x00\x00p\xa9Y\xbd\x00\x00\x00\x00\x88\xc3\xe1\xbcp\xa9\xd9\xbd\x00\x00\x00\x00\x00\x02\x01\xbc\xdc#-\xbe\x00\x00\x00\x00\x08\x83\xc1\xbc\xb0L\x9e\xbd\x00\x00\x80?h\xb3\xd9>n\xa9\xd9>\x00\x00\x00\x00\x80\xc3\xe1<\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x01<\x00sm\xbc\x00\x00\x00\x00\x00\x00\x00\x00\xe0g\x94\xbd\x00\x00\x00\x00@\xa3Q\xbdJ\xfc\x8c\xbe\x00\x00\x80?\x90B!\xbd\x80L\x9e;\x00\x00\x00\x00\x00\x02\x01<\xa0L\x9e<\x00\x00\x00\x00\x00\x02\x81\xbc\xc0L\x1e<\x00\x00\x00\x00\xe4r\xb9\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x88\xc3a=\x00s\xed\xbc\x00\x00\x00\x00H\xa3\xd1\xbd\xd0\xdfE=\x00\x00\x00\x00\xe4r\xb9\xbd\xc0L\x1e<\x00\x00\x00\x00\x00\x02\x01\xbcp\xa9Y\xbd\x00\x00\x00\x00@"\x11=\xd8\xdf\xc5\xbd\x00\x00\x00\x00\x00\x83A\xbc\x00sm\xbc\x00\x00\x00\x00\x84B\xa1\xbdp\xa9Y=\x00\x00\x00\x00&\x93\xc9\xbd\xa0L\x9e<\x

2021-10-04 11:08:50.400044: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)
