## Import tensorflow and tensorflow_datasets

In [2]:
import tensorflow as tf
import tensorflow_datasets as tfds


  from .autonotebook import tqdm as notebook_tqdm


## Create Custom Dataset using tf.data API

In [3]:

# Task 1: Create Custom Dataset using tf.data API
X_train = [[1,2,3], [4,5,6], [7,8,9], [10,11,12]] # sample data
y_train = [0,1,0,1] # sample labels


In [4]:

# Create a tf.data.Dataset
dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))

# Shuffle and batch the dataset
dataset = dataset.shuffle(buffer_size=len(X_train)).batch(batch_size=2)


## Loading data using tfds

In [5]:

# Load the MNIST dataset
mnist_info = tfds.builder('mnist').info

# Print information about the dataset
print("MNIST Dataset Information:")
print(mnist_info)
print("Size of each split:")
print(mnist_info.splits)
print("Size of labels:")
print(mnist_info.features['label'].num_classes)
print("Labels:")
print(mnist_info.features['label'].names)


MNIST Dataset Information:
tfds.core.DatasetInfo(
    name='mnist',
    full_name='mnist/3.0.1',
    description="""
    The MNIST database of handwritten digits.
    """,
    homepage='http://yann.lecun.com/exdb/mnist/',
    data_dir='C:\\Users\\New User\\tensorflow_datasets\\mnist\\3.0.1',
    file_format=tfrecord,
    download_size=Unknown size,
    dataset_size=Unknown size,
    features=FeaturesDict({
        'image': Image(shape=(28, 28, 1), dtype=uint8),
        'label': ClassLabel(shape=(), dtype=int64, num_classes=10),
    }),
    supervised_keys=('image', 'label'),
    disable_shuffling=False,
    splits={
    },
    citation="""@article{lecun2010mnist,
      title={MNIST handwritten digit database},
      author={LeCun, Yann and Cortes, Corinna and Burges, CJ},
      journal={ATT Labs [Online]. Available: http://yann.lecun.com/exdb/mnist},
      volume={2},
      year={2010}
    }""",
)
Size of each split:
{}
Size of labels:
10
Labels:
['0', '1', '2', '3', '4', '5', '6', '7'

In [6]:

# Load the IMDB reviews dataset
imdb_info = tfds.builder('imdb_reviews').info

# Print information about the dataset
print("\nIMDB Reviews Dataset Information:")
print(imdb_info)
print("Size of each split:")
print(imdb_info.splits)
print("Size of labels:")
print(imdb_info.features['label'].num_classes)
print("Labels:")
print(imdb_info.features['label'].names)



IMDB Reviews Dataset Information:
tfds.core.DatasetInfo(
    name='imdb_reviews',
    full_name='imdb_reviews/plain_text/1.0.0',
    description="""
    Large Movie Review Dataset. This is a dataset for binary sentiment
    classification containing substantially more data than previous benchmark
    datasets. We provide a set of 25,000 highly polar movie reviews for training,
    and 25,000 for testing. There is additional unlabeled data for use as well.
    """,
    config_description="""
    Plain text
    """,
    homepage='http://ai.stanford.edu/~amaas/data/sentiment/',
    data_dir='C:\\Users\\New User\\tensorflow_datasets\\imdb_reviews\\plain_text\\1.0.0',
    file_format=tfrecord,
    download_size=Unknown size,
    dataset_size=Unknown size,
    features=FeaturesDict({
        'label': ClassLabel(shape=(), dtype=int64, num_classes=2),
        'text': Text(shape=(), dtype=string),
    }),
    supervised_keys=('text', 'label'),
    disable_shuffling=False,
    splits={
    },
 

## Custom processing and augmentation

In [17]:


# Define custom preprocessing and augmentation functions
def preprocess(element):
    image, label = element
    image = tf.image.random_flip_left_right(image)
    return image, label


In [22]:

# Apply custom preprocessing to the MNIST dataset
mnist_dataset = tfds.load('mnist', split='train')#.map(preprocess)

# Shuffle, batch, and prefetch the dataset
mnist_dataset = mnist_dataset.shuffle(buffer_size=10000).batch(batch_size=32).prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

# Iterate over the augmented dataset and list the first 5 batches
for i, (images, labels) in enumerate(mnist_dataset.take(5)):
    print(f"Batch {i+1}:")
    print("Images shape:", images)
    print("Labels:", labels)


Batch 1:
Images shape: image
Labels: label
Batch 2:
Images shape: image
Labels: label
Batch 3:
Images shape: image
Labels: label
Batch 4:
Images shape: image
Labels: label
Batch 5:
Images shape: image
Labels: label
