<a href="https://colab.research.google.com/github/Jaybhatt216/EIT/blob/main/MNIST_and_Keras.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import numpy as np
import pandas as pd




from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential, load_model
from keras.layers import Dense, Embedding, LSTM, Bidirectional

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from keras.utils.np_utils import to_categorical

import tensorflow.compat.v2 as tf
import tensorflow_datasets as tfds

# Turn on TensorFlow 2.x behaviors for the `tensorflow.compat.v2` module imported above as `tf`.
tf.enable_v2_behavior()

## Step 1: Create your input pipeline

In [9]:
# Load MNIST from TensorFlow Datasets, already divided into its 'train' and 'test' splits.
# The call returns the requested splits plus a metadata object (ds_info) because with_info=True.
mnist_splits, ds_info = tfds.load(
    'mnist',
    split=['train', 'test'],
    shuffle_files=True,
    as_supervised=True,
    with_info=True,
)
# Splits come back in the order requested above.
ds_train, ds_test = mnist_splits


## Build training pipeline
## Apply the following transformations:

## ds.map: TFDS provides the images as tf.uint8, while the model expects tf.float32, so normalize the images.
##ds.cache: As the dataset fits in memory, cache before shuffling for better performance.
##Note: Random transformations should be applied after caching.
##ds.shuffle: For true randomness, set the shuffle buffer to the full dataset size.
##Note: For bigger datasets which do not fit in memory, a standard value is 1000 if your system allows it.
##ds.batch: Batch after shuffling to get unique batches at each epoch.
##ds.prefetch: It is good practice to end the pipeline by prefetching, for performance.

In [10]:
# Convert images from uint8 to float32 and rescale them by 1/255.

def normalize_img(image, label):
  """Cast `image` to tf.float32 and divide it by 255; return it with `label` unchanged."""
  image_f32 = tf.cast(image, tf.float32)
  return image_f32 / 255., label

# Training input pipeline as a single chain:
# normalize -> cache -> shuffle (buffer = size of the train split) -> batch -> prefetch.
ds_train = (
    ds_train
    .map(normalize_img, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    .cache()
    .shuffle(ds_info.splits['train'].num_examples)
    .batch(128)
    .prefetch(tf.data.experimental.AUTOTUNE)
)

#tf.data provides the prefetch transformation, which can be used to decouple the time
#when data is produced from the time when data is consumed.
#In particular, the transformation uses a background thread and an internal buffer to prefetch elements
#from the input dataset ahead of the time they are requested. Passing AUTOTUNE lets tf.data pick the buffer size at runtime.



#The cache transformation can cache a dataset, either in memory or on local storage.
#This will save some operations (like file opening and data reading)
#from being executed during each epoch.


#The Dataset.prefetch() method makes it easier to add prefetching at
#any point in the pipeline, not just after a map().
#For example, Dataset.prefetch() will start a background thread to populate an
#ordered buffer that acts like a tf.FIFOQueue, so that downstream pipeline stages need not block.
#However, the prefetch() implementation is much simpler,
#because it doesn't need to support as many different concurrent operations as a tf.FIFOQueue.

## Build evaluation pipeline
## Testing pipeline is similar to the training pipeline, with small differences:

## No ds.shuffle() call
## Caching is done after batching (as batches can be the same between epochs)

In [11]:


# Evaluation input pipeline as a single chain: normalize -> batch -> cache -> prefetch.
# There is no shuffle step, and the cache sits after batching so whole batches are cached.
ds_test = (
    ds_test
    .map(normalize_img, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    .batch(128)
    .cache()
    .prefetch(tf.data.experimental.AUTOTUNE)
)


## Step 2: Create and train the model
## Plug the input pipeline into Keras.

In [12]:
# Simple fully connected classifier: flatten each image, one hidden ReLU layer, 10 output logits.
# TFDS yields MNIST images with a trailing channel axis, i.e. shape (28, 28, 1), so the declared
# input shape includes it. Flatten still produces 784 features, so the network itself is unchanged,
# but the declared shape now matches what the pipeline actually feeds the model.
model = tf.keras.models.Sequential([
  tf.keras.layers.Flatten(input_shape=(28, 28, 1)),
  tf.keras.layers.Dense(128, activation='relu'),
  tf.keras.layers.Dense(10),  # raw logits, one per digit class; no softmax here
])

# The last layer emits logits, hence from_logits=True. Labels are integer class ids, hence the
# "sparse" loss and accuracy variants.
model.compile(
    optimizer=tf.keras.optimizers.Adam(0.001),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=[tf.keras.metrics.SparseCategoricalAccuracy()],
)

# Train on the batched training pipeline and evaluate on the test pipeline after every epoch.
model.fit(
    ds_train,
    epochs=6,
    validation_data=ds_test,
)

Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


<tensorflow.python.keras.callbacks.History at 0x7f09712d7358>