In [1]:
import os
import matplotlib.pyplot

In [2]:
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow_datasets as tfds

In [3]:
physical_devices = tf.config.list_physical_devices("GPU")
tf.config.experimental.set_memory_growth(physical_devices[0], True)

In [4]:
BATCH_SIZE = 64

We Will use Tensorflow Datasets

### MNIST form tfds

In [None]:
(ds_train,ds_test), ds_info = tfds.load(
    "mnist",
    split = ['train','test'],
    shuffle_files= True,
    as_supervised = True, #tuple of img,label
    with_info = True
)

In [None]:
ds_info

In [None]:
def normalize_img(image,label):
    #normalize images
    return tf.cast(image, tf.float32)/255.0 , label

AUTOTUNE = tf.data.experimental.AUTOTUNE #to autotune hyperparameters where it is called
ds_train = ds_train.map(normalize_img, num_parallel_calls = AUTOTUNE) #applies function to call images, labels
ds_train = ds_train.cache() #
ds_train = ds_train.shuffle(ds_info.splits['train'].num_examples)
ds_train = ds_train.batch(BATCH_SIZE)
ds_train = ds_train.prefetch(AUTOTUNE)

ds_test = ds_test.map(normalize_img, num_parallel_calls = AUTOTUNE)
ds_test = ds_test.batch(128)
ds_test = ds_test.prefetch(AUTOTUNE)

In [None]:
model = keras.Sequential([
    keras.Input((28,28,1)),
    layers.Conv2D(32,3,activation='relu'),
    layers.Flatten(),
    layers.Dense(10,activation='softmax')
])

model.compile(
    optimizer=keras.optimizers.Adam(lr=0.001),
    loss = keras.losses.SparseCategoricalCrossentropy(),
    metrics = ['accuracy']
)

model.fit(ds_train, epochs=20, verbose=2)
model.evaluate(ds_test)

### Sentiment Analysis

In [5]:
(ds_train,ds_test), ds_info = tfds.load(
    "imdb_reviews",
    split = ['train','test'],
    shuffle_files= True,
    as_supervised = True, #tuple of img,label
    with_info = True
)

In [6]:
tokenizer = tfds.features.text.Tokenizer() #text tokenizer

def build_vocabulary():
    vocabulary = set()
    for text, _ in ds_train:
        vocabulary.update(tokenizer.tokenize(text.numpy().lower())) #add all words to vocab
    return vocabulary

vocabulary = build_vocabulary()

In [7]:
encoder = tfds.features.text.TokenTextEncoder(
    vocabulary, oov_token = "<UNK>" , lowercase = True, tokenizer=tokenizer
)

def encoding_fn(text_tensor, label):
    return encoder.encode(text_tensor.numpy()), label

def encode_map(text, label):
    #specifiy the i/o of above function
    encoded_text, label = tf.py_function(
        encoding_fn, inp = [text, label], Tout = (tf.int64, tf.int64)
    )
    
    encoded_text.set_shape([None])
    label.set_shape([])
    
    return encoded_text, label

In [8]:
AUTOTUNE = tf.data.experimental.AUTOTUNE
ds_train = ds_train.map(encode_map, num_parallel_calls = AUTOTUNE).cache()
ds_train = ds_train.shuffle(10000)
ds_train = ds_train.padded_batch(32, padded_shapes=([None],()))
ds_train = ds_train.prefetch(AUTOTUNE)

ds_test = ds_test.map(encode_map)
ds_test = ds_test.padded_batch(32, padded_shapes=([None],()))

In [9]:
model = keras.Sequential(
    [
        layers.Masking(mask_value = 0), #ignore the values with padded 0s
        layers.Embedding(input_dim=len(vocabulary) +2, output_dim = 32),
        
        #Batchsize x 100 x32 is given. out
        layers.GlobalAveragePooling1D(),
        
        # now is Batchsize x 32 (taking avg)
        layers.Dense(64, activation = 'relu'),
        layers.Dense(1)
    ]
)

model.compile(
    loss = keras.losses.BinaryCrossentropy(from_logits=True),
    optimizer = keras.optimizers.Adam(3e-4,clipnorm=1),
    metrics = ['accuracy']
)

model.fit(ds_train, epochs=10, verbose=2)

Epoch 1/10
782/782 - 18s - loss: 0.6758 - accuracy: 0.5058
Epoch 2/10
782/782 - 16s - loss: 0.5016 - accuracy: 0.7187
Epoch 3/10
782/782 - 16s - loss: 0.3420 - accuracy: 0.8566
Epoch 4/10
782/782 - 17s - loss: 0.2733 - accuracy: 0.8914
Epoch 5/10
782/782 - 16s - loss: 0.2322 - accuracy: 0.9110
Epoch 6/10
782/782 - 17s - loss: 0.2030 - accuracy: 0.9234
Epoch 7/10
782/782 - 16s - loss: 0.1793 - accuracy: 0.9350
Epoch 8/10
782/782 - 18s - loss: 0.1591 - accuracy: 0.9431
Epoch 9/10
782/782 - 17s - loss: 0.1421 - accuracy: 0.9505
Epoch 10/10
782/782 - 18s - loss: 0.1270 - accuracy: 0.9571


<tensorflow.python.keras.callbacks.History at 0x7f91883f8d60>

In [10]:
model.evaluate(ds_test)



[0.29252007603645325, 0.8908399939537048]