In [8]:
from preprocess.pipelinesV2 import build_pipeline,build_coco_pipeline
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import tensorflow_datasets as tfds
import numpy as np

In [23]:
def get_coco_image(dataset,downscale=True,image_size=(299,299)):
    """Map function that extracts the resized image from one COCO example.

    Args:
        dataset: a single example; its 'image' entry is the image to process.
        downscale: when true, the resized image is divided by 255.
        image_size: target size handed to `tf.image.resize`.

    Returns:
        The resized image, divided by 255 when `downscale` is set.
    """
    resized=tf.image.resize(dataset['image'],image_size)
    if not downscale:
        return resized
    return resized/255
def get_coco_capts(dataset,max_captions=5):
    """Map function returning the captions of one COCO example wrapped in tokens.

    The COCO dataset has 5 captions for most images, but some have 6 or 7.
    Only the first `max_captions` are kept so that every example yields the
    same number of captions; otherwise tensorflow cannot batch the different
    sizes.

    Args:
        dataset: a single example; its ['captions']['text'] entry holds the
            caption strings.
        max_captions: maximum number of captions kept per image. Defaults to 5.

    Returns:
        The kept captions, each prefixed with '<start> ' and suffixed with
        ' <end>'.
    """
    # Slice first, then add the tokens, so only the kept captions are touched.
    captions='<start> '+ dataset['captions']['text'][0:max_captions] + ' <end>'
    return captions

def get_coco_img_paths(dataset):
    """Map function returning the 'image/filename' entry of one COCO example."""
    filename=dataset['image/filename']
    return filename
def to_dict(image, text):
    """Pack an (image, text) pair into a dictionary; meant for `dataset.map`.

    Returns:
        A dictionary with the keys 'image' and 'text'.
    """
    return dict(image=image, text=text)

In [24]:
downscale=False
image_size=(299,299)
# Load the dataset and its info from tfds.
data,info = tfds.load(name='coco_captions',with_info=True)


def _split_components(split):
    """Return the (images, captions, image paths) datasets derived from one split.

    Each returned dataset is an independent `map` over `split`, so the three
    can be consumed separately.
    """
    images = split.map(lambda x:get_coco_image(x,downscale,image_size),
                       num_parallel_calls=tf.data.AUTOTUNE)
    captions = split.map(get_coco_capts,num_parallel_calls=tf.data.AUTOTUNE)
    img_paths = split.map(get_coco_img_paths,num_parallel_calls=tf.data.AUTOTUNE)
    return images, captions, img_paths


def _captioning_dataset(split):
    """Return a dataset of {'image': ..., 'text': ...} dictionaries for one split.

    Image and captions are taken from the same record inside a single `map`.
    Compared with zipping two separately mapped datasets, the split is
    traversed once instead of twice, and the pairing does not rely on two
    traversals yielding the records in the same order.
    """
    return split.map(
        lambda x:to_dict(get_coco_image(x,downscale,image_size),get_coco_capts(x)),
        num_parallel_calls=tf.data.AUTOTUNE)


# Obtain images, captions and image paths for the training, validation and test splits.
train_imgs, train_capts, train_img_paths = _split_components(data['train'])
val_imgs, val_capts, val_img_paths = _split_components(data['val'])
test_imgs, test_capts, test_img_paths = _split_components(data['test'])

# Pair every image with its captions.
train_data_captioning = _captioning_dataset(data['train'])
val_data_captioning = _captioning_dataset(data['val'])
test_data_captioning = _captioning_dataset(data['test'])


dataset_dictionary={
                      'captioning':{
                                    'train': train_data_captioning,
                                    'val': val_data_captioning,
                                    'test': test_data_captioning,
                                    'train_img_paths': train_img_paths,
                                    'val_img_paths': val_img_paths,
                                    'test_img_paths': test_img_paths,
                                    'train_captions': train_capts,
                                    'val_captions': val_capts,
                                    'test_captions': test_capts
                                  }}

In [25]:
# Peek at the captions of the first training example.
for first_captions in train_capts.take(1):
    print(first_captions)

tf.Tensor(
[b'<start> A toilet and sink in a tiled bathroom <end>'
 b'<start> A unisex bathroom decorated with a vintage theme.  <end>'
 b'<start> A white toilet sitting next to a bidet toilet. <end>'
 b'<start> A bathroom with a toilet, sink, and other bathroom items in it.  <end>'
 b'<start> A bathroom with gold circle patterns containing a toilet, sink towel rack and shelving. <end>'], shape=(5,), dtype=string)
