In [1]:
import numpy as np

import os
import gc
from pathlib import Path

In [2]:
# Project layout, resolved relative to where the notebook is launched:
# the project root is the parent of the current working directory.
path = Path.cwd()
root = path.parent.absolute()

# Folder with the preprocessed objects shared between models, and the folder
# holding everything specific to the retrained Inception model.
fetching_path = root.joinpath('Shared Preprocessed Objects')
model_path = root.joinpath('Models', 'Retrained Inception')

In [3]:
def _load_saved_object(npy_file):
    """Load ``npy_file`` with pickling enabled and unwrap the stored object via ``.item()``."""
    # NOTE(review): allow_pickle=True unpickles arbitrary Python objects, so this
    # must only ever be pointed at files produced by this project.
    return np.load(npy_file, allow_pickle=True).item()


# Objects shared across models: word -> index lookup and the preprocessing parameters.
word2Index = _load_saved_object(fetching_path / "word2Index.npy")
variable_params = _load_saved_object(fetching_path / "variable_params.npy")

# Captions for the training and test splits.
train_captions = _load_saved_object(fetching_path / "train_captions.npy")
test_captions = _load_saved_object(fetching_path / "test_captions.npy")

# Image features stored in the retrained Inception model folder.
train_features = _load_saved_object(model_path / "train_features_full.npy")
test_features = _load_saved_object(model_path / "test_features_full.npy")

In [4]:
# Create the output folders for the generated triples.
# exist_ok=True keeps this cell idempotent: re-running it (or Restart & Run All)
# no longer raises FileExistsError when the folders are already there.
# NOTE(review): batch files left over from an earlier run are NOT removed; clear
# these folders by hand if the dataset or batch size changed since they were written.
(model_path / 'training_triples').mkdir(exist_ok=True)
(model_path / 'test_triples').mkdir(exist_ok=True)

In [5]:
def save_batch(X1, X2, y, path, batch):
    """Write one batch of triples into ``path`` as three ``.npy`` files.

    Each of ``X1``, ``X2`` and ``y`` is converted with ``np.array`` and saved as
    ``features_batch_<batch>``, ``captions_batch_<batch>`` and
    ``outputs_batch_<batch>`` respectively (``np.save`` appends the ``.npy``
    extension).

    Parameters
    ----------
    X1, X2, y : sequences
        The three aligned components of the batch.
    path : path-like supporting the ``/`` operator
        Destination directory.
    batch : int
        Batch index used as the file-name suffix.
    """
    named_parts = (
        ('features_batch_', X1),
        ('captions_batch_', X2),
        ('outputs_batch_', y),
    )
    for prefix, values in named_parts:
        target = path / (prefix + str(batch))
        np.save(target, np.array(values))

In [6]:
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
def save_LSTM_triples_batches(path, features, captions, word2Index, max_len, vocab_size, batch_size = 2048, show_progress = True):
    '''Generate the LSTM training triples and write them to disk in batches.

    For every image key in ``features`` the matching caption is encoded with
    ``word2Index`` (words missing from the vocabulary are dropped) and expanded
    into one triple per caption prefix:
    (image features, padded prefix of the caption, one-hot encoded next word).
    After every ``batch_size`` images the accumulated triples are handed to
    ``save_batch`` and the buffers are reset to keep memory usage bounded.

    If ``batch_size`` is larger than ``len(features)`` a single batch is written.
    This is not advisable, because holding all triples at once may cause OOM
    problems here and later during training.

    Parameters
    ----------
    path : path-like
        Directory the batch files are written to (passed on to ``save_batch``).
    features : mapping
        Image key -> image features.
    captions : mapping
        Image key -> caption string; words are separated by single spaces.
    word2Index : mapping
        Word -> integer index.
    max_len : int
        Length every input sequence is post-padded to.
    vocab_size : int
        Number of classes of the one-hot encoded output word.
    batch_size : int, default 2048
        Number of images (not triples) per saved batch. Must be >= 1.
    show_progress : bool, default True
        Print a progress line after every full batch.

    Returns
    -------
    int
        Number of batches written. A trailing batch is only written when there
        are leftover triples, so no empty batch file is produced when the number
        of images is an exact multiple of ``batch_size`` or ``features`` is empty.

    Raises
    ------
    ValueError
        If ``batch_size`` is smaller than 1.
    '''
    if batch_size < 1:
        raise ValueError('batch_size must be at least 1, got ' + repr(batch_size))

    X1, X2, y = [], [], []

    # Index of the next batch file to write, i.e. the number of batches written so far.
    batch = 0
    for count, key in enumerate(features, start=1):
        # Fetching features and captions
        image_features = features[key]
        caption = captions[key]

        # Encoding caption; out-of-vocabulary words are skipped
        seq = [word2Index[word] for word in caption.split(' ') if word in word2Index]

        # One triple per prefix: seq[:i] is the input, seq[i] the word to predict
        for i in range(1, len(seq)):
            input_seq = pad_sequences([seq[:i]], maxlen=max_len, padding = 'post')[0]
            output_seq = to_categorical([seq[i]], num_classes=vocab_size)[0]

            X1.append(image_features)
            X2.append(input_seq)
            y.append(output_seq)

        if count % batch_size == 0:
            # Save current batch
            save_batch(X1, X2, y, path, batch)

            # Reset batch buffers and release the memory they held
            X1, X2, y = [], [], []
            gc.collect()

            batch += 1
            if show_progress:
                print('Treated ' + str(batch_size*batch) + ' images out of ' + str(len(features)))

    # Flush the remainder, but never write an empty trailing batch
    if X1:
        save_batch(X1, X2, y, path, batch)
        batch += 1

    return batch

In [7]:
# Writing the triples to disk is the slow step of this notebook
# (the author's estimate: around 30 minutes).
# Arguments shared by the training and the test run are collected once.
triples_settings = dict(
    word2Index=word2Index,
    max_len=variable_params['max_caption_len'],
    vocab_size=variable_params['vocab_size'],
    batch_size=9,
)

# Saving training triples
num_batches_training = save_LSTM_triples_batches(
    model_path / 'training_triples', train_features, train_captions, **triples_settings)

# Saving test triples. The author was not sure these are required.
num_batches_test = save_LSTM_triples_batches(
    model_path / 'test_triples', test_features, test_captions, **triples_settings)

Treated 9 images out of 100
Treated 18 images out of 100
Treated 27 images out of 100
Treated 36 images out of 100
Treated 45 images out of 100
Treated 54 images out of 100
Treated 63 images out of 100
Treated 72 images out of 100
Treated 81 images out of 100
Treated 90 images out of 100
Treated 99 images out of 100
Treated 9 images out of 50
Treated 18 images out of 50
Treated 27 images out of 50
Treated 36 images out of 50
Treated 45 images out of 50
