## Preparing Photo Data

### Imports

In [1]:
import os
from glob import glob
from pickle import dump
from keras.applications.vgg16 import VGG16
from keras.preprocessing.image import load_img, img_to_array
from keras.applications.vgg16 import preprocess_input
from keras.models import Model
from tqdm import tqdm

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
def extract_features(directory):
    """Run every image in ``directory`` through a truncated VGG16 network.

    Args:
        directory: Path to a folder; every entry returned by ``os.listdir``
            is loaded as an image.

    Returns:
        dict mapping image id (the file name up to its first ``.``) to the
        array returned by ``model.predict`` for that image.
    """
    model = VGG16()

    # Cut the network off at the layer just before the final one.
    # Indexing ``layers[-2]`` directly (instead of ``layers.pop()`` followed
    # by ``layers[-1]``) selects the same layer, but does not depend on
    # ``model.layers`` being a mutable list: on Keras versions where the
    # attribute returns a copy, ``pop()`` is a silent no-op and the old code
    # would have kept the final classification layer as the output.
    model = Model(inputs=model.inputs, outputs=model.layers[-2].output)

    # ``summary()`` prints the table itself and returns None, so wrapping it
    # in ``print`` only appended a stray "None" line.
    model.summary()

    features = dict()

    for name in tqdm(os.listdir(directory)):
        file_name = os.path.join(directory, name)

        # Load the image resized to 224x224 and convert it to an array.
        image = load_img(file_name, target_size=(224, 224))
        image = img_to_array(image)

        # Add a leading batch axis of size 1 in front of the image axes.
        image = image.reshape(1, image.shape[0], image.shape[1], image.shape[2])

        # Apply the VGG16-specific input preprocessing.
        image = preprocess_input(image)

        feature = model.predict(image, verbose=0)

        # Key by the file name with everything after the first '.' removed.
        image_id = name.split('.')[0]

        features[image_id] = feature
    return features

In [3]:
# Extract features for every image in the dataset folder.
directory = 'dataset/images'
features = extract_features(directory)

print('Extracted Features: %d' % len(features))

# Use a context manager so the pickle file is flushed and closed even if
# ``dump`` raises; the bare ``open(...)`` previously left the handle open.
with open('dataset/features.pkl', 'wb') as features_file:
    dump(features, features_file)




  0%|          | 0/8091 [00:00<?, ?it/s]

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0   

100%|██████████| 8091/8091 [27:57<00:00,  4.82it/s]


Extracted Features: 8091


## Preparing Text Data

In [5]:
import string

In [6]:
def load_doc(filename):
    """Read the text file at ``filename`` and return its full contents.

    Args:
        filename: Path of the file to read (opened in text mode).

    Returns:
        The whole file as a single string.
    """
    # The context manager closes the file even when ``read`` raises.
    with open(filename, 'r') as file:
        return file.read()

def load_descriptions(doc):
    """Group caption lines by image id.

    Each non-blank line of ``doc`` is split on whitespace: the first token is
    the image identifier and the remaining tokens form the caption.

    Args:
        doc: Text with one ``<image identifier> <caption words...>`` entry
            per line.

    Returns:
        dict mapping image id (the identifier up to its first ``.``) to the
        list of captions for that image, in the order they appear.
    """
    mapping = dict()

    for line in doc.split('\n'):
        tokens = line.split()

        # Skip very short lines (as before) and also whitespace-only lines:
        # those pass the length check but yield no tokens, so ``tokens[0]``
        # used to raise IndexError.
        if len(line) < 2 or not tokens:
            continue

        image_id, image_desc = tokens[0], tokens[1:]

        # Keep only the part of the identifier before the first '.'.
        image_id = image_id.split('.')[0]

        # Re-join the caption words with single spaces.
        mapping.setdefault(image_id, list()).append(' '.join(image_desc))
    return mapping

def clean_descriptions(descriptions):
    """Normalize every caption in ``descriptions`` in place.

    Each caption is split into words; every word is lower-cased and stripped
    of punctuation, and only words that are longer than one character and
    purely alphabetic are kept. The surviving words are re-joined with single
    spaces and written back to the same list slot. Nothing is returned.

    Args:
        descriptions: dict mapping a key to a list of caption strings.
    """
    # Translation table that deletes every character in string.punctuation.
    strip_punct = str.maketrans('', '', string.punctuation)

    for captions in descriptions.values():
        for idx, caption in enumerate(captions):
            normalized = (
                raw.lower().translate(strip_punct) for raw in caption.split()
            )
            kept = [w for w in normalized if len(w) > 1 and w.isalpha()]
            captions[idx] = ' '.join(kept)
            
def to_vocabulary(descriptions):
    """Collect the distinct whitespace-separated words used in all captions.

    Args:
        descriptions: dict mapping a key to a list of caption strings.

    Returns:
        set of every word that occurs in at least one caption.
    """
    vocab = set()

    for captions in descriptions.values():
        for caption in captions:
            vocab.update(caption.split())

    return vocab

def save_descriptions(descriptions, filename):
    """Write all captions to ``filename``, one ``<key> <caption>`` per line.

    Lines are joined with ``\\n`` and no trailing newline is written.

    Args:
        descriptions: dict mapping a key to a list of caption strings.
        filename: Path of the text file to create or overwrite.
    """
    lines = [
        key + ' ' + desc
        for key, desc_list in descriptions.items()
        for desc in desc_list
    ]

    data = '\n'.join(lines)

    # The context manager flushes and closes the file even if ``write`` fails.
    with open(filename, 'w') as file:
        file.write(data)

In [8]:
# Path of the caption file to load.
filename = 'dataset/text/Flickr8k.token.txt'

# Read the whole caption file into memory as one string.
doc = load_doc(filename)

# Group the caption lines by image id.
descriptions = load_descriptions(doc)

# Number of distinct image ids found in the caption file.
print('Loaded: %d ' % len(descriptions))

# Normalize the captions; this mutates `descriptions` in place and returns nothing.
clean_descriptions(descriptions)

# Collect the set of distinct words that remain after cleaning.
vocabulary = to_vocabulary(descriptions)
print('Vocabulary size: %d' % len(vocabulary))

# Persist the cleaned captions, one "<image id> <caption>" line per caption.
save_descriptions(descriptions, 'dataset/descriptions.txt')

Loaded: 8092 
Vocabulary size: 8763
