In [None]:
image_to_captions = None
with open('./Helper/captions.txt') as f:
    image_to_captions = f.read()

image_to_captions = eval(image_to_captions)


In [2]:
split_factor = 0.9
split = int(split_factor*len(image_to_captions))

train_captions = dict(list(image_to_captions.items())[:split])
test_captions = dict(list(image_to_captions.items())[split:])

print('Number of training examples: ', len(train_captions))
print('Number of testing examples: ', len(test_captions))

Number of training examples:  7282
Number of testing examples:  810


In [3]:
## add start and end tokens

for img_id in train_captions.keys():
    for i in range(len(train_captions[img_id])):
        train_captions[img_id][i] = '<start> ' + train_captions[img_id][i] + ' <end>'

In [4]:
with open('./Helper/train_captions.txt','w') as f:
    f.write(str(train_captions))
with open('./Helper/test_captions.txt','w') as f:
    f.write(str(test_captions))

## Transfer Learning

In [5]:
from keras.applications.vgg16 import VGG16
from keras.applications.resnet50 import ResNet50, preprocess_input
from keras.preprocessing import image 
from keras.models import Model
import numpy as np
import os
import pickle
import time

### Image -> Features Extraction

In [6]:
## 224,224 is the height and width of images on which resnet is pretrained
## 3 is for the color channels

resnet = ResNet50(weights="imagenet",input_shape=(224,224,3))
resnet.summary()

In [7]:
model = Model(inputs = resnet.input, outputs = resnet.layers[-2].output)
model.summary()

### Extracting image features and storing them 


In [8]:
def preprocess(path,img_to_captions):
    
    preprocessed_images = {} ## map img_id with feature vector of corresponding img
    cnt = 0
    
    for img_id in img_to_captions.keys():
        
        image_path = path + "\\" + img_id + '.jpg'
        try :
            img = image.load_img(image_path,target_size=(224,224))
        except :
            continue
        img = image.img_to_array(img)
        img = np.expand_dims(img,axis = 0) ## to make img look like batch of size 1 = (1,224,224,3)
        
        # Normalising
        img = preprocess_input(img)
        feature_vector = model.predict(img) # shape (1,2048)
        feature_vector = feature_vector.reshape((-1,)) # shape = 2048
        
        preprocessed_images[img_id] = feature_vector
        
        cnt += 1
        if cnt%100 == 0:
            print('|',end = ' ')
    
    print("Preprocessing Completed")
    return preprocessed_images

In [9]:
path = 'C:\\Users\Mukunda priya\Documents\Aaslesh\PROJECTS\AD 2\project\Image-Captioning\Dataset\Flickr8k_Dataset\Flicker8k_Dataset'
start_t = time.time()
train_images = preprocess(path, train_captions)
end_t = time.time()
print('Time taken: ', end_t - start_t)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms

In [10]:
with open('./Helper/train_images.pkl', 'wb') as f:
  pickle.dump(train_images, f)

In [11]:
test_images = preprocess(path,test_captions)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 101ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 99ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 86ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 122ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 107ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 88ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m

In [12]:
with open('./Helper/test_images.pkl', 'wb') as f:
  pickle.dump(test_images, f)