In [None]:
# Mount Google Drive at /content/drive. Later cells read train.npy from, and
# write the extracted image features to, folders under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
import tensorflow as tf
import numpy as np
from tqdm import tqdm

In [None]:
# Create the local dataset folder layout. `-p` creates missing parent folders
# and does not complain when a folder already exists, so this cell can be re-run.
# original/: target folders for the archives unpacked by the download cell.
!mkdir -p /content/dataset/original/Questions
!mkdir -p /content/dataset/original/Annotations
!mkdir -p /content/dataset/original/Images

# preprocessed/: created here, but none of the cells visible in this section
# write into it (features are saved to Google Drive instead).
!mkdir -p /content/dataset/preprocessed/Questions
!mkdir -p /content/dataset/preprocessed/Annotations
!mkdir -p /content/dataset/preprocessed/Images

In [None]:
# Download the VQA v2 training annotations and questions plus the COCO train2014
# images into /content/dataset/original, removing each archive once it has been
# unpacked successfully (`&& rm` only runs when unzip succeeds).
#
# wget -nv : print one summary line per download instead of a progress log.
# unzip -q : suppress the per-file "extracting:" lines (the image archive alone
#            would otherwise flood the cell output with tens of thousands of lines).
# unzip -o : overwrite existing files without prompting, so re-running the cell
#            cannot stall waiting for an interactive answer.
#
# The validation / test downloads are kept commented out; uncomment to fetch them.

# Annotations
!wget -nv https://s3.amazonaws.com/cvmlp/vqa/mscoco/vqa/v2_Annotations_Train_mscoco.zip
!unzip -q -o v2_Annotations_Train_mscoco.zip -d /content/dataset/original/Annotations && rm v2_Annotations_Train_mscoco.zip

#!wget -nv https://s3.amazonaws.com/cvmlp/vqa/mscoco/vqa/v2_Annotations_Val_mscoco.zip
#!unzip -q -o v2_Annotations_Val_mscoco.zip -d /content/dataset/original/Annotations && rm v2_Annotations_Val_mscoco.zip

# Questions
!wget -nv https://s3.amazonaws.com/cvmlp/vqa/mscoco/vqa/v2_Questions_Train_mscoco.zip
!unzip -q -o v2_Questions_Train_mscoco.zip -d /content/dataset/original/Questions && rm v2_Questions_Train_mscoco.zip

#!wget -nv https://s3.amazonaws.com/cvmlp/vqa/mscoco/vqa/v2_Questions_Val_mscoco.zip
#!unzip -q -o v2_Questions_Val_mscoco.zip -d /content/dataset/original/Questions && rm v2_Questions_Val_mscoco.zip

#!wget -nv https://s3.amazonaws.com/cvmlp/vqa/mscoco/vqa/v2_Questions_Test_mscoco.zip
#!unzip -q -o v2_Questions_Test_mscoco.zip -d /content/dataset/original/Questions && rm v2_Questions_Test_mscoco.zip

# Images
!wget -nv http://images.cocodataset.org/zips/train2014.zip
!unzip -q -o train2014.zip -d /content/dataset/original/Images && rm train2014.zip

#!wget -nv http://images.cocodataset.org/zips/val2014.zip
#!unzip -q -o val2014.zip -d /content/dataset/original/Images && rm val2014.zip

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
 extracting: /content/dataset/original/Images/train2014/COCO_train2014_000000408557.jpg  
 extracting: /content/dataset/original/Images/train2014/COCO_train2014_000000013714.jpg  
 extracting: /content/dataset/original/Images/train2014/COCO_train2014_000000194043.jpg  
 extracting: /content/dataset/original/Images/train2014/COCO_train2014_000000219859.jpg  
 extracting: /content/dataset/original/Images/train2014/COCO_train2014_000000278135.jpg  
 extracting: /content/dataset/original/Images/train2014/COCO_train2014_000000141015.jpg  
 extracting: /content/dataset/original/Images/train2014/COCO_train2014_000000280923.jpg  
 extracting: /content/dataset/original/Images/train2014/COCO_train2014_000000200024.jpg  
 extracting: /content/dataset/original/Images/train2014/COCO_train2014_000000435713.jpg  
 extracting: /content/dataset/original/Images/train2014/COCO_train2014_000000249993.jpg  
 extracting: /content/dataset/origi

In [None]:
# Root folder of the unzipped COCO images; the download cell extracts the
# train2014/ folder beneath it.
input_dir = "/content/dataset/original/Images"
# Google Drive folder that receives the extracted feature files
# (Drive must be mounted before anything is written here).
output_dir = "/content/drive/MyDrive/VQA_preprocessed/ImagesFeatures"

In [None]:
def load_image(image_path):
    """Read one JPEG file and prepare it as VGG19 input.

    The file is decoded to 3 channels, resized to 224x224 and passed through
    the VGG19 ``preprocess_input`` function.

    Args:
        image_path: Path of the JPEG file to load.

    Returns:
        Tuple ``(preprocessed_image, image_path)``; the path is passed through
        unchanged so callers can tell which file each image came from.
    """
    raw_bytes = tf.io.read_file(image_path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    resized = tf.image.resize(decoded, (224, 224))
    return tf.keras.applications.vgg19.preprocess_input(resized), image_path

In [None]:
def feature_path(p, train_flag, base_dir=None):
    """Return the ``.npy`` path for an image's features, creating its folder.

    The file is placed in ``<base_dir>/train`` when ``train_flag`` is truthy and
    in ``<base_dir>/test`` otherwise. Its name is the image file name with the
    final extension replaced by ``.npy``.

    Args:
        p: Path of the source image, e.g.
            ``./data/./img/train2014/COCO_train2014_000000178619.jpg``.
        train_flag: Selects the ``train`` (truthy) or ``test`` (falsy) subfolder.
        base_dir: Root folder for the feature files. When omitted, the
            notebook-level ``output_dir`` is used.

    Returns:
        Full path of the ``.npy`` file. Its parent directory exists on return.
    """
    root = output_dir if base_dir is None else base_dir
    directory = os.path.join(root, 'train' if train_flag else 'test')
    # makedirs creates missing parent folders too and tolerates an existing
    # folder, which replaces the separate exists()/mkdir() steps.
    os.makedirs(directory, exist_ok=True)
    # splitext removes only the last extension, so file names that contain
    # additional dots keep their full stem and cannot collide.
    stem = os.path.splitext(os.path.basename(p))[0]
    return os.path.join(directory, stem + '.npy')

In [None]:
def extract_feature(img_path,
                    train_flag,
                    image_features_extract_model
                    ):
    """Run every distinct image through the model and save one ``.npy`` per image.

    Each batch output is reshaped from ``(batch, H, W, C)`` to
    ``(batch, H*W, C)`` before saving. The original note on this function
    gives the per-image feature shape as ``[7*7, 512]``.

    Args:
        img_path: Iterable of image file paths; duplicates are processed once.
        train_flag: Forwarded to ``feature_path`` to choose the output subfolder.
        image_features_extract_model: Callable model that maps a batch of
            preprocessed images to a 4-D feature tensor.

    Returns:
        None. Features are written to the paths given by ``feature_path``.
    """
    # Sorting the de-duplicated paths makes the processing order reproducible
    # (plain set iteration order can differ between runs).
    unique_img = sorted(set(img_path))
    print("total image# to preprocess: ", len(unique_img))
    if not unique_img:
        # An empty list would be turned into a float32 dataset, on which
        # load_image (which expects string paths) fails while being mapped.
        return

    image_dataset = tf.data.Dataset.from_tensor_slices(unique_img)
    image_dataset = image_dataset.map(
        load_image, num_parallel_calls=tf.data.experimental.AUTOTUNE).batch(64)

    for img, path in tqdm(image_dataset):
        batch_features = image_features_extract_model(img)
        # Collapse the spatial grid into a single axis: (B, H, W, C) -> (B, H*W, C).
        batch_features = tf.reshape(batch_features,
                                    (batch_features.shape[0],
                                     -1, batch_features.shape[3]))
        # Convert once per batch instead of once per image.
        features = batch_features.numpy()
        for feature, raw_path in zip(features, path.numpy()):
            source_path = raw_path.decode("utf-8")
            np.save(feature_path(source_path, train_flag=train_flag), feature)

In [None]:
# Quick look at the training list: print how many entries it has and the local
# image path derived from the first entry.
# NOTE: allow_pickle=True executes pickled objects on load; only use it on
# files you created yourself.
trains = np.load('/content/drive/MyDrive/VQA_preprocessed/train.npy', allow_pickle=True).tolist()
print(len(trains))
for sample in trains:
    # Only the file name of the stored path is kept; it is re-rooted under the
    # local train2014 folder.
    sample_name = sample[0].split('/')[-1]
    print(os.path.join(input_dir, 'train2014', sample_name))
    break  # the first entry is enough for a preview

443757
/content/dataset/original/Images/train2014/COCO_train2014_000000458752.jpg


In [None]:
# Build the list of local image paths, one per entry of the training list.
# Only the file name of each stored path is kept and re-rooted under the local
# train2014 folder; duplicates are removed later inside extract_feature.
trains = np.load('/content/drive/MyDrive/VQA_preprocessed/train.npy', allow_pickle=True).tolist()
img_paths = [
    os.path.join(input_dir, 'train2014', item[0].split('/')[-1])
    for item in trains
]

# Feature extractor: ImageNet-pretrained VGG19 without its classifier head.
# The output of the model's last remaining layer is used as the image feature
# (the original note calls this "vgg19 pool5").
image_model = tf.keras.applications.VGG19(weights='imagenet',
                                          include_top=False)
new_input = image_model.input
hidden_layer = image_model.layers[-1].output
image_features_extract_model = tf.keras.Model(new_input, hidden_layer)
print("image model ready")

# Extract and save the features for the training images.
extract_feature(img_paths, True, image_features_extract_model)
print("train image done.")

image model ready
total image# to preprocess:  82783




  0%|          | 0/1294 [00:00<?, ?it/s][A[A

  0%|          | 1/1294 [00:01<31:05,  1.44s/it][A[A

  0%|          | 2/1294 [00:02<28:20,  1.32s/it][A[A

  0%|          | 3/1294 [00:03<27:33,  1.28s/it][A[A

  0%|          | 4/1294 [00:04<26:25,  1.23s/it][A[A

  0%|          | 5/1294 [00:05<26:23,  1.23s/it][A[A

  0%|          | 6/1294 [00:07<26:16,  1.22s/it][A[A

  1%|          | 7/1294 [00:08<25:33,  1.19s/it][A[A

  1%|          | 8/1294 [00:09<25:52,  1.21s/it][A[A

  1%|          | 9/1294 [00:10<24:50,  1.16s/it][A[A

  1%|          | 10/1294 [00:11<25:09,  1.18s/it][A[A

  1%|          | 11/1294 [00:13<25:42,  1.20s/it][A[A

  1%|          | 12/1294 [00:14<24:52,  1.16s/it][A[A

  1%|          | 13/1294 [00:15<25:11,  1.18s/it][A[A

  1%|          | 14/1294 [00:16<24:39,  1.16s/it][A[A

  1%|          | 15/1294 [00:17<25:03,  1.18s/it][A[A

  1%|          | 16/1294 [00:18<25:00,  1.17s/it][A[A

  1%|▏         | 17/1294 [00:20<24:47,  1.17s/i

train image done.





In [None]:
# Spot-check the first ten training entries: each must have a saved feature
# file, whose array shape is printed.
for item in trains[:10]:
    # Same naming rule as the extraction step: image file name with its
    # extension replaced by .npy.
    stem = os.path.splitext(item[0].split('/')[-1])[0]
    # Named npy_path (not feature_path) so the feature_path() helper defined
    # earlier in the notebook is not overwritten by a string.
    npy_path = os.path.join(output_dir, 'train', stem + '.npy')

    # Check for the file before loading it; np.load would raise on a missing
    # file and the problem would never be reported as 'error'.
    if not os.path.exists(npy_path):
        print('error')
        continue

    # The features are plain numeric arrays, so pickle support is not needed.
    print(np.load(npy_path).shape)


(49, 512)
(49, 512)
(49, 512)
(49, 512)
(49, 512)
(49, 512)
(49, 512)
(49, 512)
(49, 512)
(49, 512)
