
Merge pull request #57 from nbgao/master
Supplement for CLEVR
MIL-VLG committed Jan 1, 2020
2 parents 558487c + c289984 commit c741f1c
Showing 2 changed files with 209 additions and 0 deletions.
151 changes: 151 additions & 0 deletions data/clevr/clevr_extract_feat.py
@@ -0,0 +1,151 @@
# --------------------------------------------------------
# OpenVQA
# CLEVR images feature extraction script
# Written by Pengbing Gao https://github.com/nbgao
# --------------------------------------------------------

'''
Command line example:
python clevr_extract_feat.py --mode=all --gpu=0
python clevr_extract_feat.py --mode=train --gpu=0 --model=resnet101 --model_stage=3 --batch_size=128 --image_height=224 --image_width=224
'''

import argparse, os, json
import numpy as np
# note: scipy.misc.imread/imresize were removed in scipy 1.2,
# so this script requires scipy<1.2 (with Pillow installed)
from scipy.misc import imread, imresize

import torch
import torchvision
torch.set_num_threads(5)


def build_model(args):
if not hasattr(torchvision.models, args.model):
raise ValueError('Invalid model "%s"' % args.model)
    if 'resnet' not in args.model:
        raise ValueError('Feature extraction only supports ResNets')

cnn = getattr(torchvision.models, args.model)(pretrained=True)
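    # keep the stem (conv1, bn1, relu, maxpool) plus the first
    # `model_stage` residual stages of the pretrained ResNet;
    # the defaults (resnet101, stage 3) yield 1024-channel feature maps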
layers = [cnn.conv1,
cnn.bn1,
cnn.relu,
cnn.maxpool]
for i in range(args.model_stage):
name = 'layer%d' % (i + 1)
layers.append(getattr(cnn, name))

model = torch.nn.Sequential(*layers)
model.cuda()
model.eval()
return model


def batch_feat(cur_batch, model):
    # ImageNet channel-wise mean/std used to normalize inputs for the pretrained CNN
    mean = np.array([0.485, 0.456, 0.406]).reshape(1, 3, 1, 1)
    std = np.array([0.229, 0.224, 0.225]).reshape(1, 3, 1, 1)

image_batch = np.concatenate(cur_batch, 0).astype(np.float32)
image_batch = (image_batch / 255.0 - mean) / std
    image_batch = torch.FloatTensor(image_batch).cuda()

    # run the CNN without tracking gradients; Variable(..., volatile=True)
    # is deprecated in modern PyTorch
    with torch.no_grad():
        feats = model(image_batch)
    feats = feats.cpu().numpy()

return feats


def extract_feature(args, images_path, feats_npz_path):
input_paths = []
idx_set = set()
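    # collect (path, index) pairs from file names like CLEVR_train_000000.png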
for file in os.listdir(images_path):
if not file.endswith('.png'):
continue
idx = int(os.path.splitext(file)[0].split('_')[-1])
input_paths.append((os.path.join(images_path, file), idx))
idx_set.add(idx)

input_paths.sort(key=lambda x: x[1])
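    # sanity check: indices must be unique and form a contiguous 0..N-1 range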
assert len(idx_set) == len(input_paths)
assert min(idx_set) == 0 and max(idx_set) == len(idx_set) - 1
print('Image number:', len(input_paths))

model = build_model(args)

    if not os.path.exists(feats_npz_path):
        os.makedirs(feats_npz_path)
        print('Create dir:', feats_npz_path)

img_size = (args.image_height, args.image_width)
ix = 0
cur_batch = []
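    # read, resize and stack images; run the CNN once a full batch is gathered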
for i, (path, idx) in enumerate(input_paths):
img = imread(path, mode='RGB')
img = imresize(img, img_size, interp='bicubic')
img = img.transpose(2, 0, 1)[None]
cur_batch.append(img)
if len(cur_batch) == args.batch_size:
feats = batch_feat(cur_batch, model)
for j in range(feats.shape[0]):
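            # each feature map is saved as a (196, 1024) array: one row per
            # position of the 14x14 spatial grid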
np.savez(feats_npz_path + str(ix) + '.npz', x=feats[j].reshape(1024, 196).transpose(1, 0))
ix += 1
print('Processed %d/%d images' % (ix, len(input_paths)), end='\r')
cur_batch = []

if len(cur_batch) > 0:
feats = batch_feat(cur_batch, model)
for j in range(feats.shape[0]):
np.savez(feats_npz_path + str(ix) + '.npz', x=feats[j].reshape(1024, 196).transpose(1, 0))
ix += 1
print('Processed %d/%d images' % (ix, len(input_paths)), end='\r')

    print('Extracted image features and generated npz files successfully!')


parser = argparse.ArgumentParser(description='clevr_extract_feat')
parser.add_argument('--mode', '-mode', choices=['all', 'train', 'val', 'test'], default='all', help='mode', type=str)
parser.add_argument('--gpu', '-gpu', default='0', type=str)

parser.add_argument('--model', '-model', default='resnet101')
parser.add_argument('--model_stage', '-model_stage', default=3, type=int)
parser.add_argument('--batch_size', '-batch_size', default=128, type=int)

parser.add_argument('--image_height', '-image_height', default=224, type=int)
parser.add_argument('--image_width', '-image_width', default=224, type=int)


if __name__ == '__main__':
train_images_path = './raws/images/train/'
val_images_path = './raws/images/val/'
test_images_path = './raws/images/test/'
train_feats_npz_path = './feats/train/'
val_feats_npz_path = './feats/val/'
test_feats_npz_path = './feats/test/'

args = parser.parse_args()
print('mode:', args.mode)
print('gpu:', args.gpu)
print('model:', args.model)
print('model_stage:', args.model_stage)
print('batch_size:', args.batch_size)
print('image_height:', args.image_height)
print('image_width:', args.image_width)

os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    # process train image features
    if args.mode in ['train', 'all']:
        print('\nProcess [train] image features:')
extract_feature(args, train_images_path, train_feats_npz_path)

    # process val image features
    if args.mode in ['val', 'all']:
        print('\nProcess [val] image features:')
extract_feature(args, val_images_path, val_feats_npz_path)

    # process test image features
    if args.mode in ['test', 'all']:
        print('\nProcess [test] image features:')
extract_feature(args, test_images_path, test_feats_npz_path)

58 changes: 58 additions & 0 deletions docs/_source/basic/install.md
@@ -84,6 +84,8 @@ Download the [spatial features](https://nlp.stanford.edu/data/gqa/spatialFeature
To make the input features consistent with those for VQA-v2, we provide a [script](https://github.com/MILVLG/openvqa/tree/master/data/gqa/gqa_feat_preproc.py) to transform `.h5` feature files into multiple `.npz` files, with each file corresponding to one image.

```bash
$ cd data/gqa

$ unzip spatialFeatures.zip
$ python gqa_feat_preproc.py --mode=spatial --spatial_dir=./spatialFeatures --out_dir=./feats/gqa-grid
$ rm -r spatialFeatures.zip ./spatialFeatures
```

@@ -145,5 +147,61 @@ All the question files and scene graph files are unzipped and placed in the `dat

### CLEVR

- Images, Questions and Scene Graphs

Download the [CLEVR v1.0](https://dl.fbaipublicfiles.com/clevr/CLEVR_v1.0.zip) dataset from the official site; it includes all the splits needed for training, validation and testing.

All the image files, question files and scene graph files are unzipped and placed in the `data/clevr/raws` folder to form the following tree structure:

```
|-- data
|-- clevr
| |-- raws
| | |-- images
| | | |-- train
| | | | |-- CLEVR_train_000000.png
| | | | |-- ...
| | | | |-- CLEVR_train_069999.png
| | | |-- val
| | | | |-- CLEVR_val_000000.png
| | | | |-- ...
| | | | |-- CLEVR_val_014999.png
| | | |-- test
| | | | |-- CLEVR_test_000000.png
| | | | |-- ...
| | | | |-- CLEVR_test_014999.png
| | |-- questions
| | | |-- CLEVR_train_questions.json
| | | |-- CLEVR_val_questions.json
| | | |-- CLEVR_test_questions.json
| | |-- scenes
| | | |-- CLEVR_train_scenes.json
| | | |-- CLEVR_val_scenes.json
```

- Image Features

To make the input features consistent with those for VQA-v2, we provide a [script](https://github.com/MILVLG/openvqa/tree/master/data/clevr/clevr_extract_feat.py) to extract image features using a pre-trained ResNet-101 model, as in most previous works, and generate `.npz` files, with each file corresponding to one image.

```bash
$ cd data/clevr

$ python clevr_extract_feat.py --mode=all --gpu=0
```
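The split, model, stage, batch size and input resolution can also be set explicitly; for example, to process only the training split with the script's defaults spelled out:

```bash
$ python clevr_extract_feat.py --mode=train --gpu=0 --model=resnet101 --model_stage=3 --batch_size=128 --image_height=224 --image_width=224
```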

All the processed feature files are placed in the `data/clevr/feats` folder to form the following tree structure:

```
|-- data
|-- clevr
| |-- feats
| | |-- train
| | | |-- 0.npz
| | | |-- ...
| | |-- val
| | | |-- 0.npz
| | | |-- ...
| | |-- test
| | | |-- 0.npz
| | | |-- ...
```
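Each `.npz` file stores a single array `x` of shape `(196, 1024)`: one 1024-d ResNet feature per cell of the 14×14 grid produced by stage 3 of ResNet-101 on 224×224 inputs. A minimal sketch for inspecting one feature file, assuming the train split has been extracted:

```python
import numpy as np

# load the features of one image; the path is an example
feats = np.load('./feats/train/0.npz')['x']
print(feats.shape)  # (196, 1024): 14x14 spatial grid, 1024 channels
```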
