# nlp-project-vqa


In [None]:
# Remote storage prefix; the download/upload cells below append sub-paths to it.
obs_path = "s3://nlp-haofeng/nlp_project_vqa/"

In [None]:
import moxing as mox

# Mirror each remote project folder under obs_path into a same-named local folder.
for folder in ("mindrecord", "preprocess", "utils", "data"):
	mox.file.copy_parallel(src_url=obs_path + folder, dst_url='./' + folder)

In [10]:
import mindspore
import numpy as np
import os
from easydict import EasyDict
from preprocess.preprocess import *
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"  # allow the lib file to be loaded more than once

图模式

In [11]:
# Select graph execution mode. The PyNative cell below sets the same option,
# so whichever of the two cells runs last decides the mode.
from mindspore import context
context.set_context(mode=context.GRAPH_MODE)

PyNative模式

In [2]:
# Select PyNative execution mode. The graph-mode cell above sets the same option,
# so whichever of the two cells runs last decides the mode.
from mindspore import context
context.set_context(mode=context.PYNATIVE_MODE)

Ascend 环境安装 MindSpore Hub

In [None]:
! pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/1.3.0/Hub/any/mindspore_hub-1.3.0-py3-none-any.whl --trusted-host ms-release.obs.cn-north-4.myhuaweicloud.com -i https://pypi.tuna.tsinghua.edu.cn/simple

## 1 预处理

### 1.1 预处理配置

In [12]:
# Padding token; the vocabulary cell seeds this same literal at index 0.
padding = '<pad>'

# Preprocessing settings: raw-data locations per split, sequence length,
# and where the vocabulary / MindRecord outputs are written.
config = EasyDict(dict(
	# --- train split ---
	# The *_img_path values end in a filename prefix (presumably completed
	# with an image id by generate_mindrecord — confirm).
	train_img_path='./data/images/train/COCO_train2014_',
	train_ans_path='./data/annotations/train.json',
	train_que_path='./data/questions/train.json',
	# --- validation split ---
	valid_img_path='./data/images/val/COCO_val2014_',
	valid_ans_path='./data/annotations/val.json',
	valid_que_path='./data/questions/val.json',
	# --- test split ---
	test_img_path='./data/images/test/COCO_val2014_',
	test_ans_path='./data/annotations/test.json',
	test_que_path='./data/questions/test.json',
	# Length argument passed to get_vec_and_pad for questions.
	max_length=25,
	# Vocabulary files (word -> index and index -> word).
	dict_path='./mindrecord/dict.npy',
	idx_word_dict_path='./mindrecord/idx_word_dict.npy',
	# Passed to generate_mindrecord as its third argument.
	num_splits=1,
	# MindRecord output files, one per split.
	train_mindrecord_path='./mindrecord/train.mindrecord',
	valid_mindrecord_path='./mindrecord/valid.mindrecord',
	test_mindrecord_path='./mindrecord/test.mindrecord',
))

### 1.2 读取数据

注: 只取那些答案长度为1的vqa组合

In [4]:
# get 3 types of input data
# Each call receives one split's question and annotation JSON paths and yields
# three values unpacked as (images, questions, answers).
# Per the note above this cell, only pairs with a one-word answer are kept.
train_images, train_questions, train_answers = get_list(config.train_que_path, config.train_ans_path)
valid_images, valid_questions, valid_answers = get_list(config.valid_que_path, config.valid_ans_path)
test_images,  test_questions,  test_answers  = get_list(config.test_que_path,  config.test_ans_path)

In [5]:
# Pool the three splits; the vocabulary cell feeds these pooled collections
# to add_word_into_dict so every question and answer word gets an index.
# (assumes get_list returns lists, so `+` concatenates — TODO confirm)
total_questions = train_questions + valid_questions + test_questions
total_answers   = train_answers + valid_answers + test_answers

### 1.3 构建词典

In [6]:
# Build the word -> index vocabulary.
# The padding token is seeded at index 0; question words are added first,
# then answer words.
word_dict = {'<pad>': 0}
for corpus in (total_questions, total_answers):
	word_dict = add_word_into_dict(corpus, word_dict)

In [7]:
# Build the reverse mapping (index -> word) by inverting word_dict.
idx_word_dict = {index: word for word, index in word_dict.items()}

In [8]:
# save dict
# NOTE: np.save pickles a plain dict into an object array, so the matching
# np.load call has to pass allow_pickle=True and unwrap it with .item().
np.save(config.dict_path, word_dict)
np.save(config.idx_word_dict_path, idx_word_dict)

In [None]:
# Push both vocabulary files back to the remote mindrecord folder.
for filename in ("dict.npy", "idx_word_dict.npy"):
	mox.file.copy_parallel(src_url="./mindrecord/" + filename, dst_url=obs_path + "mindrecord/" + filename)

### 1.4 向量化 & 补齐长度

In [9]:
# word -> vector & padding
# Questions are converted with config.max_length as the length argument.
train_questions_vec = get_vec_and_pad(train_questions, word_dict, config.max_length)
valid_questions_vec = get_vec_and_pad(valid_questions, word_dict, config.max_length)
test_questions_vec = get_vec_and_pad(test_questions, word_dict, config.max_length)

# Answers use length 1: the notebook keeps only one-word answers.
train_answers_vec = get_vec_and_pad(train_answers, word_dict, 1)
valid_answers_vec = get_vec_and_pad(valid_answers, word_dict, 1)
test_answers_vec = get_vec_and_pad(test_answers, word_dict, 1)

### 1.5 生成MindRecord

train

In [None]:
# Write the training split: output path, image path prefix, number of splits,
# then the image / question-vector / answer-vector data for that split.
generate_mindrecord(config.train_mindrecord_path, config.train_img_path, config.num_splits, train_images, train_questions_vec, train_answers_vec)

In [None]:
# Upload the training MindRecord file and its .db companion file.
for filename in ("train.mindrecord", "train.mindrecord.db"):
	mox.file.copy_parallel(src_url="./mindrecord/" + filename, dst_url=obs_path + "mindrecord/" + filename)

valid

In [None]:
# Write the validation split: output path, image path prefix, number of splits,
# then the image / question-vector / answer-vector data for that split.
generate_mindrecord(config.valid_mindrecord_path, config.valid_img_path, config.num_splits, valid_images, valid_questions_vec, valid_answers_vec)

In [None]:
# Upload the validation MindRecord file and its .db companion file.
for filename in ("valid.mindrecord", "valid.mindrecord.db"):
	mox.file.copy_parallel(src_url="./mindrecord/" + filename, dst_url=obs_path + "mindrecord/" + filename)

test

In [None]:
# Write the test split: output path, image path prefix, number of splits,
# then the image / question-vector / answer-vector data for that split.
generate_mindrecord(config.test_mindrecord_path, config.test_img_path, config.num_splits, test_images, test_questions_vec, test_answers_vec)

In [None]:
# Upload the test MindRecord file and its .db companion file.
for filename in ("test.mindrecord", "test.mindrecord.db"):
	mox.file.copy_parallel(src_url="./mindrecord/" + filename, dst_url=obs_path + "mindrecord/" + filename)

## 2 加载数据

### 2.1 加载词典

In [13]:
# load dict
# The dicts were written with np.save, which stores them as pickled object
# arrays; .item() unwraps the single stored dict again.
# NOTE(review): allow_pickle=True unpickles whatever the file contains, so
# only load .npy files that come from a trusted source.
word_dict = np.load(config.dict_path, allow_pickle=True).item()
idx_word_dict = np.load(config.idx_word_dict_path, allow_pickle=True).item()

### 2.2 训练配置

In [14]:
# Experiment name; train_config stores it as 'model' and uses it as the
# checkpoint file stem ('./ckpt/<model_name>.ckpt').
model_name = 'baseline'

In [15]:
# Batch-size switch read by train_config: 'large' gives 128, any other value gives 32.
# batch_scale = 'normal'
batch_scale = 'large'

In [16]:
# Model and training hyper-parameters, shared by the network, the wrapper,
# the callbacks and the train/test helpers.
train_config = EasyDict(dict(
	model=model_name,
	# NOTE(review): hard-coded; presumably has to equal len(word_dict) —
	# confirm whenever the vocabulary is rebuilt.
	vocab_size=10233,
	batch_size=32 if batch_scale != 'large' else 128,
	epoch_size=3,
	# Same value as config.max_length; the network's Dense input width depends on it.
	max_length=25,
	hidden_size=1024,
	lr=1e-3,
	momentum=0.9,
	early_stop=100,
	# save_checkpoint_steps=1279,
	ckpt_save_path='./ckpt',
	checkpoint_path=f'./ckpt/{model_name}.ckpt',
	# keep_checkpoint_max=2,
))

### 2.3 生成数据集

In [27]:
# Target platform; the next cell uses it to choose num_parallel_workers.
# platform = 'Local'
# platform = 'Colab'
platform = 'Ascend'

In [28]:
# 4 parallel workers on Ascend or Local, 2 on any other platform.
num_parallel_workers = 4 if platform in ('Ascend', 'Local') else 2
print(num_parallel_workers)


def _load_split(mindrecord_path):
	"""Build one split's dataset with the shared batch size and worker count."""
	# The literal 1 is generate_dataset's third positional argument
	# (meaning not visible in this notebook — see preprocess.preprocess).
	return generate_dataset(mindrecord_path, train_config.batch_size, 1, num_parallel_workers)


# create dataset
train_dataset = _load_split(config.train_mindrecord_path)
valid_dataset = _load_split(config.valid_mindrecord_path)
test_dataset = _load_split(config.test_mindrecord_path)

4


## 3 训练模型

### 3.1 创建模型

In [9]:
import mindspore.nn as nn
import mindspore.ops.operations as P
from utils.metric_utils import *
from utils.wrapper_utils import *
from utils.callback_utils import *

In [10]:
class Network(nn.Cell):
	"""Question-only baseline classifier.

	Embeds the question token ids, flattens the embeddings of the whole
	sequence into one vector per sample and maps it to `vocab_size` scores.
	The `images` input is accepted for interface compatibility but never read.
	"""

	def __init__(self, train_config):
		"""Create the layers.

		Args:
			train_config: settings object providing `vocab_size`,
				`hidden_size` and `max_length`.
		"""
		super().__init__()
		vocab_size = train_config.vocab_size
		hidden_size = train_config.hidden_size
		# Width of one flattened question: one embedding per token position.
		flattened_size = hidden_size * train_config.max_length

		# NOTE: this operator is not used by construct(), which calls
		# Tensor.reshape directly.
		self.reshape = P.Reshape()
		self.embedding = nn.Embedding(vocab_size, hidden_size)
		self.out = nn.Dense(flattened_size, vocab_size)

	def construct(self, images, questions):
		"""Return one score per vocabulary entry for each question.

		Args:
			images: unused.
			questions: token ids; assumed to be (batch, max_length) so the
				flattened width matches the Dense layer — TODO confirm.
		"""
		embedded = self.embedding(questions)
		batch = embedded.shape[0]
		# Collapse every dimension after the batch dimension into one.
		flat = embedded.reshape(batch, -1)
		return self.out(flat)

In [11]:
# Create the network, then wrap it in LossAndAccWrapper (star-imported from the
# utils modules; presumably adds loss and accuracy computation — confirm).
network = Network(train_config)
network = LossAndAccWrapper(network, train_config)

### 3.2 开始训练

In [12]:
def train(network, train_dataset, valid_dataset, train_config):
	"""Train `network` for `train_config.epoch_size` epochs.

	Args:
		network: the (already wrapped) cell to train; it is also used as the
			evaluation network.
		train_dataset: dataset passed to `Model.train`.
		valid_dataset: dataset handed to `get_network_callbacks`
			(presumably for per-epoch validation — confirm).
		train_config: settings object; this function reads
			`ckpt_save_path` and `epoch_size` and forwards the whole object
			to `get_network_callbacks`.

	Returns:
		None.

	NOTE(review): no `loss_fn` or `optimizer` is passed to `mindspore.Model`
	here, so parameter updates have to happen inside the wrapped network —
	confirm. The run recorded in this notebook shows the loss flat at about
	9.23, roughly ln(vocab_size), i.e. no learning.
	"""
	# Create the checkpoint directory. makedirs(exist_ok=True) also creates
	# missing parent directories and does not fail when the directory
	# already exists (no check-then-create race).
	os.makedirs(train_config.ckpt_save_path, exist_ok=True)

	# Build the model; the same wrapped network serves training and evaluation.
	model = mindspore.Model(network, eval_network=network, metrics={'acc': DummyAccuracyMetric()})

	# Fetch the callbacks; only the training callbacks are used here.
	train_callbacks, _ = get_network_callbacks(model, train_dataset, valid_dataset, train_config)

	# Train; keeping the best model is presumably handled by the callbacks.
	model.train(train_config.epoch_size, train_dataset, callbacks=train_callbacks, dataset_sink_mode=True)


In [13]:
# Run training; the per-step and per-epoch lines below are the recorded output.
train(network, train_dataset, valid_dataset, train_config)



epoch: 1 , step: 1  train loss = 9.227992 acc = 0.0
epoch: 1 , step: 2  train loss = 9.229631 acc = 0.0
epoch: 1 , step: 3  train loss = 9.229217 acc = 0.0
epoch: 1 , step: 4  train loss = 9.229599 acc = 0.0
epoch: 1 , step: 5  train loss = 9.228811 acc = 0.0
epoch: 1 , step: 6  train loss = 9.229576 acc = 0.0
epoch: 1 , step: 7  train loss = 9.227288 acc = 0.0
epoch: 1 , step: 8  train loss = 9.2288885 acc = 0.0
epoch: 1 , step: 9  train loss = 9.228852 acc = 0.0
epoch: 1 , step: 10  train loss = 9.2281475 acc = 0.0
epoch: 1 , step: 11  train loss = 9.227444 acc = 0.0
epoch: 1 , step: 12  train loss = 9.2328615 acc = 0.0
epoch: 1 , step: 13  train loss = 9.227057 acc = 0.0
epoch: 1 , step: 14  train loss = 9.228956 acc = 0.0078125
epoch: 1 , step: 15  train loss = 9.229855 acc = 0.0
epoch: 1 , step: 16  train loss = 9.226218 acc = 0.0
epoch: 1 , step: 17  train loss = 9.228907 acc = 0.0
epoch: 1 , step: 18  train loss = 9.227785 acc = 0.0
epoch: 1 , step: 19  train loss = 9.226655 acc



  valid loss = 9.228681 acc = 0.0
epoch: 2 , step: 320  train loss = 9.227678 acc = 0.0
epoch: 2 , step: 321  train loss = 9.228011 acc = 0.0
epoch: 2 , step: 322  train loss = 9.229348 acc = 0.0
epoch: 2 , step: 323  train loss = 9.2278805 acc = 0.0
epoch: 2 , step: 324  train loss = 9.228877 acc = 0.0
epoch: 2 , step: 325  train loss = 9.230395 acc = 0.0
epoch: 2 , step: 326  train loss = 9.229669 acc = 0.0
epoch: 2 , step: 327  train loss = 9.225772 acc = 0.0
epoch: 2 , step: 328  train loss = 9.229613 acc = 0.0
epoch: 2 , step: 329  train loss = 9.230722 acc = 0.0
epoch: 2 , step: 330  train loss = 9.229233 acc = 0.0
epoch: 2 , step: 331  train loss = 9.226866 acc = 0.0
epoch: 2 , step: 332  train loss = 9.227821 acc = 0.0
epoch: 2 , step: 333  train loss = 9.2279625 acc = 0.0
epoch: 2 , step: 334  train loss = 9.228467 acc = 0.0
epoch: 2 , step: 335  train loss = 9.229416 acc = 0.0
epoch: 2 , step: 336  train loss = 9.229816 acc = 0.0
epoch: 2 , step: 337  train loss = 9.228094 ac



  valid loss = 9.228742 acc = 0.0
epoch: 3 , step: 639  train loss = 9.225331 acc = 0.0
epoch: 3 , step: 640  train loss = 9.229488 acc = 0.0
epoch: 3 , step: 641  train loss = 9.228769 acc = 0.0
epoch: 3 , step: 642  train loss = 9.227337 acc = 0.0
epoch: 3 , step: 643  train loss = 9.231872 acc = 0.0
epoch: 3 , step: 644  train loss = 9.231563 acc = 0.0
epoch: 3 , step: 645  train loss = 9.229658 acc = 0.0
epoch: 3 , step: 646  train loss = 9.227373 acc = 0.0
epoch: 3 , step: 647  train loss = 9.226277 acc = 0.0
epoch: 3 , step: 648  train loss = 9.230388 acc = 0.0
epoch: 3 , step: 649  train loss = 9.228862 acc = 0.0
epoch: 3 , step: 650  train loss = 9.232277 acc = 0.0
epoch: 3 , step: 651  train loss = 9.228915 acc = 0.0
epoch: 3 , step: 652  train loss = 9.230385 acc = 0.0
epoch: 3 , step: 653  train loss = 9.22846 acc = 0.0
epoch: 3 , step: 654  train loss = 9.229371 acc = 0.0
epoch: 3 , step: 655  train loss = 9.227113 acc = 0.0
epoch: 3 , step: 656  train loss = 9.225996 acc =



  valid loss = 9.22879 acc = 0.0


## 4 测试模型

### 4.1 创建测试模型

In [None]:
from mindspore import load_checkpoint

In [None]:
# Rebuild a fresh bare network, load the saved weights into it, then wrap it
# the same way as for training.
# NOTE(review): the checkpoint is loaded into the unwrapped Network; if it was
# saved from the wrapped network the parameter names may carry a prefix and
# not match — confirm that the weights are actually loaded.
network = Network(train_config)
load_checkpoint(train_config.checkpoint_path, net=network)
network = LossAndAccWrapper(network, train_config)

### 4.2 开始测试

In [None]:
def test(network, test_dataset):
	"""Evaluate `network` on `test_dataset`.

	Args:
		network: the (already wrapped) cell to evaluate.
		test_dataset: dataset passed to `Model.eval`.

	Returns:
		None; results are reported through `TestCallback`.
	"""
	# Build the evaluation model. `eval_network` is passed explicitly, exactly
	# as train() does: with `metrics` set but no `eval_network`, Model tries to
	# build one from `network` and `loss_fn`, and no `loss_fn` is given here.
	model = mindspore.Model(network, eval_network=network, metrics={'acc': DummyAccuracyMetric()})

	# Callback that displays the results.
	test_callbacks = [TestCallback()]

	# Run the evaluation.
	model.eval(test_dataset, callbacks=test_callbacks, dataset_sink_mode=True)

In [None]:
# Evaluate the checkpoint-loaded, wrapped network on the test split.
test(network, test_dataset)